From 3ff91807b87d615819a090858ed9618a1cdd8911 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 7 Dec 2016 09:34:49 +0100 Subject: [PATCH] Move swrenderer into a namespace, add multithreading framework, and move drawers to commands --- src/CMakeLists.txt | 4 +- src/r_3dfloors.cpp | 8 +- src/r_3dfloors.h | 8 +- src/r_bsp.cpp | 11 +- src/r_bsp.h | 8 +- src/r_defs.h | 2 +- src/r_draw.cpp | 4112 +++++++++++++---------------------------- src/r_draw.h | 469 ++--- src/r_draw_pal.cpp | 2593 ++++++++++++++++++++++++++ src/r_draw_pal.h | 333 ++++ src/r_drawt_pal.cpp | 867 +++++++++ src/r_main.cpp | 75 +- src/r_main.h | 12 +- src/r_plane.cpp | 283 +-- src/r_plane.h | 5 + src/r_segs.cpp | 27 +- src/r_segs.h | 5 + src/r_state.h | 2 +- src/r_swrenderer.cpp | 9 + src/r_things.cpp | 34 +- src/r_things.h | 11 +- src/r_thread.cpp | 297 +++ src/r_thread.h | 235 +++ src/v_draw.cpp | 8 +- src/win32/fb_d3d9.cpp | 9 +- 25 files changed, 5955 insertions(+), 3472 deletions(-) create mode 100644 src/r_draw_pal.cpp create mode 100644 src/r_draw_pal.h create mode 100644 src/r_drawt_pal.cpp create mode 100644 src/r_thread.cpp create mode 100644 src/r_thread.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 83dad3b9d1..e164a338cb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -883,7 +883,9 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw.cpp - r_drawt.cpp + r_draw_pal.cpp + r_drawt_pal.cpp + r_thread.cpp r_main.cpp r_plane.cpp r_segs.cpp diff --git a/src/r_3dfloors.cpp b/src/r_3dfloors.cpp index 61a23187d4..87c8af618e 100644 --- a/src/r_3dfloors.cpp +++ b/src/r_3dfloors.cpp @@ -15,6 +15,11 @@ #include "c_cvars.h" #include "r_3dfloors.h" +CVAR(Int, r_3dfloors, true, 0); + +namespace swrenderer +{ + // external variables int fake3D; F3DFloor *fakeFloor; @@ -28,8 +33,6 @@ HeightLevel *height_cur = NULL; int CurrentMirror = 0; int CurrentSkybox = 0; -CVAR(Int, r_3dfloors, true, 0); - // private variables int height_max = -1; TArray toplist; @@ -160,3 
+163,4 @@ void R_3D_LeaveSkybox() CurrentSkybox--; } +} diff --git a/src/r_3dfloors.h b/src/r_3dfloors.h index cacb974443..a703ae19a4 100644 --- a/src/r_3dfloors.h +++ b/src/r_3dfloors.h @@ -3,6 +3,11 @@ #include "p_3dfloors.h" +EXTERN_CVAR(Int, r_3dfloors); + +namespace swrenderer +{ + // special types struct HeightLevel @@ -57,7 +62,6 @@ extern HeightLevel *height_top; extern HeightLevel *height_cur; extern int CurrentMirror; extern int CurrentSkybox; -EXTERN_CVAR(Int, r_3dfloors); // functions void R_3D_DeleteHeights(); @@ -67,4 +71,6 @@ void R_3D_ResetClip(); void R_3D_EnterSkybox(); void R_3D_LeaveSkybox(); +} + #endif diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 8d423b3b31..91fb86e928 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -58,6 +58,13 @@ #include "po_man.h" #include "r_data/colormaps.h" +CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + +namespace swrenderer +{ + using namespace drawerargs; + seg_t* curline; side_t* sidedef; line_t* linedef; @@ -104,8 +111,6 @@ TArray WallPortals(1000); // note: this array needs to go away as subsector_t *InSubsector; -CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); void R_StoreWallRange (int start, int stop); @@ -1396,3 +1401,5 @@ void R_RenderBSPNode (void *node) } R_Subsector ((subsector_t *)((BYTE *)node - 1)); } + +} diff --git a/src/r_bsp.h b/src/r_bsp.h index 48ca7565bb..e4d70c4cf1 100644 --- a/src/r_bsp.h +++ b/src/r_bsp.h @@ -27,6 +27,11 @@ #include #include "r_defs.h" +EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? + +namespace swrenderer +{ + // The 3072 below is just an arbitrary value picked to avoid // drawing lines the player is too close to that would overflow // the texture calculations. @@ -109,8 +114,6 @@ extern WORD MirrorFlags; typedef void (*drawfunc_t) (int start, int stop); -EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? - // BSP? 
void R_ClearClipSegs (short left, short right); void R_ClearDrawSegs (); @@ -119,5 +122,6 @@ void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool); +} #endif diff --git a/src/r_defs.h b/src/r_defs.h index a7de16c597..6f3b925c77 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -55,7 +55,7 @@ enum SIL_BOTH }; -extern size_t MaxDrawSegs; +namespace swrenderer { extern size_t MaxDrawSegs; } struct FDisplacement; // diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 6f58ec2a3a..a2bf412e8b 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1,27 +1,3 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// $Log:$ -// -// DESCRIPTION: -// The actual span/column drawing functions. -// Here find the main potential for optimization, -// e.g. inline assembly, different algorithms. 
-// -//----------------------------------------------------------------------------- #include @@ -38,2850 +14,1312 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw.h" +#include "r_draw_pal.h" +#include "r_thread.h" -#include "gi.h" -#include "stats.h" -#include "x86.h" - -#undef RANGECHECK - -// status bar height at bottom of screen -// [RH] status bar position at bottom of screen -extern int ST_Y; - -// -// All drawing to the view buffer is accomplished in this file. -// The other refresh files only know about ccordinates, -// not the architecture of the frame buffer. -// Conveniently, the frame buffer is a linear one, -// and we need only the base address, -// and the total size == width*height*depth/8., -// - -BYTE* viewimage; -extern "C" { -int ylookup[MAXHEIGHT]; -BYTE *dc_destorg; -} -int scaledviewwidth; - -// [RH] Pointers to the different column drawers. -// These get changed depending on the current -// screen depth and asm/no asm. -void (*R_DrawColumnHoriz)(void); -void (*R_DrawTranslatedColumn)(void); -void (*R_DrawShadedColumn)(void); -void (*R_DrawSpan)(void); -void (*R_DrawSpanMasked)(void); - -// -// R_DrawColumn -// Source is the top of the column to scale. 
-// -double dc_texturemid; -extern "C" { -int dc_pitch=0xABadCafe; // [RH] Distance between rows - -lighttable_t* dc_colormap; -int dc_x; -int dc_yl; -int dc_yh; -fixed_t dc_iscale; -fixed_t dc_texturefrac; -int dc_color; // [RH] Color for column filler -DWORD dc_srccolor; -DWORD *dc_srcblend; // [RH] Source and destination -DWORD *dc_destblend; // blending lookups - -// first pixel in a column (possibly virtual) -const BYTE* dc_source; - -BYTE* dc_dest; -int dc_count; - -DWORD vplce[4]; -DWORD vince[4]; -BYTE* palookupoffse[4]; -const BYTE* bufplce[4]; -const BYTE* bufplce2[4]; -uint32_t bufheight[4]; - -// just for profiling -int dccount; -} - -int dc_fillcolor; -BYTE *dc_translation; -BYTE shadetables[NUMCOLORMAPS*16*256]; -FDynamicColormap ShadeFakeColormap[16]; -BYTE identitymap[256]; - -EXTERN_CVAR (Int, r_columnmethod) - - -void R_InitShadeMaps() +namespace swrenderer { - int i,j; - // set up shading tables for shaded columns - // 16 colormap sets, progressing from full alpha to minimum visible alpha + // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) + extern "C" short spanend[MAXHEIGHT]; + extern float rw_light; + extern float rw_lightstep; + extern int wallshade; - BYTE *table = shadetables; + double dc_texturemid; - // Full alpha - for (i = 0; i < 16; ++i) + int ylookup[MAXHEIGHT]; + uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; + FDynamicColormap ShadeFakeColormap[16]; + uint8_t identitymap[256]; + FDynamicColormap identitycolormap; + int fuzzoffset[FUZZTABLE + 1]; + int fuzzpos; + int fuzzviewheight; + + namespace drawerargs { - ShadeFakeColormap[i].Color = ~0u; - ShadeFakeColormap[i].Desaturate = ~0u; - ShadeFakeColormap[i].Next = NULL; - ShadeFakeColormap[i].Maps = table; + int dc_pitch; + lighttable_t *dc_colormap; + int dc_x; + int dc_yl; + int dc_yh; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_textureheight; + int dc_color; + uint32_t dc_srccolor; + uint32_t dc_srccolor_bgra; + uint32_t *dc_srcblend; + 
uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_texturefracx; + uint8_t *dc_translation; + uint8_t *dc_dest; + uint8_t *dc_destorg; + int dc_destheight; + int dc_count; + uint32_t vplce[4]; + uint32_t vince[4]; + uint8_t *palookupoffse[4]; + fixed_t palookuplight[4]; + const uint8_t *bufplce[4]; + const uint8_t *bufplce2[4]; + uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; + int vlinebits; + int mvlinebits; + int tmvlinebits; + int ds_y; + int ds_x1; + int ds_x2; + lighttable_t * ds_colormap; + dsfixed_t ds_light; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + fixed_t ds_alpha; + double ds_lod; + const uint8_t *ds_source; + int ds_color; + unsigned int dc_tspans[4][MAXHEIGHT]; + unsigned int *dc_ctspan[4]; + unsigned int *horizspan[4]; + } - for (j = 0; j < NUMCOLORMAPS; ++j) + void R_InitColumnDrawers() + { + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + + void R_InitShadeMaps() + { + int i, j; + // set up shading tables for shaded columns + // 16 colormap sets, progressing from full alpha to minimum visible alpha + + uint8_t *table = shadetables; + + // Full alpha + for (i = 0; i < 16; ++i) { - int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16-i); - for (int k = 0; k < 256; ++k) + ShadeFakeColormap[i].Color = ~0u; + ShadeFakeColormap[i].Desaturate = ~0u; + ShadeFakeColormap[i].Next = NULL; + ShadeFakeColormap[i].Maps = table; + + for (j = 0; j < NUMCOLORMAPS; ++j) { - BYTE v = (((k+2) * a) + 256) >> 14; - table[k] = MIN (v, 64); - } - table += 256; - } - } - for (i = 0; i < NUMCOLORMAPS*16*256; ++i) - { - assert(shadetables[i] <= 64); - } - - // Set up a guaranteed identity map - for (i = 0; i 
< 256; ++i) - { - identitymap[i] = i; - } -} - -/************************************/ -/* */ -/* Palettized drawers (C versions) */ -/* */ -/************************************/ - -// -// A column is a vertical slice/span from a wall texture that, -// given the DOOM style restrictions on the view orientation, -// will always have constant z depth. -// Thus a special case loop for very fast rendering can -// be used. It has also been used with Wolfenstein 3D. -// -void R_DrawColumn (void) -{ - int count; - BYTE* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = dc_dest; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. - do - { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. 
- *dest = colormap[source[frac>>FRACBITS]]; - - dest += pitch; - frac += fracstep; - - } while (--count); - } -} - - -// [RH] Just fills a column with a color -void R_FillColumnP (void) -{ - int count; - BYTE* dest; - - count = dc_count; - - if (count <= 0) - return; - - dest = dc_dest; - - { - int pitch = dc_pitch; - BYTE color = dc_color; - - do - { - *dest = color; - dest += pitch; - } while (--count); - } -} - -void R_FillAddColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillAddClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD a = fg + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillSubClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor | 0x40100400; - int pitch = dc_pitch; - - do - { - DWORD a = fg - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillRevSubClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - 
fg; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -// -// Spectre/Invisibility. -// -#define FUZZTABLE 50 - -extern "C" -{ -int fuzzoffset[FUZZTABLE+1]; // [RH] +1 for the assembly routine -int fuzzpos = 0; -int fuzzviewheight; -} -/* - FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF -*/ - -static const signed char fuzzinit[FUZZTABLE] = { - 1,-1, 1,-1, 1, 1,-1, - 1, 1,-1, 1, 1, 1,-1, - 1, 1, 1,-1,-1,-1,-1, - 1,-1,-1, 1, 1, 1, 1,-1, - 1,-1, 1, 1,-1,-1, 1, - 1,-1,-1,-1,-1, 1, 1, - 1, 1,-1, 1, 1,-1, 1 -}; - -void R_InitFuzzTable (int fuzzoff) -{ - int i; - - for (i = 0; i < FUZZTABLE; i++) - { - fuzzoffset[i] = fuzzinit[i] * fuzzoff; - } -} - -// -// Creates a fuzzy image by copying pixels from adjacent ones above and below. -// Used with an all black colormap, this could create the SHADOW effect, -// i.e. spectres and invisible players. -// -void R_DrawFuzzColumn (void) -{ - int count; - BYTE *dest; - - // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; - - // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - count = dc_yh - dc_yl; - - // Zero length. - if (count < 0) - return; - - count++; - - dest = ylookup[dc_yl] + dc_x + dc_destorg; - - // colormap #6 is used for shading (of 0-31, a bit brighter than average) - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. 
- int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - BYTE *map = &NormalLight.Maps[6*256]; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) - { - cnt = MIN(FUZZTABLE-fuzz,count); - count -= cnt; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do + int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16 - i); + for (int k = 0; k < 256; ++k) { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -} - -// -// R_DrawTranlucentColumn -// - -/* -[RH] This translucency algorithm is based on DOSDoom 0.65's, but uses -a 32k RGB table instead of an 8k one. At least on my machine, it's -slightly faster (probably because it uses only one shift instead of -two), and it looks considerably less green at the ends of the -translucency range. The extra size doesn't appear to be an issue. - -The following note is from DOSDoom 0.65: - -New translucency algorithm, by Erik Sandberg: - -Basically, we compute the red, green and blue values for each pixel, and -then use a RGB table to check which one of the palette colours that best -represents those RGB values. The RGB table is 8k big, with 4 R-bits, -5 G-bits and 4 B-bits. A 4k table gives a bit too bad precision, and a 32k -table takes up more memory and results in more cache misses, so an 8k -table seemed to be quite ultimate. - -The computation of the RGB for each pixel is accelerated by using two -1k tables for each translucency level. 
-The xth element of one of these tables contains the r, g and b values for -the colour x, weighted for the current translucency level (for example, -the weighted rgb values for background colour at 75% translucency are 1/4 -of the original rgb values). The rgb values are stored as three -low-precision fixed point values, packed into one long per colour: -Bit 0-4: Frac part of blue (5 bits) -Bit 5-8: Int part of blue (4 bits) -Bit 9-13: Frac part of red (5 bits) -Bit 14-17: Int part of red (4 bits) -Bit 18-22: Frac part of green (5 bits) -Bit 23-27: Int part of green (5 bits) -Bit 28-31: All zeros (4 bits) - -The point of this format is that the two colours now can be added, and -then be converted to a RGB table index very easily: First, we just set -all the frac bits and the four upper zero bits to 1. It's now possible -to get the RGB table index by anding the current value >> 5 with the -current value >> 19. When asm-optimised, this should be the fastest -algorithm that uses RGB tables. - -*/ - -void R_DrawAddColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[source[frac>>FRACBITS]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// -// R_DrawTranslatedColumn -// Used to draw player sprites with the green colorramp mapped to others. -// Could be used with different translation tables, e.g. the lighter colored -// version of the BaronOfHell, the HellKnight, uses identical sprites, kinda -// brightened up. 
-// - -void R_DrawTranslatedColumnP_C (void) -{ - int count; - BYTE* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *colormap = dc_colormap; - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - *dest = colormap[translation[source[frac>>FRACBITS]]]; - dest += pitch; - - frac += fracstep; - } while (--count); - } -} - -// Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Draw a column whose "color" values are actually translucency -// levels for a base color stored in dc_color. 
-void R_DrawShadedColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac, fracstep; - - count = dc_count; - - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - DWORD *fgstart = &Col2RGB8[0][dc_color]; - - do - { - DWORD val = colormap[source[frac>>FRACBITS]]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Add source to destination, clamping it to white -void R_DrawAddClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Add translated source to destination, clamping it to white -void R_DrawAddClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = fg2rgb[colormap[translation[source[frac>>FRACBITS]]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - 
a |= b; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract destination from source, clamping it to black -void R_DrawSubClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract destination from source, clamping it to black -void R_DrawSubClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[translation[source[frac>>FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract source from destination, clamping it to black -void R_DrawRevSubClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD 
*fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract source from destination, clamping it to black -void R_DrawRevSubClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - - - -// -// R_DrawSpan -// With DOOM style restrictions on view orientation, -// the floors and ceilings consist of horizontal slices -// or spans with constant z depth. -// However, rotation around the world z axis is possible, -// thus this mapping, while simpler and faster than -// perspective correct texture mapping, has to traverse -// the texture at an angle in all but a few cases. -// In consequence, flats are not stored by column (like walls), -// and the inner loop has to step in texture space u and v. -// -// [RH] I'm not sure who wrote this, but floor/ceiling mapping -// *is* perspective correct for spans of constant z depth, which -// Doom guarantees because it does not let you change your pitch. -// Also, because of the new texture system, flats *are* stored by -// column to make it easy to use them on walls too. 
To accomodate -// this, the use of x/u and y/v in R_DrawSpan just needs to be -// swapped. -// -extern "C" { -int ds_color; // [RH] color for non-textured spans - -int ds_y; -int ds_x1; -int ds_x2; - -lighttable_t* ds_colormap; - -dsfixed_t ds_xfrac; -dsfixed_t ds_yfrac; -dsfixed_t ds_xstep; -dsfixed_t ds_ystep; -int ds_xbits; -int ds_ybits; - -// start of a floor/ceiling tile image -const BYTE* ds_source; - -// just for profiling -int dscount; - -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif -} - -//========================================================================== -// -// R_SetSpanSource -// -// Sets the source bitmap for the span drawing routines. -// -//========================================================================== - -void R_SetSpanSource(const BYTE *pixels) -{ - ds_source = pixels; -#ifdef X86_ASM - if (ds_cursource != ds_source) - { - R_SetSpanSource_ASM(pixels); - } -#endif -} - -//========================================================================== -// -// R_SetSpanColormap -// -// Sets the colormap for the span drawing routines. -// -//========================================================================== - -void R_SetSpanColormap(BYTE *colormap) -{ - ds_colormap = colormap; -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - { - R_SetSpanColormap_ASM (ds_colormap); - } -#endif -} - -//========================================================================== -// -// R_SetupSpanBits -// -// Sets the texture size for the span drawing routines. 
-// -//========================================================================== - -void R_SetupSpanBits(FTexture *tex) -{ - tex->GetWidth (); - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - if ((1 << ds_xbits) > tex->GetWidth()) - { - ds_xbits--; - } - if ((1 << ds_ybits) > tex->GetHeight()) - { - ds_ybits--; - } -#ifdef X86_ASM - R_SetSpanSize_ASM (ds_xbits, ds_ybits); -#endif -} - -// -// Draws the actual span. -//#ifndef X86_ASM -void R_DrawSpanP_C (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error ("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } -// dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - // Current texture index in u,v. - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = colormap[source[spot]]; - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = colormap[source[spot]]; - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -// [RH] Draw a span with holes -void R_DrawSpanMaskedP_C (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - int texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - *dest = colormap[texdata]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - int texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - *dest = colormap[texdata]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} -//#endif - -void R_DrawSpanTranslucent (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanMaskedTranslucent (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - BYTE texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanAddClamp (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanMaskedAddClamp (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - BYTE texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -// [RH] Just fill a span with a color -void R_FillSpan (void) -{ - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); -} - -// Draw a voxel slab -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. - -// Actually, this is just R_DrawColumn with an extra width parameter. 
- -#ifndef X86_ASM -static const BYTE *slabcolormap; - -extern "C" void R_SetupDrawSlabC(const BYTE *colormap) -{ - slabcolormap = colormap; -} - -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) -{ - int x; - const BYTE *colormap = slabcolormap; - int pitch = dc_pitch; - - assert(dx > 0); - - if (dx == 1) - { - while (dy > 0) - { - *p = colormap[vptr[v >> FRACBITS]]; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 2) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 3) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p[2] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 4) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p[2] = color; - p[3] = color; - p += pitch; - v += vi; - dy--; - } - } - else while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - // The optimizer will probably turn this into a memset call. - // Since dx is not likely to be large, I'm not sure that's a good thing, - // hence the alternatives above. 
- for (x = 0; x < dx; x++) - { - p[x] = color; - } - p += pitch; - v += vi; - dy--; - } -} -#endif - - -/****************************************************/ -/****************************************************/ - -// wallscan stuff, in C - -#ifndef X86_ASM -static DWORD vlinec1 (); -static int vlinebits; - -DWORD (*dovline1)() = vlinec1; -DWORD (*doprevline1)() = vlinec1; - -#ifdef X64_ASM -extern "C" void vlinetallasm4(); -#define dovline4 vlinetallasm4 -extern "C" void setupvlinetallasm (int); -#else -static void vlinec4 (); -void (*dovline4)() = vlinec4; -#endif - -static DWORD mvlinec1(); -static void mvlinec4(); -static int mvlinebits; - -DWORD (*domvline1)() = mvlinec1; -void (*domvline4)() = mvlinec4; - -#else - -extern "C" -{ -DWORD vlineasm1 (); -DWORD prevlineasm1 (); -DWORD vlinetallasm1 (); -DWORD prevlinetallasm1 (); -void vlineasm4 (); -void vlinetallasmathlon4 (); -void vlinetallasm4 (); -void setupvlineasm (int); -void setupvlinetallasm (int); - -DWORD mvlineasm1(); -void mvlineasm4(); -void setupmvlineasm (int); -} - -DWORD (*dovline1)() = vlinetallasm1; -DWORD (*doprevline1)() = prevlinetallasm1; -void (*dovline4)() = vlinetallasm4; - -DWORD (*domvline1)() = mvlineasm1; -void (*domvline4)() = mvlineasm4; -#endif - -void setupvline (int fracbits) -{ -#ifdef X86_ASM - if (CPU.Family <= 5) - { - if (fracbits >= 24) - { - setupvlineasm (fracbits); - dovline4 = vlineasm4; - dovline1 = vlineasm1; - doprevline1 = prevlineasm1; - } - else - { - setupvlinetallasm (fracbits); - dovline1 = vlinetallasm1; - doprevline1 = prevlinetallasm1; - dovline4 = vlinetallasm4; - } - } - else - { - setupvlinetallasm (fracbits); - if (CPU.bIsAMD && CPU.AMDFamily >= 7) - { - dovline4 = vlinetallasmathlon4; - } - } -#else - vlinebits = fracbits; -#ifdef X64_ASM - setupvlinetallasm(fracbits); -#endif -#endif -} - -#if !defined(X86_ASM) -DWORD vlinec1 () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; 
- const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = vlinebits; - int pitch = dc_pitch; - - do - { - *dest = colormap[source[frac>>bits]]; - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -#ifndef _M_X64 -void vlinec4 () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - - do - { - dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; - dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; - dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; - dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; - dest += dc_pitch; - } while (--count); -} -#else -// Optimized version for 64 bit. In 64 bit mode, accessing global variables is very expensive so even though -// this exceeds the register count, loading all those values into a local variable is faster than not loading all of them. 
-void vlinec4() -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - auto pal0 = palookupoffse[0]; - auto pal1 = palookupoffse[1]; - auto pal2 = palookupoffse[2]; - auto pal3 = palookupoffse[3]; - auto buf0 = bufplce[0]; - auto buf1 = bufplce[1]; - auto buf2 = bufplce[2]; - auto buf3 = bufplce[3]; - const auto vince0 = vince[0]; - const auto vince1 = vince[1]; - const auto vince2 = vince[2]; - const auto vince3 = vince[3]; - auto vplce0 = vplce[0]; - auto vplce1 = vplce[1]; - auto vplce2 = vplce[2]; - auto vplce3 = vplce[3]; - - do - { - dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; - dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; - dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; - dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; - dest += dc_pitch; - } while (--count); -} -#endif - -#endif - -void setupmvline (int fracbits) -{ -#if defined(X86_ASM) - setupmvlineasm (fracbits); - domvline1 = mvlineasm1; - domvline4 = mvlineasm4; -#else - mvlinebits = fracbits; -#endif -} - -#if !defined(X86_ASM) -DWORD mvlinec1 () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = mvlinebits; - int pitch = dc_pitch; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - *dest = colormap[pix]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void mvlinec4 () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = mvlinebits; - DWORD place; - - do - { - BYTE pix; - - pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; - pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; - pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = 
palookupoffse[2][pix]; vplce[2] = place+vince[2]; - pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; - dest += dc_pitch; - } while (--count); -} -#endif - -extern "C" short spanend[MAXHEIGHT]; -extern float rw_light; -extern float rw_lightstep; -extern int wallshade; - -static void R_DrawFogBoundarySection (int y, int y2, int x1) -{ - BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; - - for (; y < y2; ++y) - { - int x2 = spanend[y]; - int x = x1; - do - { - dest[x] = colormap[dest[x]]; - } while (++x <= x2); - dest += dc_pitch; - } -} - -static void R_DrawFogBoundaryLine (int y, int x) -{ - int x2 = spanend[y]; - BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; - do - { - dest[x] = colormap[dest[x]]; - } while (++x <= x2); -} - -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) -{ - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. - - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2-x1-1); - int x = x2-1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort (spanend+t2, b2-t2, x); - } - - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x+1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. 
- R_DrawFogBoundarySection (t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort (spanend+t2, b2-t2, x); - } - rcolormap = lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - } - else - { - if (dc_colormap != basecolormapdata) - { - stop = MIN (t1, b2); - while (t2 < stop) - { - R_DrawFogBoundaryLine (t2++, xr); + uint8_t v = (((k + 2) * a) + 256) >> 14; + table[k] = MIN(v, 64); } - stop = MAX (b1, t2); - while (b2 > stop) + table += 256; + } + } + for (i = 0; i < NUMCOLORMAPS * 16 * 256; ++i) + { + assert(shadetables[i] <= 64); + } + + // Set up a guaranteed identity map + for (i = 0; i < 256; ++i) + { + identitymap[i] = i; + } + } + + void R_InitFuzzTable(int fuzzoff) + { + /* + FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF + */ + + static const int8_t fuzzinit[FUZZTABLE] = { + 1,-1, 1,-1, 1, 1,-1, + 1, 1,-1, 1, 1, 1,-1, + 1, 1, 1,-1,-1,-1,-1, + 1,-1,-1, 1, 1, 1, 1,-1, + 1,-1, 1, 1,-1,-1, 1, + 1,-1,-1,-1,-1, 1, 1, + 1, 1,-1, 1, 1,-1, 1 + }; + + for (int i = 0; i < FUZZTABLE; i++) + { + fuzzoffset[i] = fuzzinit[i] * fuzzoff; + } + } + + namespace + { + bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + { + using namespace drawerargs; + + // r_drawtrans is a seriously bad thing to turn off. I wonder if I should + // just remove it completely. 
+ if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) + { + if (flags & STYLEF_ColorIsFixed) { - R_DrawFogBoundaryLine (--b2, xr); + colfunc = R_FillColumn; + hcolfunc_post1 = rt_copy1col; + hcolfunc_post4 = rt_copy4cols; + } + else if (dc_translation == NULL) + { + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + else + { + colfunc = transcolfunc; + hcolfunc_post1 = rt_tlate1col; + hcolfunc_post4 = rt_tlate4cols; + } + return true; + } + if (flags & STYLEF_InvertSource) + { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) + { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else + { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + switch (op) + { + case STYLEOP_Add: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (fglevel + bglevel <= FRACUNIT) + { // Colors won't overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else + { + colfunc = R_DrawTlatedAddColumn; + hcolfunc_post1 = rt_tlateadd1col; + hcolfunc_post4 = rt_tlateadd4cols; + } + } + else + { // Colors might overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddClampColumn; + hcolfunc_post1 = rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddClampColumn; + hcolfunc_post1 = 
rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else + { + colfunc = R_DrawAddClampTranslatedColumn; + hcolfunc_post1 = rt_tlateaddclamp1col; + hcolfunc_post4 = rt_tlateaddclamp4cols; + } + } + return true; + + case STYLEOP_Sub: + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else + { + colfunc = R_DrawSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlatesubclamp1col; + hcolfunc_post4 = rt_tlatesubclamp4cols; + } + return true; + + case STYLEOP_RevSub: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillRevSubClampColumn; + hcolfunc_post1 = rt_revsubclamp1col; + hcolfunc_post4 = rt_revsubclamp4cols; // post pass must reverse-subtract, same as colfunc + } + else if (dc_translation == NULL) + { + colfunc = R_DrawRevSubClampColumn; + hcolfunc_post1 = rt_revsubclamp1col; + hcolfunc_post4 = rt_revsubclamp4cols; + } + else + { + colfunc = R_DrawRevSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlaterevsubclamp1col; + hcolfunc_post4 = rt_tlaterevsubclamp4cols; + } + return true; + + default: + return false; + } + } + + fixed_t GetAlpha(int type, fixed_t alpha) + { + switch (type) + { + case STYLEALPHA_Zero: return 0; + case STYLEALPHA_One: return OPAQUE; + case STYLEALPHA_Src: return alpha; + case STYLEALPHA_InvSrc: return OPAQUE - alpha; + default: return 0; + } + } + + FDynamicColormap *basecolormapsave; + } + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) + { + using namespace drawerargs; + + fixed_t fglevel, bglevel; + + style.CheckFuzz(); + + if (style.BlendOp == STYLEOP_Shadow) + { + style = LegacyRenderStyles[STYLE_TranslucentStencil]; + alpha = TRANSLUC33; + color = 0; + } + + if (style.Flags & STYLEF_TransSoulsAlpha) + { + alpha = 
fixed_t(transsouls * OPAQUE); + } + else if (style.Flags & STYLEF_Alpha1) + { + alpha = FRACUNIT; + } + else + { + alpha = clamp(alpha, 0, OPAQUE); + } + + if (translation != -1) + { + dc_translation = NULL; + if (translation != 0) + { + FRemapTable *table = TranslationToTable(translation); + if (table != NULL && !table->Inactive) + { + dc_translation = table->Remap; } } - else + } + basecolormapsave = basecolormap; + hcolfunc_pre = R_DrawColumnHoriz; + + // Check for special modes + if (style.BlendOp == STYLEOP_Fuzz) + { + colfunc = fuzzcolfunc; + return DoDraw0; + } + else if (style == LegacyRenderStyles[STYLE_Shaded]) + { + // Shaded drawer only gets 16 levels of alpha because it saves memory. + if ((alpha >>= 12) == 0) + return DontDraw; + colfunc = R_DrawShadedColumn; + hcolfunc_post1 = rt_shaded1col; + hcolfunc_post4 = rt_shaded4cols; + dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; + dc_colormap = (basecolormap = &ShadeFakeColormap[16 - alpha])->Maps; + if (fixedlightlev >= 0 && fixedcolormap == NULL) { - t2 = MAX (t2, MIN (t1, b2)); - b2 = MIN (b2, MAX (b1, t2)); + dc_colormap += fixedlightlev; } + return r_columnmethod ? DoDraw1 : DoDraw0; + } - stop = MIN (t2, b1); - while (t1 < stop) + fglevel = GetAlpha(style.SrcAlpha, alpha); + bglevel = GetAlpha(style.DestAlpha, alpha); + + if (style.Flags & STYLEF_ColorIsFixed) + { + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); + // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. + dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + if (style.Flags & STYLEF_InvertSource) { - spanend[t1++] = x; - } - stop = MAX (b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; + r = 255 - r; + g = 255 - g; + b = 255 - b; } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; + // dc_srccolor is used by the R_Fill* routines. 
It is premultiplied + // with the alpha. + dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; + hcolfunc_pre = R_FillColumnHoriz; + R_SetColorMapLight(identitycolormap.Maps, 0, 0); } - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection (t2, b2, x1); - } -} - -int tmvlinebits; - -void setuptmvline (int bits) -{ - tmvlinebits = bits; -} - -fixed_t tmvline1_add () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) + if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) { - DWORD fg = fg2rgb[colormap[pix]]; - DWORD bg = bg2rgb[*dest]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_add () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD fg = fg2rgb[palookupoffse[i][pix]]; - DWORD bg = bg2rgb[dest[i]]; - fg = (fg+bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -fixed_t tmvline1_addclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = 
fg2rgb[colormap[pix]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_addclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -fixed_t tmvline1_subclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_subclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += 
dc_pitch; - } while (--count); -} - -fixed_t tmvline1_revsubclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_revsubclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[0]; - int textureheight0 = bufheight[0]; - - int32_t frac = vplce[0]; - int32_t fracstep = vince[0]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) - { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg 
= source0[sample_index]; - - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; - } -} - -void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; - int textureheight0 = bufheight[0]; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; - int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = 
RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - } - - // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - 
c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - output[col] = source0[col][sample_index]; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Fade bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - } -} - -void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - 
const uint8_t *source0 = bufplce[0]; - const uint8_t *source1 = bufplce2[0]; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; - - int32_t frac = vplce[0]; - int32_t fracstep = vince[0]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) - { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[sample_index2]; - } - - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; - } -} - -void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0[4] = { bufplce[0], 
bufplce[1], bufplce[2], bufplce[3] }; - const uint8_t *source1[4] = { bufplce2[0], bufplce2[1], bufplce2[2], bufplce2[3] }; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; - int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / 
fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - } - - // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Fade 
bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - } -} - -//========================================================================== -// -// R_GetColumn -// -//========================================================================== - -const BYTE *R_GetColumn (FTexture *tex, int col) -{ - int width; - - // If the texture's width isn't a power of 2, then we need to make it a - // positive offset for proper clamping. 
- if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) - { - col = width + (col % width); - } - return tex->GetColumn (col, NULL); -} - - -// [RH] Initialize the column drawer pointers -void R_InitColumnDrawers () -{ -#ifdef X86_ASM - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_ASM; - R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; -#else - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_C; -#endif -} - -// [RH] Choose column drawers in a single place -EXTERN_CVAR (Int, r_drawfuzz) -EXTERN_CVAR (Bool, r_drawtrans) -EXTERN_CVAR (Float, transsouls) - -static FDynamicColormap *basecolormapsave; - -static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) -{ - // r_drawtrans is a seriously bad thing to turn off. I wonder if I should - // just remove it completely. 
- if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) - { - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillColumnP; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; - } - else if (dc_translation == NULL) - { - colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - } - else - { - colfunc = transcolfunc; - hcolfunc_post1 = rt_tlate1col; - hcolfunc_post4 = rt_tlate4cols; - } - return true; - } - if (flags & STYLEF_InvertSource) - { - dc_srcblend = Col2RGB8_Inverse[fglevel>>10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; - } - else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) - { - dc_srcblend = Col2RGB8[fglevel>>10]; - dc_destblend = Col2RGB8[bglevel>>10]; - } - else - { - dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; - } - switch (op) - { - case STYLEOP_Add: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (fglevel + bglevel <= FRACUNIT) - { // Colors won't overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawAddColumnP_C; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else - { - colfunc = R_DrawTlatedAddColumnP_C; - hcolfunc_post1 = rt_tlateadd1col; - hcolfunc_post4 = rt_tlateadd4cols; - } - } - else - { // Colors might overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawAddClampColumnP_C; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else - { - colfunc = R_DrawAddClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlateaddclamp1col; - hcolfunc_post4 = 
rt_tlateaddclamp4cols; - } - } - return true; - - case STYLEOP_Sub: - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawSubClampColumnP_C; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else - { - colfunc = R_DrawSubClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlatesubclamp1col; - hcolfunc_post4 = rt_tlatesubclamp4cols; - } - return true; - - case STYLEOP_RevSub: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillRevSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawRevSubClampColumnP_C; - hcolfunc_post1 = rt_revsubclamp1col; - hcolfunc_post4 = rt_revsubclamp4cols; - } - else - { - colfunc = R_DrawRevSubClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlaterevsubclamp1col; - hcolfunc_post4 = rt_tlaterevsubclamp4cols; - } - return true; - - default: - return false; - } -} - -static fixed_t GetAlpha(int type, fixed_t alpha) -{ - switch (type) - { - case STYLEALPHA_Zero: return 0; - case STYLEALPHA_One: return OPAQUE; - case STYLEALPHA_Src: return alpha; - case STYLEALPHA_InvSrc: return OPAQUE - alpha; - default: return 0; - } -} - -ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color) -{ - fixed_t fglevel, bglevel; - - style.CheckFuzz(); - - if (style.BlendOp == STYLEOP_Shadow) - { - style = LegacyRenderStyles[STYLE_TranslucentStencil]; - alpha = TRANSLUC33; - color = 0; - } - - if (style.Flags & STYLEF_TransSoulsAlpha) - { - alpha = fixed_t(transsouls * OPAQUE); - } - else if (style.Flags & STYLEF_Alpha1) - { - alpha = FRACUNIT; - } - else - { - alpha = clamp (alpha, 0, OPAQUE); - } - - dc_translation = NULL; - if (translation != 0) - { - FRemapTable *table = 
TranslationToTable(translation); - if (table != NULL && !table->Inactive) - { - dc_translation = table->Remap; - } - } - basecolormapsave = basecolormap; - hcolfunc_pre = R_DrawColumnHoriz; - - // Check for special modes - if (style.BlendOp == STYLEOP_Fuzz) - { - colfunc = fuzzcolfunc; - return DoDraw0; - } - else if (style == LegacyRenderStyles[STYLE_Shaded]) - { - // Shaded drawer only gets 16 levels of alpha because it saves memory. - if ((alpha >>= 12) == 0) return DontDraw; - colfunc = R_DrawShadedColumn; - hcolfunc_post1 = rt_shaded1col; - hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; - if (fixedlightlev >= 0 && fixedcolormap == NULL) - { - dc_colormap += fixedlightlev; } return r_columnmethod ? DoDraw1 : DoDraw0; } - fglevel = GetAlpha(style.SrcAlpha, alpha); - bglevel = GetAlpha(style.DestAlpha, alpha); - - if (style.Flags & STYLEF_ColorIsFixed) + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) { - int x = fglevel >> 10; - int r = RPART(color); - int g = GPART(color); - int b = BPART(color); - // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. - dc_color = RGB32k.RGB[r>>3][g>>3][b>>3]; - if (style.Flags & STYLEF_InvertSource) + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); + } + + void R_FinishSetPatchStyle() + { + basecolormap = basecolormapsave; + } + + const uint8_t *R_GetColumn(FTexture *tex, int col) + { + int width; + + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. + if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) { - r = 255 - r; - g = 255 - g; - b = 255 - b; + col = width + (col % width); } - // dc_srccolor is used by the R_Fill* routines. It is premultiplied - // with the alpha. 
- dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHorizP; - dc_colormap = identitymap; + + return tex->GetColumn(col, nullptr); } - if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()) { - return DontDraw; + if (colfunc == R_DrawAddColumn) + { + *tmvline1 = tmvline1_add; + *tmvline4 = tmvline4_add; + return true; + } + if (colfunc == R_DrawAddClampColumn) + { + *tmvline1 = tmvline1_addclamp; + *tmvline4 = tmvline4_addclamp; + return true; + } + if (colfunc == R_DrawSubClampColumn) + { + *tmvline1 = tmvline1_subclamp; + *tmvline4 = tmvline4_subclamp; + return true; + } + if (colfunc == R_DrawRevSubClampColumn) + { + *tmvline1 = tmvline1_revsubclamp; + *tmvline4 = tmvline4_revsubclamp; + return true; + } + return false; + } + + void setupvline(int fracbits) + { + drawerargs::vlinebits = fracbits; + } + + void setupmvline(int fracbits) + { + drawerargs::mvlinebits = fracbits; + } + + void setuptmvline(int fracbits) + { + drawerargs::tmvlinebits = fracbits; + } + + void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade) + { + using namespace drawerargs; + + dc_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + + void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade) + { + using namespace drawerargs; + + ds_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + + void R_SetTranslationMap(lighttable_t *translation) + { + using namespace drawerargs; + + dc_colormap = translation; + } + + void rt_initcols(uint8_t *buffer) + { + using namespace drawerargs; + + for (int y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buffer); + } + + void rt_span_coverage(int x, int start, int stop) + { + using namespace drawerargs; + + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] 
= start; + (*tspan)[1] = stop; + *tspan += 2; + } + + void rt_flip_posts() + { + using namespace drawerargs; + + unsigned int *front = horizspan[dc_x & 3]; + unsigned int *back = dc_ctspan[dc_x & 3] - 2; + + while (front < back) + { + swapvalues(front[0], back[0]); + swapvalues(front[1], back[1]); + front += 2; + back -= 2; + } + } + + void rt_draw4cols(int sx) + { + using namespace drawerargs; + + int x, bad; + unsigned int maxtop, minbot, minnexttop; + + // Place a dummy "span" in each column. These don't get + // drawn. They're just here to avoid special cases in the + // max/min calculations below. + for (x = 0; x < 4; ++x) + { + dc_ctspan[x][0] = screen->GetHeight()+1; + dc_ctspan[x][1] = screen->GetHeight(); + } + + for (;;) + { + // If a column is out of spans, mark it as such + bad = 0; + minnexttop = 0xffffffff; + for (x = 0; x < 4; ++x) + { + if (horizspan[x] >= dc_ctspan[x]) + { + bad |= 1 << x; + } + else if ((horizspan[x]+2)[0] < minnexttop) + { + minnexttop = (horizspan[x]+2)[0]; + } + } + // Once all columns are out of spans, we're done + if (bad == 15) + { + return; + } + + // Find the largest shared area for the spans in each column + maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), + MAX (horizspan[2][0], horizspan[3][0])); + minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), + MIN (horizspan[2][1], horizspan[3][1])); + + // If there is no shared area with these spans, draw each span + // individually and advance to the next spans until we reach a shared area. + // However, only draw spans down to the highest span in the next set of + // spans. If we allow the entire height of a span to be drawn, it could + // prevent any more shared areas from being drawn in these four columns. 
+ // + // Example: Suppose we have the following arrangement: + // A CD + // A CD + // B D + // B D + // aB D + // aBcD + // aBcD + // aBc + // + // If we draw the entire height of the spans, we end up drawing this first: + // A CD + // A CD + // B D + // B D + // B D + // B D + // B D + // B D + // B + // + // This leaves only the "a" and "c" columns to be drawn, and they are not + // part of a shared area, but if we can include B and D with them, we can + // get a shared area. So we cut off everything in the first set just + // above the "a" column and end up drawing this first: + // A CD + // A CD + // B D + // B D + // + // Then the next time through, we have the following arrangement with an + // easily shared area to draw: + // aB D + // aBcD + // aBcD + // aBc + if (bad != 0 || maxtop > minbot) + { + int drawcount = 0; + for (x = 0; x < 4; ++x) + { + if (!(bad & 1)) + { + if (horizspan[x][1] < minnexttop) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); + horizspan[x] += 2; + drawcount++; + } + else if (minnexttop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); + horizspan[x][0] = minnexttop; + drawcount++; + } + } + bad >>= 1; + } + // Drawcount *should* always be non-zero. The reality is that some situations + // can make this not true. Unfortunately, I'm not sure what those situations are. + if (drawcount == 0) + { + return; + } + continue; + } + + // Draw any span fragments above the shared area. + for (x = 0; x < 4; ++x) + { + if (maxtop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); + } + } + + // Draw the shared area. + hcolfunc_post4 (sx, maxtop, minbot); + + // For each column, if part of the span is past the shared area, + // set its top to just below the shared area. Otherwise, advance + // to the next span in that column. 
+ for (x = 0; x < 4; ++x) + { + if (minbot < horizspan[x][1]) + { + horizspan[x][0] = minbot+1; + } + else + { + horizspan[x] += 2; + } + } + } + } + + void R_SetupSpanBits(FTexture *tex) + { + using namespace drawerargs; + + tex->GetWidth(); + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + if ((1 << ds_xbits) > tex->GetWidth()) + { + ds_xbits--; + } + if ((1 << ds_ybits) > tex->GetHeight()) + { + ds_ybits--; + } + } + + void R_SetSpanColormap(lighttable_t *colormap) + { + using namespace drawerargs; + + ds_colormap = colormap; + } + + void R_SetSpanSource(FTexture *tex) + { + using namespace drawerargs; + + ds_source = tex->GetPixels(); + } + + ///////////////////////////////////////////////////////////////////////// + + void R_FillColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + // Copies one span at hx to the screen at sx. + void rt_copy1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Copies all four spans to the screen starting at sx. + void rt_copy4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Maps one span at hx to the screen at sx. + void rt_map1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Maps all four spans to the screen starting at sx. + void rt_map4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates one span at hx to the screen at sx. 
+ void rt_tlate1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_map1col(hx, sx, yl, yh); + } + + // Translates all four spans to the screen starting at sx. + void rt_tlate4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_map4cols(sx, yl, yh); + } + + // Adds one span at hx to the screen at sx without clamping. + void rt_add1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx without clamping. + void rt_add4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx without clamping. + void rt_tlateadd1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_add1col(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx without clamping. + void rt_tlateadd4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_add4cols(sx, yl, yh); + } + + // Shades one span at hx to the screen at sx. + void rt_shaded1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Shades all four spans to the screen starting at sx. + void rt_shaded4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Adds one span at hx to the screen at sx with clamping. + void rt_addclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx with clamping. + void rt_addclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx with clamping. 
+ void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_addclamp1col(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx with clamping. + void rt_tlateaddclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_addclamp4cols(sx, yl, yh); + } + + // Subtracts one span at hx to the screen at sx with clamping. + void rt_subclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans to the screen starting at sx with clamping. + void rt_subclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx to the screen at sx with clamping. + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_subclamp1col(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans to the screen starting at sx with clamping. + void rt_tlatesubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_subclamp4cols(sx, yl, yh); + } + + // Subtracts one span at hx from the screen at sx with clamping. + void rt_revsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans from the screen starting at sx with clamping. + void rt_revsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx from the screen at sx with clamping. + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_revsubclamp1col(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans from the screen starting at sx with clamping. 
+ void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_revsubclamp4cols(sx, yl, yh); + } + + uint32_t vlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void vlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + uint32_t mvlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void mvlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_revsubclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_revsubclamp() + { + using namespace 
drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillRevSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawFuzzColumn() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + dc_yl = MAX(dc_yl, 1); + dc_yh = MIN(dc_yh, fuzzviewheight); + if (dc_yl <= dc_yh) + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; + } + + void R_DrawAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTlatedAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawShadedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampTranslatedColumn() + { + 
DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMasked() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + { + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } + + void R_DrawColoredSpan(int y, int x1, int x2) + { + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + + namespace + { + const uint8_t *slab_colormap; + } + + void R_SetupDrawSlab(uint8_t *colormap) + { + slab_colormap = colormap; + } + + void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) + { + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_colormap); + } + + void R_DrawFogBoundarySection(int y, int y2, int x1) + { + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + } + + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) + { + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. 
+ + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + uint8_t *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); + + uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. + R_DrawFogBoundarySection(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection(t2, b2, x1); + } + } + + void R_DrawParticle(vissprite_t *sprite) + { + R_DrawParticle_C(sprite); } - return r_columnmethod ? 
DoDraw1 : DoDraw0; } - -void R_FinishSetPatchStyle () -{ - basecolormap = basecolormapsave; -} - -bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) -{ - if (colfunc == R_DrawAddColumnP_C) - { - *tmvline1 = tmvline1_add; - *tmvline4 = tmvline4_add; - return true; - } - if (colfunc == R_DrawAddClampColumnP_C) - { - *tmvline1 = tmvline1_addclamp; - *tmvline4 = tmvline4_addclamp; - return true; - } - if (colfunc == R_DrawSubClampColumnP_C) - { - *tmvline1 = tmvline1_subclamp; - *tmvline4 = tmvline4_subclamp; - return true; - } - if (colfunc == R_DrawRevSubClampColumnP_C) - { - *tmvline1 = tmvline1_revsubclamp; - *tmvline4 = tmvline4_revsubclamp; - return true; - } - return false; -} - diff --git a/src/r_draw.h b/src/r_draw.h index 6713d40915..40b3328964 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -1,287 +1,208 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// System specific interface stuff. 
-// -//----------------------------------------------------------------------------- - -#ifndef __R_DRAW__ -#define __R_DRAW__ +#pragma once #include "r_defs.h" -extern "C" int ylookup[MAXHEIGHT]; +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Int, r_drawfuzz); +EXTERN_CVAR(Bool, r_drawtrans); +EXTERN_CVAR(Float, transsouls); +EXTERN_CVAR(Int, r_columnmethod); -extern "C" int dc_pitch; // [RH] Distance between rows - -extern "C" lighttable_t*dc_colormap; -extern "C" int dc_x; -extern "C" int dc_yl; -extern "C" int dc_yh; -extern "C" fixed_t dc_iscale; -extern double dc_texturemid; -extern "C" fixed_t dc_texturefrac; -extern "C" int dc_color; // [RH] For flat colors (no texturing) -extern "C" DWORD dc_srccolor; -extern "C" DWORD *dc_srcblend; -extern "C" DWORD *dc_destblend; - -// first pixel in a column -extern "C" const BYTE* dc_source; - -extern "C" BYTE *dc_dest, *dc_destorg; -extern "C" int dc_count; - -extern "C" DWORD vplce[4]; -extern "C" DWORD vince[4]; -extern "C" BYTE* palookupoffse[4]; -extern "C" const BYTE* bufplce[4]; -extern "C" const BYTE* bufplce2[4]; -extern "C" uint32_t bufheight[4]; - -// [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; -extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; -extern "C" unsigned int *dc_ctspan[4]; -extern "C" unsigned int horizspans[4]; - - -// [RH] Pointers to the different column and span drawers... - -// The span blitting interface. -// Hook in assembler or system specific BLT here. - -extern DWORD (*dovline1) (); -extern DWORD (*doprevline1) (); -#ifdef X64_ASM -#define dovline4 vlinetallasm4 -extern "C" void vlinetallasm4(); -#else -extern void (*dovline4) (); -#endif -extern void setupvline (int); - -extern DWORD (*domvline1) (); -extern void (*domvline4) (); -extern void setupmvline (int); - -extern void setuptmvline (int); - -// The Spectre/Invisibility effect. 
-extern void R_DrawFuzzColumn(void); - -// [RH] Draw shaded column -extern void (*R_DrawShadedColumn)(void); - -// Draw with color translation tables, for player sprite rendering, -// Green/Red/Blue/Indigo shirts. -extern void (*R_DrawTranslatedColumn)(void); - -// Span drawing for rows, floor/ceiling. No Spectre effect needed. -extern void (*R_DrawSpan)(void); -void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(BYTE *colormap); -void R_SetSpanSource(const BYTE *pixels); - -// Span drawing for masked textures. -extern void (*R_DrawSpanMasked)(void); - -// Span drawing for translucent textures. -void R_DrawSpanTranslucent(void); - -// Span drawing for masked, translucent textures. -void R_DrawSpanMaskedTranslucent(void); - -// Span drawing for translucent, additive textures. -void R_DrawSpanAddClamp(void); - -// Span drawing for masked, translucent, additive textures. -void R_DrawSpanMaskedAddClamp(void); - -// [RH] Span blit into an interleaved intermediate buffer -extern void (*R_DrawColumnHoriz)(void); - -// [RH] Initialize the above pointers -void R_InitColumnDrawers (); - -// [RH] Moves data from the temporary buffer to the screen. 
- -void rt_copy1col(int hx, int sx, int yl, int yh); -void rt_copy4cols(int sx, int yl, int yh); -void rt_map4cols(int sx, int yl, int yh); - -extern "C" +namespace swrenderer { + struct vissprite_t; -void rt_shaded1col (int hx, int sx, int yl, int yh); -void rt_shaded4cols_c (int sx, int yl, int yh); -void rt_shaded4cols_asm (int sx, int yl, int yh); + extern double dc_texturemid; -void rt_map1col (int hx, int sx, int yl, int yh); -void rt_add1col (int hx, int sx, int yl, int yh); -void rt_addclamp1col (int hx, int sx, int yl, int yh); -void rt_subclamp1col (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col (int hx, int sx, int yl, int yh); + namespace drawerargs + { + extern int dc_pitch; + extern lighttable_t *dc_colormap; + extern int dc_x; + extern int dc_yl; + extern int dc_yh; + extern fixed_t dc_iscale; + extern fixed_t dc_texturefrac; + extern uint32_t dc_textureheight; + extern int dc_color; + extern uint32_t dc_srccolor; + extern uint32_t dc_srccolor_bgra; + extern uint32_t *dc_srcblend; + extern uint32_t *dc_destblend; + extern fixed_t dc_srcalpha; + extern fixed_t dc_destalpha; + extern const uint8_t *dc_source; + extern const uint8_t *dc_source2; + extern uint32_t dc_texturefracx; + extern uint8_t *dc_translation; + extern uint8_t *dc_dest; + extern uint8_t *dc_destorg; + extern int dc_destheight; + extern int dc_count; -void rt_tlate1col (int hx, int sx, int yl, int yh); -void rt_tlateadd1col (int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); + extern uint32_t vplce[4]; + extern uint32_t vince[4]; + extern uint8_t *palookupoffse[4]; + extern fixed_t palookuplight[4]; + extern const uint8_t *bufplce[4]; + extern const uint8_t *bufplce2[4]; + extern uint32_t buftexturefracx[4]; + extern uint32_t bufheight[4]; + extern int vlinebits; + extern int mvlinebits; + extern int 
tmvlinebits; -void rt_add4cols_c (int sx, int yl, int yh); -void rt_addclamp4cols_c (int sx, int yl, int yh); -void rt_subclamp4cols (int sx, int yl, int yh); -void rt_revsubclamp4cols (int sx, int yl, int yh); + extern int ds_y; + extern int ds_x1; + extern int ds_x2; + extern lighttable_t * ds_colormap; + extern dsfixed_t ds_light; + extern dsfixed_t ds_xfrac; + extern dsfixed_t ds_yfrac; + extern dsfixed_t ds_xstep; + extern dsfixed_t ds_ystep; + extern int ds_xbits; + extern int ds_ybits; + extern fixed_t ds_alpha; + extern double ds_lod; + extern const uint8_t *ds_source; + extern int ds_color; -void rt_tlate4cols (int sx, int yl, int yh); -void rt_tlateadd4cols (int sx, int yl, int yh); -void rt_tlateaddclamp4cols (int sx, int yl, int yh); -void rt_tlatesubclamp4cols (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); + extern unsigned int dc_tspans[4][MAXHEIGHT]; + extern unsigned int *dc_ctspan[4]; + extern unsigned int *horizspan[4]; + } -void rt_add4cols_asm (int sx, int yl, int yh); -void rt_addclamp4cols_asm (int sx, int yl, int yh); + extern int ylookup[MAXHEIGHT]; + extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; + extern FDynamicColormap ShadeFakeColormap[16]; + extern uint8_t identitymap[256]; + extern FDynamicColormap identitycolormap; + + // Spectre/Invisibility. 
+ #define FUZZTABLE 50 + extern int fuzzoffset[FUZZTABLE + 1]; + extern int fuzzpos; + extern int fuzzviewheight; + + void R_InitColumnDrawers(); + void R_InitShadeMaps(); + void R_InitFuzzTable(int fuzzoff); + + enum ESPSResult + { + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 + }; + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); + void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()); + + const uint8_t *R_GetColumn(FTexture *tex, int col); + void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + + void rt_initcols(uint8_t *buffer = nullptr); + void rt_span_coverage(int x, int start, int stop); + void rt_draw4cols(int sx); + void rt_flip_posts(); + void rt_copy1col(int hx, int sx, int yl, int yh); + void rt_copy4cols(int sx, int yl, int yh); + void rt_shaded1col(int hx, int sx, int yl, int yh); + void rt_shaded4cols(int sx, int yl, int yh); + void rt_map1col(int hx, int sx, int yl, int yh); + void rt_add1col(int hx, int sx, int yl, int yh); + void rt_addclamp1col(int hx, int sx, int yl, int yh); + void rt_subclamp1col(int hx, int sx, int yl, int yh); + void rt_revsubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlate1col(int hx, int sx, 
int yl, int yh); + void rt_tlateadd1col(int hx, int sx, int yl, int yh); + void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); + void rt_map4cols(int sx, int yl, int yh); + void rt_add4cols(int sx, int yl, int yh); + void rt_addclamp4cols(int sx, int yl, int yh); + void rt_subclamp4cols(int sx, int yl, int yh); + void rt_revsubclamp4cols(int sx, int yl, int yh); + void rt_tlate4cols(int sx, int yl, int yh); + void rt_tlateadd4cols(int sx, int yl, int yh); + void rt_tlateaddclamp4cols(int sx, int yl, int yh); + void rt_tlatesubclamp4cols(int sx, int yl, int yh); + void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); + void R_DrawColumnHoriz(); + void R_DrawColumn(); + void R_DrawFuzzColumn(); + void R_DrawTranslatedColumn(); + void R_DrawShadedColumn(); + void R_FillColumn(); + void R_FillAddColumn(); + void R_FillAddClampColumn(); + void R_FillSubClampColumn(); + void R_FillRevSubClampColumn(); + void R_DrawAddColumn(); + void R_DrawTlatedAddColumn(); + void R_DrawAddClampColumn(); + void R_DrawAddClampTranslatedColumn(); + void R_DrawSubClampColumn(); + void R_DrawSubClampTranslatedColumn(); + void R_DrawRevSubClampColumn(); + void R_DrawRevSubClampTranslatedColumn(); + void R_DrawSpan(); + void R_DrawSpanMasked(); + void R_DrawSpanTranslucent(); + void R_DrawSpanMaskedTranslucent(); + void R_DrawSpanAddClamp(); + void R_DrawSpanMaskedAddClamp(); + void R_FillSpan(); + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void R_DrawColoredSpan(int y, int x1, int x2); + void R_SetupDrawSlab(uint8_t *colormap); + void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + 
uint32_t vlinec1(); + void vlinec4(); + uint32_t mvlinec1(); + void mvlinec4(); + fixed_t tmvline1_add(); + void tmvline4_add(); + fixed_t tmvline1_addclamp(); + void tmvline4_addclamp(); + fixed_t tmvline1_subclamp(); + void tmvline4_subclamp(); + fixed_t tmvline1_revsubclamp(); + void tmvline4_revsubclamp(); + void R_FillColumnHoriz(); + void R_FillSpan(); + + inline uint32_t dovline1() { return vlinec1(); } + inline void dovline4() { vlinec4(); } + inline uint32_t domvline1() { return mvlinec1(); } + inline void domvline4() { mvlinec4(); } + + void setupvline(int fracbits); + void setupmvline(int fracbits); + void setuptmvline(int fracbits); + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + + void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetTranslationMap(lighttable_t *translation); + + void R_SetupSpanBits(FTexture *tex); + void R_SetSpanColormap(lighttable_t *colormap); + void R_SetSpanSource(FTexture *tex); + + void R_MapTiltedPlane(int y, int x1); + void R_MapColoredPlane(int y, int x1); + void R_DrawParticle(vissprite_t *); } - - -#ifdef X86_ASM -#define rt_shaded4cols rt_shaded4cols_asm -#define rt_add4cols rt_add4cols_asm -#define rt_addclamp4cols rt_addclamp4cols_asm -#else -#define rt_shaded4cols rt_shaded4cols_c -#define rt_add4cols rt_add4cols_c -#define rt_addclamp4cols rt_addclamp4cols_c -#endif - -void rt_flip_posts(); -void rt_draw4cols (int sx); - -// [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (BYTE *buffer=NULL); - -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); - - -#ifdef X86_ASM - - void R_DrawShadedColumnP_C (void); -extern "C" void R_DrawSpanP_ASM (void); -extern "C" void R_DrawSpanMaskedP_ASM (void); - -void R_DrawColumnHorizP_C(void); - -#else - -void R_DrawShadedColumnP_C (void); -void R_DrawSpanP_C (void); -void R_DrawSpanMaskedP_C (void); - -#endif - -void R_DrawColumn(); -void R_DrawColumnHorizP_C(void); -void R_DrawTranslatedColumnP_C(void); -void R_DrawSpanTranslucent (void); -void R_DrawSpanMaskedTranslucent (void); - -void R_DrawTlatedLucentColumnP_C (void); -#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C - -void R_FillColumnP (void); -void R_FillColumnHorizP (void); -void R_FillSpan (void); - -#ifdef X86_ASM -#define R_SetupDrawSlab R_SetupDrawSlabA -#define R_DrawSlab R_DrawSlabA -#else -#define R_SetupDrawSlab R_SetupDrawSlabC -#define R_DrawSlab R_DrawSlabC -#endif - -extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); - -extern "C" int ds_y; -extern "C" int ds_x1; -extern "C" int ds_x2; - -extern "C" lighttable_t* ds_colormap; - -extern "C" dsfixed_t ds_xfrac; -extern "C" dsfixed_t ds_yfrac; -extern "C" dsfixed_t ds_xstep; -extern "C" dsfixed_t ds_ystep; -extern "C" int ds_xbits; -extern "C" int ds_ybits; -extern "C" fixed_t ds_alpha; - -// start of a 64*64 tile image -extern "C" const BYTE* ds_source; - -extern "C" int ds_color; // [RH] For flat color (no texturing) - -extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/]; -extern FDynamicColormap ShadeFakeColormap[16]; -extern BYTE identitymap[256]; -extern BYTE *dc_translation; - -// [RH] Added for muliresolution support -void R_InitShadeMaps(); -void R_InitFuzzTable (int fuzzoff); - -// [RH] Consolidate column drawer selection -enum ESPSResult -{ - DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - 
DoDraw1, // draw this as if r_columnmethod is 1 -}; -ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color); -inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) -{ - return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); -} - -// Call this after finished drawing the current thing, in case its -// style was STYLE_Shade -void R_FinishSetPatchStyle (); - -// transmaskwallscan calls this to find out what column drawers to use -bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); - -// Retrieve column data for wallscan. Should probably be removed -// to just use the texture's GetColumn() method. It just exists -// for double-layer skies. -const BYTE *R_GetColumn (FTexture *tex, int col); -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
-void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// transmaskwallscan is like maskwallscan, but it can also blend to the background -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - -#endif diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp new file mode 100644 index 0000000000..0264dcbf9e --- /dev/null +++ b/src/r_draw_pal.cpp @@ -0,0 +1,2593 @@ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +/* + [RH] This translucency algorithm is based on DOSDoom 0.65's, but uses + a 32k RGB table instead of an 8k one. At least on my machine, it's + slightly faster (probably because it uses only one shift instead of + two), and it looks considerably less green at the ends of the + translucency range. The extra size doesn't appear to be an issue. + + The following note is from DOSDoom 0.65: + + New translucency algorithm, by Erik Sandberg: + + Basically, we compute the red, green and blue values for each pixel, and + then use a RGB table to check which one of the palette colours that best + represents those RGB values. The RGB table is 8k big, with 4 R-bits, + 5 G-bits and 4 B-bits. A 4k table gives a bit too bad precision, and a 32k + table takes up more memory and results in more cache misses, so an 8k + table seemed to be quite ultimate. 
+ + The computation of the RGB for each pixel is accelerated by using two + 1k tables for each translucency level. + The xth element of one of these tables contains the r, g and b values for + the colour x, weighted for the current translucency level (for example, + the weighted rgb values for background colour at 75% translucency are 1/4 + of the original rgb values). The rgb values are stored as three + low-precision fixed point values, packed into one long per colour: + Bit 0-4: Frac part of blue (5 bits) + Bit 5-8: Int part of blue (4 bits) + Bit 9-13: Frac part of red (5 bits) + Bit 14-17: Int part of red (4 bits) + Bit 18-22: Frac part of green (5 bits) + Bit 23-27: Int part of green (5 bits) + Bit 28-31: All zeros (4 bits) + + The point of this format is that the two colours now can be added, and + then be converted to a RGB table index very easily: First, we just set + all the frac bits and the four upper zero bits to 1. It's now possible + to get the RGB table index by anding the current value >> 5 with the + current value >> 19. When asm-optimised, this should be the fastest + algorithm that uses RGB tables. 
+*/ + +namespace swrenderer +{ + PalWall1Command::PalWall1Command() + { + using namespace drawerargs; + + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _colormap = dc_colormap; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + _pitch = dc_pitch; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + } + + PalWall4Command::PalWall4Command() + { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + for (int col = 0; col < 4; col++) + { + _palookupoffse[col] = palookupoffse[col]; + _bufplce[col] = bufplce[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } + _srcblend = dc_srcblend; + _destblend = dc_destblend; + } + + void DrawWall1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _vlinebits; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + *dest = colormap[source[frac >> bits]]; + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWall4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _vlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = 
_vince[2]; + auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; + dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; + dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; + dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; + dest += pitch; + } while (--count); + } + + void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _mvlinebits; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _mvlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + 
auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = _vince[2]; + auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix; + + pix = buf0[(place = vplce0) >> bits]; if (pix) dest[0] = pal0[pix]; vplce0 = place + vince0; + pix = buf1[(place = vplce1) >> bits]; if (pix) dest[1] = pal1[pix]; vplce1 = place + vince1; + pix = buf2[(place = vplce2) >> bits]; if (pix) dest[2] = pal2[pix]; vplce2 = place + vince2; + pix = buf3[(place = vplce3) >> bits]; if (pix) dest[3] = pal3[pix]; vplce3 = place + vince3; + dest += pitch; + } while (--count); + } + + void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[colormap[pix]]; + 
uint32_t bg = bg2rgb[*dest]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[_palookupoffse[i][pix]]; + uint32_t bg = bg2rgb[dest[i]]; + fg = (fg + bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 
0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[_palookupoffse[i][pix]] + bg2rgb[dest[i]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[colormap[pix]] | 
0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[_palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if 
(pix != 0) + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_palookupoffse[i][pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; /* step by the per-thread pitch (already scaled by num_cores), not the raw _pitch, matching the other 4-column wall drawers */ + } while (--count); + } + + ///////////////////////////////////////////////////////////////////////// + + PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom) + { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + for (int col = 0; col < 4; col++) + { + _bufplce[col] = bufplce[col]; + _bufplce2[col] = bufplce2[col]; + _bufheight[col] = bufheight[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } + } + + void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count;
+ int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + int textureheight0 = _bufheight[0]; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } + } + + void DrawSingleSky4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; 
+ int count = _count; + int pitch = _pitch; + const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] }; + int textureheight0 = _bufheight[0]; + const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; + int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] }; + int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] }; + uint8_t output[4]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; + uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; + solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; + solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; + + // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: + int fade_length = (1 << (24 - start_fade)); + int start_fadetop_y = (-frac[0]) / fracstep[0]; + int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; + int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; + int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; + for (int col = 1; col < 4; col++) + { + start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); + end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); + start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); + end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); + } + start_fadetop_y 
= clamp(start_fadetop_y, 0, count); + end_fadetop_y = clamp(end_fadetop_y, 0, count); + start_fadebottom_y = clamp(start_fadebottom_y, 0, count); + end_fadebottom_y = clamp(end_fadebottom_y, 0, count); + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int col = 0; col < 4; col++) + { + frac[col] += fracstep[col] * skipped; + fracstep[col] *= thread->num_cores; + } + pitch *= thread->num_cores; + int num_cores = thread->num_cores; + int index = skipped; + + // Top solid color: + while (index < start_fadetop_y) + { + *((uint32_t*)dest) = solid_top_fill; + dest += pitch; + for (int col = 0; col < 4; col++) + frac[col] += fracstep[col]; + index += num_cores; + } + + // Top fade: + while (index < end_fadetop_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); + int inv_alpha_top = 256 - alpha_top; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Textured center: + while (index < start_fadebottom_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + output[col] = source0[col][sample_index]; + + frac[col] += fracstep[col]; + } + + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Fade bottom: + while (index < 
end_fadebottom_y) + { + for (int col = 0; col < 4; col++) + { + uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[col][sample_index]; + + uint32_t c = palette[fg]; + int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); + int inv_alpha_bottom = 256 - alpha_bottom; + int c_red = RPART(c); + int c_green = GPART(c); + int c_blue = BPART(c); + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + + frac[col] += fracstep[col]; + } + *((uint32_t*)dest) = *((uint32_t*)output); + dest += pitch; + index += num_cores; + } + + // Bottom solid color: + while (index < count) + { + *((uint32_t*)dest) = solid_bottom_fill; + dest += pitch; + index += num_cores; + } + } + + void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + const uint8_t *source1 = _bufplce2[0]; + int textureheight0 = _bufheight[0]; + uint32_t maxtextureheight1 = _bufheight[1] - 1; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + for (int index = 0; index 
< count; index++)
+		{
+			uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+			uint8_t fg = source0[sample_index];
+			if (fg == 0) // texel 0 in the front layer: sample the second layer instead
+			{
+				uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+				fg = source1[sample_index2];
+			}
+
+			int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0);
+			int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
+
+			if (alpha_top == 256 && alpha_bottom == 256)
+			{
+				*dest = fg;
+			}
+			else
+			{
+				int inv_alpha_top = 256 - alpha_top;
+				int inv_alpha_bottom = 256 - alpha_bottom;
+
+				const auto &c = GPalette.BaseColors[fg];
+				int c_red = c.r;
+				int c_green = c.g;
+				int c_blue = c.b;
+				c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8;
+				c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8;
+				c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8;
+				c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8;
+				c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8;
+				c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8;
+				*dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)];
+			}
+
+			frac += fracstep;
+			dest += pitch;
+		}
+	}
+
+	void DrawDoubleSky4PalCommand::Execute(DrawerThread *thread) // Four adjacent two-layer sky columns, drawn in five vertical bands
+	{
+		uint8_t *dest = _dest;
+		int count = _count;
+		int pitch = _pitch;
+		const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] };
+		const uint8_t *source1[4] = { _bufplce2[0], _bufplce2[1], _bufplce2[2], _bufplce2[3] };
+		int textureheight0 = _bufheight[0];
+		uint32_t maxtextureheight1 = _bufheight[1] - 1;
+		const uint32_t *palette = (const uint32_t *)GPalette.BaseColors;
+		int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] };
+		int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] };
+		uint8_t output[4];
+
+		int start_fade = 2; // How fast it should fade out
+
+		int solid_top_r = RPART(solid_top);
+		int solid_top_g = GPART(solid_top);
+		int solid_top_b = BPART(solid_top);
+		int solid_bottom_r = RPART(solid_bottom);
+		int solid_bottom_g = GPART(solid_bottom);
+		int solid_bottom_b = BPART(solid_bottom);
+		uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)];
+		uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)];
+		solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; // replicate the palette index into all four bytes
+		solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill;
+
+		// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+		int fade_length = (1 << (24 - start_fade));
+		int start_fadetop_y = (-frac[0]) / fracstep[0];
+		int end_fadetop_y = (fade_length - frac[0]) / fracstep[0];
+		int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0];
+		int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0];
+		for (int col = 1; col < 4; col++) // FIX: widen the bands using each column's own frac/fracstep (was always column 0)
+		{
+			start_fadetop_y = MIN(start_fadetop_y, (-frac[col]) / fracstep[col]);
+			end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[col]) / fracstep[col]);
+			start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[col]) / fracstep[col]);
+			end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[col]) / fracstep[col]);
+		}
+		start_fadetop_y = clamp(start_fadetop_y, 0, count);
+		end_fadetop_y = clamp(end_fadetop_y, 0, count);
+		start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+		end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+		int skipped = thread->skipped_by_thread(_dest_y);
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		for (int col = 0; col < 4; col++)
+		{
+			frac[col] += fracstep[col] * skipped;
+			fracstep[col] *= thread->num_cores;
+		}
+		pitch *= thread->num_cores;
+		int num_cores = thread->num_cores;
+		int index = skipped; // index counts rows of the whole column; this thread visits every num_cores-th row
+
+		// Top solid color:
+		while (index < start_fadetop_y)
+		{
+			*((uint32_t*)dest) = solid_top_fill;
+			dest += pitch;
+			for (int col = 0; col < 4; col++)
+				frac[col] += fracstep[col];
+			index += num_cores;
+		}
+
+		// Top fade:
+		while (index < end_fadetop_y)
+		{
+			for (int col = 0; col < 4; col++)
+			{
+				uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint8_t fg = source0[col][sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[col][sample_index2];
+				}
+				output[col] = fg;
+
+				uint32_t c = palette[fg];
+				int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0);
+				int inv_alpha_top = 256 - alpha_top;
+				int c_red = RPART(c);
+				int c_green = GPART(c);
+				int c_blue = BPART(c);
+				c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8;
+				c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8;
+				c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8;
+				output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)];
+
+				frac[col] += fracstep[col];
+			}
+			*((uint32_t*)dest) = *((uint32_t*)output);
+			dest += pitch;
+			index += num_cores;
+		}
+
+		// Textured center:
+		while (index < start_fadebottom_y)
+		{
+			for (int col = 0; col < 4; col++)
+			{
+				uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint8_t fg = source0[col][sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[col][sample_index2];
+				}
+				output[col] = fg;
+
+				frac[col] += fracstep[col];
+			}
+
+			*((uint32_t*)dest) = *((uint32_t*)output);
+			dest += pitch;
+			index += num_cores;
+		}
+
+		// Fade bottom:
+		while (index < end_fadebottom_y)
+		{
+			for (int col = 0; col < 4; col++)
+			{
+				uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint8_t fg = source0[col][sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[col][sample_index2];
+				}
+				output[col] = fg;
+
+				uint32_t c = palette[fg];
+				int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0);
+				int inv_alpha_bottom = 256 - alpha_bottom;
+				int c_red = RPART(c);
+				int c_green = GPART(c);
+				int c_blue = BPART(c);
+				c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8;
+				c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8;
+				c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8;
+				output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)];
+
+				frac[col] += fracstep[col];
+			}
+			*((uint32_t*)dest) = *((uint32_t*)output);
+			dest += pitch;
+			index += num_cores;
+		}
+
+		// Bottom solid color:
+		while (index < count)
+		{
+			*((uint32_t*)dest) = solid_bottom_fill;
+			dest += pitch;
+			index += num_cores;
+		}
+	}
+
+	/////////////////////////////////////////////////////////////////////////
+
+	PalColumnCommand::PalColumnCommand() // Copies the current drawerargs column state into the command
+	{
+		using namespace drawerargs;
+
+		_count = dc_count;
+		_dest = dc_dest;
+		_pitch = dc_pitch;
+		_iscale = dc_iscale;
+		_texturefrac = dc_texturefrac;
+		_colormap = dc_colormap;
+		_source = dc_source;
+		_translation = dc_translation;
+		_color = dc_color;
+		_srcblend = dc_srcblend;
+		_destblend = dc_destblend;
+		_srccolor = dc_srccolor;
+	}
+
+	void DrawColumnPalCommand::Execute(DrawerThread *thread)
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+
+		// Framebuffer destination address.
+		dest = _dest;
+
+		// Determine scaling,
+		// which is the only mapping to be done.
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count); // rows this thread draws
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y); // skip the texture rows this thread does not draw
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		// [RH] Get local copies of these variables so that the compiler
+		// has a better chance of optimizing this well.
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+
+		// Inner loop that does the actual texture mapping,
+		// e.g. a DDA-like scaling.
+		// This is as fast as it gets.
+		do
+		{
+			// Re-map color indices from wall texture column
+			// using a lighting/special effects LUT.
+			*dest = colormap[source[frac >> FRACBITS]];
+
+			dest += pitch;
+			frac += fracstep;
+
+		} while (--count);
+	}
+
+	void FillColumnPalCommand::Execute(DrawerThread *thread) // Writes the single palette index _color to every row this thread owns
+	{
+		int count;
+		uint8_t *dest;
+
+		count = _count;
+		dest = _dest;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		pitch *= thread->num_cores; // step over the rows handled by the other threads
+
+		uint8_t color = _color;
+		do
+		{
+			*dest = color;
+			dest += pitch;
+		} while (--count);
+	}
+
+	void FillColumnAddPalCommand::Execute(DrawerThread *thread) // Adds _srccolor to the _destblend-mapped destination pixel for every owned row
+	{
+		int count;
+		uint8_t *dest;
+
+		count = _count;
+		dest = _dest;
+		uint32_t *bg2rgb;
+		uint32_t fg;
+
+		bg2rgb = _destblend;
+		fg = _srccolor;
+		int pitch = _pitch;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		pitch *= thread->num_cores;
+
+		do
+		{
+			uint32_t bg;
+			bg = (fg + bg2rgb[*dest]) | 0x1f07c1f;
+			*dest = RGB32k.All[bg & (bg >> 15)]; // fold the packed sum into an RGB32k table index
+			dest += pitch;
+		} while (--count);
+
+	}
+
+	void FillColumnAddClampPalCommand::Execute(DrawerThread *thread) // Like FillColumnAddPalCommand, with extra masking on the sum (presumably per-channel saturation)
+	{
+		int count;
+		uint8_t *dest;
+
+		count = _count;
+
+		dest = _dest;
+		uint32_t *bg2rgb;
+		uint32_t
fg;
+
+		bg2rgb = _destblend;
+		fg = _srccolor;
+		int pitch = _pitch;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		pitch *= thread->num_cores;
+
+		do
+		{
+			uint32_t a = fg + bg2rgb[*dest];
+			uint32_t b = a;
+
+			a |= 0x01f07c1f;
+			b &= 0x40100400; // NOTE(review): these three bits look like per-channel overflow guards - confirm against Col2RGB8 layout
+			a &= 0x3fffffff;
+			b = b - (b >> 5);
+			a |= b;
+			*dest = RGB32k.All[a & (a >> 15)];
+			dest += pitch;
+		} while (--count);
+	}
+
+	void FillColumnSubClampPalCommand::Execute(DrawerThread *thread) // Writes (_srccolor - mapped destination), masked via the 0x40100400 bits, for every owned row
+	{
+		int count;
+		uint8_t *dest;
+
+		count = _count;
+
+		dest = _dest;
+		uint32_t *bg2rgb;
+		uint32_t fg;
+
+		bg2rgb = _destblend;
+		fg = _srccolor | 0x40100400; // set the guard bits once, outside the loop
+		int pitch = _pitch;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		pitch *= thread->num_cores;
+
+		do
+		{
+			uint32_t a = fg - bg2rgb[*dest];
+			uint32_t b = a;
+
+			b &= 0x40100400;
+			b = b - (b >> 5);
+			a &= b;
+			a |= 0x01f07c1f;
+			*dest = RGB32k.All[a & (a >> 15)];
+			dest += pitch;
+		} while (--count);
+	}
+
+	void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread) // Writes (mapped destination - _srccolor), masked via the 0x40100400 bits, for every owned row
+	{
+		int count;
+		uint8_t *dest;
+
+		count = _count;
+		if (count <= 0) // NOTE(review): looks redundant with the check after count_for_thread below; sibling commands omit it
+			return;
+
+		dest = _dest;
+		uint32_t *bg2rgb;
+		uint32_t fg;
+
+		bg2rgb = _destblend;
+		fg = _srccolor;
+		int pitch = _pitch;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		pitch *= thread->num_cores;
+
+		do
+		{
+			uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg;
+			uint32_t b = a;
+
+			b &= 0x40100400;
+			b = b - (b >> 5);
+			a &= b;
+			a |= 0x01f07c1f;
+			*dest = RGB32k.All[a & (a >> 15)];
+			dest += pitch;
+		} while (--count);
+	}
+
+	void DrawColumnAddPalCommand::Execute(DrawerThread *thread) // Textured column: adds the _srcblend-mapped texel to the _destblend-mapped destination
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+
count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y); // skip the texture rows this thread does not draw
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+
+		do
+		{
+			uint32_t fg = colormap[source[frac >> FRACBITS]];
+			uint32_t bg = *dest;
+
+			fg = fg2rgb[fg];
+			bg = bg2rgb[bg];
+			fg = (fg + bg) | 0x1f07c1f;
+			*dest = RGB32k.All[fg & (fg >> 15)]; // fold the packed sum into an RGB32k table index
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread) // Textured column: texel -> _translation -> _colormap -> destination
+	{
+		int count;
+		uint8_t* dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y);
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		// [RH] Local copies of global vars to improve compiler optimizations
+		const uint8_t *colormap = _colormap;
+		const uint8_t *translation = _translation;
+		const uint8_t *source = _source;
+
+		do
+		{
+			*dest = colormap[translation[source[frac >> FRACBITS]]];
+			dest += pitch;
+
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnTlatedAddPalCommand::Execute(DrawerThread *thread) // Translated textured column, additively blended with the destination
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y);
+		fracstep *= thread->num_cores;
+		pitch *=
thread->num_cores;
+
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+		const uint8_t *translation = _translation;
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+
+		do
+		{
+			uint32_t fg = colormap[translation[source[frac >> FRACBITS]]];
+			uint32_t bg = *dest;
+
+			fg = fg2rgb[fg];
+			bg = bg2rgb[bg];
+			fg = (fg + bg) | 0x1f07c1f;
+			*dest = RGB32k.All[fg & (fg >> 15)]; // fold the packed sum into an RGB32k table index
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnShadedPalCommand::Execute(DrawerThread *thread) // Blends the fixed color _color over the destination, weighted by the colormapped texel value
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac, fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y); // skip the texture rows this thread does not draw
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		const uint8_t *source = _source;
+		const uint8_t *colormap = _colormap;
+		uint32_t *fgstart = &Col2RGB8[0][_color];
+
+		do
+		{
+			uint32_t val = colormap[source[frac >> FRACBITS]]; // assumes val is in 0..64, since it indexes Col2RGB8[64 - val] - TODO confirm
+			uint32_t fg = fgstart[val << 8]; // val << 8 steps val rows ahead; presumably each Col2RGB8 row holds 256 entries - verify
+			val = (Col2RGB8[64 - val][*dest] + fg) | 0x1f07c1f;
+			*dest = RGB32k.All[val & (val >> 15)];
+
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread) // Textured column: texel + destination with extra masking on the sum (presumably per-channel saturation)
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y);
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+
+		do
+
{
+			uint32_t a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest];
+			uint32_t b = a;
+
+			a |= 0x01f07c1f;
+			b &= 0x40100400; // NOTE(review): these three bits look like per-channel overflow guards - confirm against Col2RGB8 layout
+			a &= 0x3fffffff;
+			b = b - (b >> 5);
+			a |= b;
+			*dest = RGB32k.All[a & (a >> 15)];
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread) // Same as DrawColumnAddClampPalCommand, with the texel passed through _translation first
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y); // skip the texture rows this thread does not draw
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		const uint8_t *translation = _translation;
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+
+		do
+		{
+			uint32_t a = fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] + bg2rgb[*dest];
+			uint32_t b = a;
+
+			a |= 0x01f07c1f;
+			b &= 0x40100400;
+			a &= 0x3fffffff;
+			b = b - (b >> 5);
+			a |= b;
+			*dest = RGB32k.All[(a >> 15) & a];
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread) // Textured column: (texel - destination), masked via the 0x40100400 bits
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y);
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+
+		do
+		{
+			uint32_t a = (fg2rgb[colormap[source[frac >>
FRACBITS]]] | 0x40100400) - bg2rgb[*dest];
+			uint32_t b = a;
+
+			b &= 0x40100400; // NOTE(review): surviving guard bits presumably mark channels that did not borrow - confirm
+			b = b - (b >> 5);
+			a &= b;
+			a |= 0x01f07c1f;
+			*dest = RGB32k.All[a & (a >> 15)];
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread) // Same as DrawColumnSubClampPalCommand, with the texel passed through _translation first
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y); // skip the texture rows this thread does not draw
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		const uint8_t *translation = _translation;
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+
+		do
+		{
+			uint32_t a = (fg2rgb[colormap[translation[source[frac >> FRACBITS]]]] | 0x40100400) - bg2rgb[*dest];
+			uint32_t b = a;
+
+			b &= 0x40100400;
+			b = b - (b >> 5);
+			a &= b;
+			a |= 0x01f07c1f;
+			*dest = RGB32k.All[(a >> 15) & a];
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread) // Textured column: (destination - texel), masked via the 0x40100400 bits
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y);
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+
+		do
+		{
+			uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac >> FRACBITS]]];
+			uint32_t b = a;
+
+			b &= 0x40100400; // NOTE(review): surviving guard bits presumably mark channels that did not borrow - confirm
+			b = b - (b >> 5);
+			a &= b;
+			a |= 0x01f07c1f;
+			*dest = RGB32k.All[a & (a >> 15)];
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	void DrawColumnRevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) // Same as DrawColumnRevSubClampPalCommand, with the texel passed through _translation first
+	{
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = _count;
+		dest = _dest;
+
+		fracstep = _iscale;
+		frac = _texturefrac;
+
+		count = thread->count_for_thread(_dest_y, count);
+		if (count <= 0)
+			return;
+
+		int pitch = _pitch;
+		dest = thread->dest_for_thread(_dest_y, pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(_dest_y); // skip the texture rows this thread does not draw
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
+		const uint8_t *translation = _translation;
+		const uint8_t *colormap = _colormap;
+		const uint8_t *source = _source;
+		uint32_t *fg2rgb = _srcblend;
+		uint32_t *bg2rgb = _destblend;
+
+		do
+		{
+			uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac >> FRACBITS]]]];
+			uint32_t b = a;
+
+			b &= 0x40100400;
+			b = b - (b >> 5);
+			a &= b;
+			a |= 0x01f07c1f;
+			*dest = RGB32k.All[(a >> 15) & a];
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+
+	/////////////////////////////////////////////////////////////////////////
+
+	DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand() // Copies the current drawerargs column extents and fuzz state into the command
+	{
+		using namespace drawerargs;
+
+		_yl = dc_yl;
+		_yh = dc_yh;
+		_x = dc_x;
+		_destorg = dc_destorg;
+		_pitch = dc_pitch;
+		_fuzzpos = fuzzpos;
+		_fuzzviewheight = fuzzviewheight;
+	}
+
+	void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread) // Replaces each pixel with a colormapped copy of a pixel at a fuzzoffset[] displacement
+	{
+		int yl = MAX(_yl, 1); // clamp the column to rows 1.._fuzzviewheight
+		int yh = MIN(_yh, _fuzzviewheight);
+
+		int count = thread->count_for_thread(yl, yh - yl + 1);
+
+		// Zero length.
+		if (count <= 0)
+			return;
+
+		uint8_t *map = &NormalLight.Maps[6 * 256]; // colormap starting at entry 6 * 256 of NormalLight.Maps
+
+		uint8_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + _destorg);
+
+		int pitch = _pitch * thread->num_cores;
+		int fuzzstep = thread->num_cores; // this thread consumes every num_cores-th fuzz table entry
+		int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE;
+
+		yl += thread->skipped_by_thread(yl); // first row actually drawn by this thread
+
+		// Handle the case where we would go out of bounds at the top:
+		if (yl < fuzzstep)
+		{
+			uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; // bias the source one thread-row down
+			//assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight);
+
+			*dest = map[*srcdest];
+			dest += pitch;
+			fuzz += fuzzstep;
+			fuzz %= FUZZTABLE;
+
+			count--;
+			if (count == 0)
+				return;
+		}
+
+		bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight); // NOTE(review): yl is not advanced after the top-row case above, and the row estimate adds fuzzstep - 1 to count - confirm this bound is intended
+		if (lowerbounds)
+			count--; // reserve the last row for the bottom special case
+
+		// Fuzz where fuzzoffset stays within bounds
+		while (count > 0)
+		{
+			int available = (FUZZTABLE - fuzz);
+			int next_wrap = available / fuzzstep; // iterations until fuzz reaches FUZZTABLE (rounded up below)
+			if (available % fuzzstep != 0)
+				next_wrap++;
+
+			int cnt = MIN(count, next_wrap);
+			count -= cnt;
+			do
+			{
+				uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep;
+				//assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight);
+
+				*dest = map[*srcdest];
+				dest += pitch;
+				fuzz += fuzzstep;
+			} while (--cnt);
+
+			fuzz %= FUZZTABLE;
+		}
+
+		// Handle the case where we would go out of bounds at the bottom
+		if (lowerbounds)
+		{
+			uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; // bias the source one thread-row up
+			//assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight);
+
+			*dest = map[*srcdest];
+		}
+	}
+
+	/////////////////////////////////////////////////////////////////////////
+
+	PalSpanCommand::PalSpanCommand() // Copies the current drawerargs span state into the command
+	{
+		using namespace drawerargs;
+
+		_source = ds_source;
+		_colormap = ds_colormap;
+		_xfrac = ds_xfrac;
+		_yfrac = ds_yfrac;
+		_y = ds_y;
+		_x1 = ds_x1;
+		_x2 = ds_x2;
+		_destorg = dc_destorg;
+		_xstep = ds_xstep;
+		_ystep = ds_ystep;
+		_xbits = ds_xbits;
+
_ybits = ds_ybits;
+		_srcblend = dc_srcblend;
+		_destblend = dc_destblend;
+		_color = ds_color;
+	}
+
+	void DrawSpanPalCommand::Execute(DrawerThread *thread) // Draws one horizontal textured span on row _y from _x1 to _x2 inclusive
+	{
+		if (thread->line_skipped_by_thread(_y)) // a whole row is either drawn or skipped by this thread
+			return;
+
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint8_t *dest;
+		const uint8_t *source = _source;
+		const uint8_t *colormap = _colormap;
+		int count;
+		int spot;
+
+		xfrac = _xfrac;
+		yfrac = _yfrac;
+
+		dest = ylookup[_y] + _x1 + _destorg;
+
+		count = _x2 - _x1 + 1;
+
+		xstep = _xstep;
+		ystep = _ystep;
+
+		if (_xbits == 6 && _ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				// Lookup pixel from flat texture tile,
+				//  re-index using light/colormap.
+				*dest++ = colormap[source[spot]];
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			uint8_t yshift = 32 - _ybits;
+			uint8_t xshift = yshift - _xbits;
+			int xmask = ((1 << _xbits) - 1) << _ybits; // selects the x bits, which sit above the y bits in the texel index
+
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				// Lookup pixel from flat texture tile,
+				//  re-index using light/colormap.
+				*dest++ = colormap[source[spot]];
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+
+	void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread) // Like DrawSpanPalCommand, but texel value 0 leaves the destination pixel unchanged
+	{
+		if (thread->line_skipped_by_thread(_y))
+			return;
+
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint8_t *dest;
+		const uint8_t *source = _source;
+		const uint8_t *colormap = _colormap;
+		int count;
+		int spot;
+
+		xfrac = _xfrac;
+		yfrac = _yfrac;
+
+		dest = ylookup[_y] + _x1 + _destorg;
+
+		count = _x2 - _x1 + 1;
+
+		xstep = _xstep;
+		ystep = _ystep;
+
+		if (_xbits == 6 && _ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+ do + { + int texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + *dest = colormap[texdata]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + int texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + *dest = colormap[texdata]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t fg = colormap[source[spot]]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void FillSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + memset(ylookup[_y] + _x1 + _destorg, _color, _x2 - _x1 + 1); + } + + ///////////////////////////////////////////////////////////////////////// + + DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) + { + using namespace drawerargs; + + _colormap = ds_colormap; + _destorg = dc_destorg; + _ybits = ds_ybits; + _xbits = ds_xbits; + _source = ds_source; + basecolormapdata = basecolormap->Maps; + } + + void DrawTiltedSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + 
const uint8_t **tiltlighting = thread->tiltlighting; + + int width = x2 - x1; + double iz, uz, vz; + uint8_t *fb; + uint32_t u, v; + int i; + + iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (plane_shade) + { + uz = (iz + plane_sz[0] * width) * planelightfloat; + vz = iz * planelightfloat; + CalcTiltedLighting(vz, uz, width, thread); + } + else + { + for (i = 0; i <= width; ++i) // inclusive bound: width + 1 pixels are drawn below, matching CalcTiltedLighting + { + tiltlighting[i] = _colormap; + } + } + + uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); + vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); + + fb = ylookup[y] + x1 + _destorg; + + uint8_t vshift = 32 - _ybits; + uint8_t ushift = vshift - _xbits; + int umask = ((1 << _xbits) - 1) << _ybits; + + #if 0 + // The "perfect" reference version of this routine. Pretty slow. + // Use it only to see how things are supposed to look. + i = 0; + do + { + double z = 1.f / iz; + + u = int64_t(uz*z) + pviewx; + v = int64_t(vz*z) + pviewy; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += plane_sz[0]; + uz += plane_su[0]; + vz += plane_sv[0]; + } while (--width >= 0); + #else + //#define SPANSIZE 32 + //#define INVSPAN 0.03125f + //#define SPANSIZE 8 + //#define INVSPAN 0.125f + #define SPANSIZE 16 + #define INVSPAN 0.0625f + + double startz = 1.f / iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = plane_sz[0] * SPANSIZE; + uzstep = plane_su[0] * SPANSIZE; + vzstep = plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * INVSPAN); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * 
INVSPAN); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (i = SPANSIZE - 1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = (uint32_t)(int64_t(startu) + pviewx); // apply the view offset like every other path + v = (uint32_t)(int64_t(startv) + pviewy); + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += plane_sz[0] * left; + uz += plane_su[0] * left; + vz += plane_sv[0] * left; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f / left; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * left); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * left); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } + #endif + } + + // Calculates the lighting for one row of a tilted plane. If the definition + // of GETPALOOKUP changes, this needs to change, too. + void DrawTiltedSpanPalCommand::CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread) + { + const uint8_t **tiltlighting = thread->tiltlighting; + + double lstep; + uint8_t *lightfiller; + int i = 0; + + if (width == 0 || lval == lend) + { // Constant lighting + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + } + else + { + lstep = (lend - lval) / width; + if (lval >= MAXLIGHTVIS) + { // lval starts "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + for (; i <= width && lval >= MAXLIGHTVIS; ++i) + { + tiltlighting[i] = lightfiller; + lval += lstep; + } + } + if (lend >= MAXLIGHTVIS) + { // lend ends "too bright". 
+ lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); + for (; width > i && lend >= MAXLIGHTVIS; --width) + { + tiltlighting[width] = lightfiller; + lend -= lstep; + } + } + if (width > 0) + { + lval = FIXED2DBL(planeshade) - lval; + lend = FIXED2DBL(planeshade) - lend; + lstep = (lend - lval) / width; + if (lstep < 0) + { // Going from dark to light + if (lval < 1.) + { // All bright + lightfiller = basecolormapdata; + } + else + { + if (lval >= NUMCOLORMAPS) + { // Starts beyond the dark end + uint8_t *clight = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + while (lval >= NUMCOLORMAPS && i <= width) + { + tiltlighting[i++] = clight; + lval += lstep; + } + if (i > width) + return; + } + while (i <= width && lval >= 0) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata; + } + } + else + { // Going from light to dark + if (lval >= (NUMCOLORMAPS - 1)) + { // All dark + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + else + { + while (lval < 0 && i <= width) + { + tiltlighting[i++] = basecolormapdata; + lval += lstep; + } + if (i > width) + return; + while (i <= width && lval < (NUMCOLORMAPS - 1)) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + } + } + } + for (; i <= width; i++) + { + tiltlighting[i] = lightfiller; + } + } + + ///////////////////////////////////////////////////////////////////////// + + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + { + using namespace drawerargs; + color = ds_color; + destorg = dc_destorg; + } + + void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + memset(ylookup[y] + x1 + destorg, color, x2 - x1 + 1); + } + + 
///////////////////////////////////////////////////////////////////////// + + DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) + : _dx(dx), _v(v), _dy(dy), _vi(vi), _vptr(vptr), _p(p), _colormap(colormap) + { + using namespace drawerargs; + _pitch = dc_pitch; + _start_y = static_cast<int>((p - dc_destorg) / dc_pitch); // row index of p relative to dc_destorg + } + + void DrawSlabPalCommand::Execute(DrawerThread *thread) + { + int count = _dy; + uint8_t *dest = _p; + int pitch = _pitch; + int width = _dx; + const uint8_t *colormap = _colormap; + const uint8_t *source = _vptr; + fixed_t fracpos = _v; + fixed_t iscale = _vi; + + count = thread->count_for_thread(_start_y, count); + dest = thread->dest_for_thread(_start_y, pitch, dest); + fracpos += iscale * thread->skipped_by_thread(_start_y); + iscale *= thread->num_cores; + pitch *= thread->num_cores; + + while (count > 0) + { + uint8_t color = colormap[source[fracpos >> FRACBITS]]; + + for (int x = 0; x < width; x++) + dest[x] = color; + + dest += pitch; + fracpos += iscale; + count--; + } + } + + ///////////////////////////////////////////////////////////////////////// + + DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + { + using namespace drawerargs; + _colormap = dc_colormap; + _destorg = dc_destorg; + } + + void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + const uint8_t *colormap = _colormap; + uint8_t *dest = ylookup[y] + _destorg; + int x = x1; + do + { + dest[x] = colormap[dest[x]]; + } while (++x <= x2); + } +} diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h new file mode 100644 index 0000000000..a2400d2759 --- /dev/null +++ b/src/r_draw_pal.h @@ -0,0 +1,333 @@ + +#pragma once + +#include "r_draw.h" +#include "v_palette.h" +#include "r_thread.h" + +namespace swrenderer +{ + class PalWall1Command : public DrawerCommand + { + public: + 
PalWall1Command(); + FString DebugInfo() override { return "PalWallCommand"; } + + protected: + uint32_t _iscale; + uint32_t _texturefrac; + uint8_t *_colormap; + int _count; + const uint8_t *_source; + uint8_t *_dest; + int _vlinebits; + int _mvlinebits; + int _tmvlinebits; + int _pitch; + uint32_t *_srcblend; + uint32_t *_destblend; + }; + + class PalWall4Command : public DrawerCommand + { + public: + PalWall4Command(); + FString DebugInfo() override { return "PalWallCommand"; } + + protected: + uint8_t *_dest; + int _count; + int _pitch; + int _vlinebits; + int _mvlinebits; + int _tmvlinebits; + uint8_t *_palookupoffse[4]; + const uint8_t *_bufplce[4]; + uint32_t _vince[4]; + uint32_t _vplce[4]; + uint32_t *_srcblend; + uint32_t *_destblend; + }; + + class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: 
void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + + class PalSkyCommand : public DrawerCommand + { + public: + PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom); + FString DebugInfo() override { return "PalSkyCommand"; } + + protected: + uint32_t solid_top; + uint32_t solid_bottom; + + uint8_t *_dest; + int _count; + int _pitch; + const uint8_t *_bufplce[4]; + const uint8_t *_bufplce2[4]; + int _bufheight[4]; + uint32_t _vince[4]; + uint32_t _vplce[4]; + }; + + class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + + class PalColumnCommand : public DrawerCommand + { + public: + PalColumnCommand(); + FString DebugInfo() override { return "PalColumnCommand"; } + + protected: + int _count; + uint8_t *_dest; + int _pitch; + fixed_t _iscale; + fixed_t _texturefrac; + const uint8_t *_colormap; + const uint8_t *_source; + const uint8_t *_translation; + int _color; + uint32_t *_srcblend; + uint32_t *_destblend; + uint32_t _srccolor; + }; + + class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnAddClampPalCommand 
: public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnShadedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawFuzzColumnPalCommand : public DrawerCommand + { + public: + DrawFuzzColumnPalCommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } + + private: + int _yl; + int _yh; + int _x; + uint8_t *_destorg; + int _pitch; + int _fuzzpos; + int _fuzzviewheight; + }; + + class PalSpanCommand : public DrawerCommand + { + public: + PalSpanCommand(); + 
FString DebugInfo() override { return "PalSpanCommand"; } + + protected: + const uint8_t *_source; + const uint8_t *_colormap; + dsfixed_t _xfrac; + dsfixed_t _yfrac; + int _y; + int _x1; + int _x2; + uint8_t *_destorg; + dsfixed_t _xstep; + dsfixed_t _ystep; + int _xbits; + int _ybits; + uint32_t *_srcblend; + uint32_t *_destblend; + int _color; + }; + + class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class FillSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawTiltedSpanPalCommand : public DrawerCommand + { + public: + DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } + + private: + void CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread); + + int y; + int x1; + int x2; + FVector3 plane_sz; + FVector3 plane_su; + FVector3 plane_sv; + bool plane_shade; + int planeshade; + float planelightfloat; + fixed_t pviewx; + fixed_t pviewy; + + const uint8_t *_colormap; + uint8_t *_destorg; + int _ybits; + int _xbits; + const uint8_t *_source; + uint8_t 
*basecolormapdata; + }; + + class DrawColoredSpanPalCommand : public PalSpanCommand + { + public: + DrawColoredSpanPalCommand(int y, int x1, int x2); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawColoredSpanPalCommand"; } + + private: + int y; + int x1; + int x2; + int color; + uint8_t *destorg; + }; + + class DrawSlabPalCommand : public PalSpanCommand + { + public: + DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap); + void Execute(DrawerThread *thread) override; + + private: + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const uint8_t *_vptr; + uint8_t *_p; + const uint8_t *_colormap; + int _pitch; + int _start_y; + }; + + class DrawFogBoundaryLinePalCommand : public PalSpanCommand + { + public: + DrawFogBoundaryLinePalCommand(int y, int x1, int x2); + void Execute(DrawerThread *thread) override; + + private: + int y, x1, x2; + const uint8_t *_colormap; + uint8_t *_destorg; + }; + + class RtInitColsPalCommand : public DrawerCommand + { + public: + RtInitColsPalCommand(uint8_t *buff); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "RtInitColsPalCommand"; } + + private: + uint8_t *buff; + }; + + class PalColumnHorizCommand : public DrawerCommand + { + public: + PalColumnHorizCommand(); + + protected: + const uint8_t *_source; + fixed_t _iscale; + fixed_t _texturefrac; + int _count; + int _color; + int _x; + int _yl; + }; + + class DrawColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawColumnHorizPalCommand"; } + }; + + class FillColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "FillColumnHorizPalCommand"; } + }; + + class PalRtCommand : public DrawerCommand + { + public: + PalRtCommand(int hx, int sx, 
int yl, int yh); + FString DebugInfo() override { return "PalRtCommand"; } + + protected: + int hx, sx, yl, yh; + uint8_t *_destorg; + int _pitch; + const uint8_t *_colormap; + const uint32_t *_srcblend; + const uint32_t *_destblend; + const uint8_t *_translation; + int _color; + }; + + class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4ShadedPalCommand 
: public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; 
void Execute(DrawerThread *thread) override; }; +} diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp new file mode 100644 index 0000000000..3356592d25 --- /dev/null +++ b/src/r_drawt_pal.cpp @@ -0,0 +1,867 @@ +/* +** r_drawt_pal.cpp +** Faster column drawers for modern processors +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**--------------------------------------------------------------------------- +** +** These functions stretch columns into a temporary buffer and then +** map them to the screen. On modern machines, this is faster than drawing +** them directly to the screen. +** +** Will I be able to even understand any of this if I come back to it later? +** Let's hope so. :-) +*/ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +// I should have commented this stuff better. +// +// dc_temp is the buffer R_DrawColumnHoriz writes into. +// dc_tspans points into it. +// dc_ctspan points into dc_tspans. +// horizspan also points into dc_tspans. + +// dc_ctspan is advanced while drawing into dc_temp. +// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. + +namespace swrenderer +{ + RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff) + { + } + + void RtInitColsPalCommand::Execute(DrawerThread *thread) + { + thread->dc_temp = buff == nullptr ? 
thread->dc_temp_buff : buff; + } + + ///////////////////////////////////////////////////////////////////// + + PalColumnHorizCommand::PalColumnHorizCommand() + { + using namespace drawerargs; + + _source = dc_source; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _color = dc_color; + _x = dc_x; + _yl = dc_yl; + } + + void DrawColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t *dest; + fixed_t fracstep; + fixed_t frac; + + count = thread->count_for_thread(_yl, count); + if (count <= 0) + return; + + fracstep = _iscale; + frac = _texturefrac; + + const uint8_t *source = _source; + + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + frac += fracstep * thread->skipped_by_thread(_yl); + fracstep *= thread->num_cores; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + + void FillColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t color = _color; + uint8_t 
*dest; + + count = thread->count_for_thread(_yl, count); + if (count <= 0) + return; + + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } + + ///////////////////////////////////////////////////////////////////// + + PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh) + { + using namespace drawerargs; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _translation = dc_translation; + _color = dc_color; + } + + void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) + { + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); + } + + void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread) + { + int *source; + int *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg); + source = (int *)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]); + pitch = _pitch*thread->num_cores/sizeof(int); + + if (count & 1) { + 
*dest = *source; + source += 4/sizeof(int); + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4/sizeof(int)]; + source += 8/sizeof(int); + dest += pitch*2; + } while (--count); + } + + void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx]; + pitch = _pitch*thread->num_cores; + + if (count & 1) { + *dest = colormap[*source]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[pitch] = colormap[source[4]]; + source += 8; + dest += pitch*2; + } while (--count); + } + + void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch*thread->num_cores; + + if (count & 1) { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + dest[pitch] = colormap[source[4]]; + dest[pitch+1] = colormap[source[5]]; + dest[pitch+2] = colormap[source[6]]; + dest[pitch+3] = colormap[source[7]]; + source += 8; + dest += pitch*2; + } while (--count); + } + + void 
DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) + { + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + const uint8_t *translation = _translation; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + uint8_t b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. 
+ for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } + } + + void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) + { + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + const uint8_t *translation = _translation; + int c0, c1; + uint8_t b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. 
+ if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } + } + + void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t fg = colormap[*source]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t fg = colormap[source[0]]; + uint32_t bg = dest[0]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[0] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[1]]; + bg = dest[1]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[1] = RGB32k.All[fg & (fg>>15)]; + + + fg = colormap[source[2]]; + bg = dest[2]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[2] = 
RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[3]]; + bg = dest[3]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[3] = RGB32k.All[fg & (fg>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) + { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + + do { + uint32_t val = colormap[*source]; + uint32_t fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) + { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + + do { + uint32_t val; + + val = colormap[source[0]]; + val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; + dest[0] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[1]]; + val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; + dest[1] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[2]]; + val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; + dest[2] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[3]]; + val = (Col2RGB8[64-val][dest[3]] + 
fgstart[val<<8]) | 0x1f07c1f; + dest[3] = RGB32k.All[val & (val>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + + do { + uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b 
- (b >> 5); + a |= b; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + 
dest[1] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = 
RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } +} diff --git a/src/r_main.cpp b/src/r_main.cpp index c69c22c7ba..a6ae47de1b 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -58,6 +58,38 @@ #include "v_font.h" #include "r_data/colormaps.h" #include "p_maputl.h" +#include "r_thread.h" + +CVAR (String, r_viewsize, "", CVAR_NOSET) +CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) + +CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) +{ + if (self != 0 && self != 1) + { + self = 1; + } + else + { // Trigger the change + setsizeneeded = true; + } +} + +CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) +CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + +extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; +extern cycle_t FrameCycles; + +extern bool r_showviewer; + +cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + +namespace swrenderer +{ + using namespace drawerargs; // MACROS ------------------------------------------------------------------ @@ -88,7 +120,6 @@ extern short *openings; extern bool r_fakingunderwater; extern "C" int fuzzviewheight; extern subsector_t *InSubsector; -extern bool r_showviewer; // PRIVATE DATA DECLARATIONS ----------------------------------------------- @@ -100,9 +131,6 @@ bool r_dontmaplines; // PUBLIC DATA DEFINITIONS 
------------------------------------------------- -CVAR (String, r_viewsize, "", CVAR_NOSET) -CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) - double r_BaseVisibility; double r_WallVisibility; double r_FloorVisibility; @@ -157,8 +185,6 @@ void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); void (*hcolfunc_post4) (int sx, int yl, int yh); -cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - // PRIVATE DATA DEFINITIONS ------------------------------------------------ static int lastcenteryfrac; @@ -361,26 +387,6 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, R_SetVisibility(R_GetVisibility()); } -//========================================================================== -// -// CVAR r_columnmethod -// -// Selects which version of the seg renderers to use. -// -//========================================================================== - -CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) -{ - if (self != 0 && self != 1) - { - self = 1; - } - else - { // Trigger the change - setsizeneeded = true; - } -} - //========================================================================== // // R_Init @@ -455,8 +461,6 @@ void R_CopyStackedViewParameters() // //========================================================================== -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) - void R_SetupColormap(player_t *player) { realfixedcolormap = NULL; @@ -574,9 +578,6 @@ void R_SetupFreelook() // //========================================================================== -CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) -CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) - void R_HighlightPortal (PortalDrawseg* pds) { // [ZZ] NO OVERFLOW CHECKS HERE @@ -853,10 +854,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = 
R_FillColumnHoriz; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; - colfunc = R_FillColumnP; + colfunc = R_FillColumn; spanfunc = R_FillSpan; } else @@ -950,6 +951,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, { const bool savedviewactive = viewactive; + R_BeginDrawerCommands(); + viewwidth = width; RenderTarget = canvas; bRenderingToCanvas = true; @@ -961,6 +964,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_RenderActorView (actor, dontmaplines); + R_EndDrawerCommands(); + RenderTarget = screen; bRenderingToCanvas = false; R_ExecuteSetViewSize (); @@ -991,8 +996,6 @@ void R_MultiresInit () // Displays statistics about rendering times // //========================================================================== -extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; -extern cycle_t FrameCycles; ADD_STAT (fps) { @@ -1072,3 +1075,5 @@ CCMD (clearscancycles) bestscancycles = HUGE_VAL; } #endif + +} \ No newline at end of file diff --git a/src/r_main.h b/src/r_main.h index 24103393d4..87b56163b0 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -28,23 +28,26 @@ #include "v_palette.h" #include "r_data/colormaps.h" +extern double ViewCos; +extern double ViewSin; +extern int viewwindowx; +extern int viewwindowy; typedef BYTE lighttable_t; // This could be wider for >8 bit display. +namespace swrenderer +{ + // // POV related. 
// extern bool bRenderingToCanvas; -extern double ViewCos; -extern double ViewSin; extern fixed_t viewingrangerecip; extern double FocalLengthX, FocalLengthY; extern double InvZtoScale; extern double WallTMapScale2; -extern int viewwindowx; -extern int viewwindowy; extern double CenterX; extern double CenterY; @@ -142,5 +145,6 @@ extern DAngle stacked_angle; extern void R_CopyStackedViewParameters(); +} #endif // __R_MAIN_H__ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 810aa0003c..8a5ee2263a 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -63,10 +63,14 @@ #pragma warning(disable:4244) #endif +CVAR(Bool, tilt, false, 0); +CVAR(Bool, r_skyboxes, true, 0) + EXTERN_CVAR(Int, r_skymode) -//EXTERN_CVAR (Int, tx) -//EXTERN_CVAR (Int, ty) +namespace swrenderer +{ + using namespace drawerargs; extern subsector_t *InSubsector; @@ -132,15 +136,12 @@ extern "C" { // spanend holds the end of a plane span in each screen row // short spanend[MAXHEIGHT]; -BYTE *tiltlighting[MAXWIDTH]; int planeshade; FVector3 plane_sz, plane_su, plane_sv; float planelightfloat; bool plane_shade; fixed_t pviewx, pviewy; - -void R_DrawTiltedPlane_ASM (int y, int x1); } float yslope[MAXHEIGHT]; @@ -148,13 +149,6 @@ static fixed_t xscale, yscale; static double xstepscale, ystepscale; static double basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -249,11 +243,6 @@ void R_MapPlane (int y, int x1) GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); } -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM 
(ds_colormap); -#endif - ds_y = y; ds_x1 = x1; ds_x2 = x2; @@ -261,239 +250,15 @@ void R_MapPlane (int y, int x1) spanfunc (); } -//========================================================================== -// -// R_CalcTiltedLighting -// -// Calculates the lighting for one row of a tilted plane. If the definition -// of GETPALOOKUP changes, this needs to change, too. -// -//========================================================================== - -extern "C" { -void R_CalcTiltedLighting (double lval, double lend, int width) -{ - double lstep; - BYTE *lightfiller; - BYTE *basecolormapdata = basecolormap->Maps; - int i = 0; - - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - } - else - { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) - { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) 
- { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS-1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS-1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - } - } - } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } -} -} // extern "C" - //========================================================================== // // R_MapTiltedPlane // //========================================================================== -void R_MapTiltedPlane(int y, int x1) +void R_MapTiltedPlane (int y, int x1) { - int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - BYTE *fb; - DWORD u, v; - int i; - - iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); - - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) - { - uz = (iz + plane_sz[0] * width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting(vz, uz, width); - } - - uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); - vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); - - fb = ylookup[y] + x1 + dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif + R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -502,9 +267,9 @@ void R_MapTiltedPlane(int y, int x1) // //========================================================================== -void R_MapColoredPlane (int y, int x1) +void R_MapColoredPlane(int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); + R_DrawColoredSpan(y, x1, spanend[y]); } //========================================================================== @@ -1179,9 +944,6 @@ static void R_DrawSkyStriped (visplane_t *pl) // //========================================================================== -CVAR (Bool, tilt, false, 0); -//CVAR (Int, pa, 0, 0) - int R_DrawPlanes () { visplane_t *pl; @@ -1317,7 +1079,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske // 9. Put the camera back where it was to begin with. 
// //========================================================================== -CVAR (Bool, r_skyboxes, true, 0) static int numskyboxes; void R_DrawPortals () @@ -1665,13 +1426,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif - if (alpha <= 0) { return; @@ -1896,14 +1650,6 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t else ds_colormap = basecolormap->Maps, plane_shade = true; - if (!plane_shade) - { - for (int i = 0; i < viewwidth; ++i) - { - tiltlighting[i] = ds_colormap; - } - } - // Hack in support for 1 x Z and Z x 1 texture sizes if (ds_ybits == 0) { @@ -1913,13 +1659,8 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t { plane_su[2] = plane_su[1] = plane_su[0] = 0; } -#if defined(X86_ASM) - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM (ds_source); - R_MapVisPlane (pl, R_DrawTiltedPlane_ASM); -#else + R_MapVisPlane (pl, R_MapTiltedPlane); -#endif } //========================================================================== @@ -2023,3 +1764,5 @@ bool R_PlaneInitData () return true; } + +} \ No newline at end of file diff --git a/src/r_plane.h b/src/r_plane.h index d4db3dc09c..0e133a7cd2 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -27,6 +27,9 @@ class ASkyViewpoint; +namespace swrenderer +{ + // // The infamous visplane // @@ -113,4 +116,6 @@ bool R_PlaneInitData (void); extern visplane_t* floorplane; extern visplane_t* ceilingplane; +} + #endif // __R_PLANE_H__ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ac5683b9b2..d4520e91de 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -57,10 +57,13 @@ CVAR(Bool, r_np2, true, 0) +CVAR(Bool, r_fogboundary, true, 0) +CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); -//CVAR 
(Int, ty, 8, 0) -//CVAR (Int, tx, 8, 0) +namespace swrenderer +{ + using namespace drawerargs; #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -141,16 +144,6 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); -//============================================================================= -// -// CVAR r_fogboundary -// -// If true, makes fog look more "real" by shading the walls separating two -// sectors with different fog. -//============================================================================= - -CVAR(Bool, r_fogboundary, true, 0) - inline bool IsFogBoundary (sector_t *front, sector_t *back) { return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && @@ -158,14 +151,6 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back) (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } -//============================================================================= -// -// CVAR r_drawmirrors -// -// Set to false to disable rendering of mirrors -//============================================================================= - -CVAR(Bool, r_drawmirrors, true, 0) // // R_RenderMaskedSegRange @@ -2994,3 +2979,5 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, done: WallC = savecoord; } + +} \ No newline at end of file diff --git a/src/r_segs.h b/src/r_segs.h index 1fc428c964..8610bc6f29 100644 --- a/src/r_segs.h +++ b/src/r_segs.h @@ -23,6 +23,9 @@ #ifndef __R_SEGS_H__ #define __R_SEGS_H__ +namespace swrenderer +{ + struct drawseg_t; void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); @@ -70,4 +73,6 @@ extern int CurrentPortalUniq; extern bool 
CurrentPortalInSkybox; extern TArray WallPortals; +} + #endif diff --git a/src/r_state.h b/src/r_state.h index b66ad57eb7..cd4aee4be3 100644 --- a/src/r_state.h +++ b/src/r_state.h @@ -80,7 +80,7 @@ extern int numgamesubsectors; extern AActor* camera; // [RH] camera instead of viewplayer extern sector_t* viewsector; // [RH] keep track of sector viewing from -extern angle_t xtoviewangle[MAXWIDTH+1]; +namespace swrenderer { extern angle_t xtoviewangle[MAXWIDTH+1]; } extern DAngle FieldOfView; int R_FindSkin (const char *name, int pclass); // [RH] Find a skin diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 3c33134301..87bce4013a 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -42,13 +42,20 @@ #include "r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" +#include "r_thread.h" +namespace swrenderer +{ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +} + +using namespace swrenderer; + //========================================================================== // // DCanvas :: Init @@ -154,9 +161,11 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { + R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + R_EndDrawerCommands(); } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index e7d130fa85..4ba47d63d7 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -64,6 +64,21 @@ #include "r_data/voxels.h" #include "p_local.h" #include "p_maputl.h" +#include "r_thread.h" + +EXTERN_CVAR(Bool, st_scale) +EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Int, r_drawfuzz) +EXTERN_CVAR(Bool, r_deathcamera); +EXTERN_CVAR(Bool, r_drawplayersprites) +EXTERN_CVAR(Bool, r_drawvoxels) + +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +//CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) + +namespace swrenderer +{ + using namespace drawerargs; // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. @@ -95,12 +110,6 @@ extern float MaskedScaleY; #define BASEXCENTER (160) #define BASEYCENTER (100) -EXTERN_CVAR (Bool, st_scale) -EXTERN_CVAR(Bool, r_shadercolormaps) -EXTERN_CVAR(Int, r_drawfuzz) -EXTERN_CVAR(Bool, r_deathcamera); -CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // // Sprite rotation 0 is facing the viewer, // rotation 1 is one angle turn CLOCKWISE around the axis. 
@@ -132,9 +141,6 @@ FTexture *WallSpriteTile; short zeroarray[MAXWIDTH]; short screenheightarray[MAXWIDTH]; -EXTERN_CVAR (Bool, r_drawplayersprites) -EXTERN_CVAR (Bool, r_drawvoxels) - // // INITIALIZATION FUNCTIONS // @@ -639,7 +645,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP) + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } @@ -1758,8 +1764,6 @@ static int sd_comparex (const void *arg1, const void *arg2) return (*(drawseg_t **)arg2)->x2 - (*(drawseg_t **)arg1)->x2; } -CVAR (Bool, r_splitsprites, true, CVAR_ARCHIVE) - // Split up vissprites that intersect drawsegs void R_SplitVisSprites () { @@ -2628,7 +2632,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle (vissprite_t *vis) +void R_DrawParticle_C (vissprite_t *vis) { DWORD *bg2rgb; int spacing; @@ -2642,6 +2646,8 @@ void R_DrawParticle (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); + DrawerCommandQueue::WaitForWorkers(); + // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; @@ -3237,3 +3243,5 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) OffscreenBufferWidth = width; OffscreenBufferHeight = height; } + +} \ No newline at end of file diff --git a/src/r_things.h b/src/r_things.h index 53b887b181..bf32b655f2 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -25,6 +25,12 @@ #include "r_bsp.h" +struct particle_t; +struct FVoxel; + +namespace swrenderer +{ + // A vissprite_t is a thing // that will be drawn during a refresh. // I.e. a sprite object that is partly visible. 
@@ -95,9 +101,7 @@ struct vissprite_t vissprite_t() {} }; -struct particle_t; - -void R_DrawParticle (vissprite_t *); +void R_DrawParticle_C (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); extern int MaxVisSprites; @@ -146,5 +150,6 @@ void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); +} #endif diff --git a/src/r_thread.cpp b/src/r_thread.cpp new file mode 100644 index 0000000000..c96f14e74b --- /dev/null +++ b/src/r_thread.cpp @@ -0,0 +1,297 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_thread.h" + +CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +void R_BeginDrawerCommands() +{ + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); +} + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue *DrawerCommandQueue::Instance() +{ + static DrawerCommandQueue queue; + return &queue; +} + +DrawerCommandQueue::DrawerCommandQueue() +{ +} + +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + +void* DrawerCommandQueue::AllocMemory(size_t size) +{ + // Make sure allocations remain 16-byte aligned + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + +void DrawerCommandQueue::Finish() +{ + auto queue = Instance(); + if (queue->commands.empty()) + return; + + // Give worker threads something to do: + + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = (int)(queue->threads.size() + 1); + + struct TryCatchData + { + 
DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; + + data.queue = queue; + data.thread = &thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) + { + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + command->Execute(d->thread); + } + } + }, + [](void *data, const char *reason, bool fatal) + { + TryCatchData *d = (TryCatchData*)data; + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + }); + + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + + if (!queue->thread_error.IsEmpty()) + { + static bool first = true; + if (queue->thread_error_fatal) + I_FatalError("%s", queue->thread_error.GetChars()); + else if (first) + Printf("%s\n", queue->thread_error.GetChars()); + first = false; + } + + // Clean up batch: + + for (auto &command : queue->active_commands) + command->~DrawerCommand(); + queue->active_commands.clear(); + queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = 
num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + + struct TryCatchData + { + DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; + + data.queue = queue; + data.thread = thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) + { + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + command->Execute(d->thread); + } + } + }, + [](void *data, const char *reason, bool fatal) + { + TryCatchData *d = (TryCatchData*)data; + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + }); + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; +} + +void DrawerCommandQueue::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal) +{ + if 
(worker_thread) + { + std::unique_lock end_lock(Instance()->end_mutex); + if (Instance()->thread_error.IsEmpty() || (!Instance()->thread_error_fatal && fatal)) + { + Instance()->thread_error = reason + (FString)": " + command->DebugInfo(); + Instance()->thread_error_fatal = fatal; + } + } + else + { + static bool first = true; + if (fatal) + I_FatalError("%s: %s", reason, command->DebugInfo().GetChars()); + else if (first) + Printf("%s: %s\n", reason, command->DebugInfo().GetChars()); + first = false; + } +} + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)) +{ + tryBlock(data); +} diff --git a/src/r_thread.h b/src/r_thread.h new file mode 100644 index 0000000000..7962dfc208 --- /dev/null +++ b/src/r_thread.h @@ -0,0 +1,235 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "r_draw.h" +#include +#include +#include +#include +#include + +// Use multiple threads when drawing +EXTERN_CVAR(Bool, r_multithreaded) + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + // Working buffer used by Rt drawers + uint8_t dc_temp_buff[MAXHEIGHT * 4]; + uint8_t *dc_temp = nullptr; + + // Working buffer used by Rt drawers, true color edition + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba = nullptr; + + // Working buffer used by the tilted (sloped) span drawer + const uint8_t *tiltlighting[MAXWIDTH]; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + template + T *dest_for_thread(int first_line, int pitch, T *dest) + { + return dest + 
skipped_by_thread(first_line) * pitch; + } + + // The first line in the dc_temp buffer used this thread + int temp_line_for_thread(int first_line) + { + return (first_line + skipped_by_thread(first_line)) / num_cores; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int _dest_y; + + void DetectRangeError(uint32_t *&dest, int &dest_y, int &count) + { +#if defined(_MSC_VER) && defined(_DEBUG) + if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::drawerargs::dc_destheight) + __debugbreak(); // Buffer overrun detected! +#endif + + if (dest_y < 0) + { + count += dest_y; + dest_y = 0; + dest = (uint32_t*)swrenderer::drawerargs::dc_destorg; + } + else if (dest_y >= swrenderer::drawerargs::dc_destheight) + { + dest_y = 0; + count = 0; + } + + if (count < 0 || count > MAXHEIGHT) count = 0; + if (dest_y + count >= swrenderer::drawerargs::dc_destheight) + count = swrenderer::drawerargs::dc_destheight - dest_y; + } + +public: + DrawerCommand() + { + _dest_y = static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch)); + } + + virtual ~DrawerCommand() { } + + virtual void Execute(DrawerThread *thread) = 0; + virtual FString DebugInfo() = 0; +}; + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)); + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 16 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads = 0; + FString thread_error; + bool thread_error_fatal = false; + + int threaded_render = 0; + 
DrawerThread single_core_thread; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal); + + DrawerCommandQueue(); + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0 || !r_multithreaded) + { + T command(std::forward(args)...); + VectoredTryCatch(&command, + [](void *data) + { + T *c = (T*)data; + c->Execute(&Instance()->single_core_thread); + }, + [](void *data, const char *reason, bool fatal) + { + T *c = (T*)data; + ReportDrawerError(c, false, reason, fatal); + }); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 8483b9844a..f86a94bcdf 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -132,6 +132,9 @@ void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, .. 
void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { #ifndef NO_SWRENDER + using namespace swrenderer; + using namespace drawerargs; + FTexture::Span unmaskedSpan[2]; const FTexture::Span **spanptr, *spans; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; @@ -1285,6 +1288,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, FDynamicColormap *colormap, int lightlevel, int bottomclip) { #ifndef NO_SWRENDER + using namespace swrenderer; + using namespace drawerargs; + // Use an equation similar to player sprites to determine shade fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT; float topy, boty, leftx, rightx; @@ -1352,7 +1358,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. R_SetupSpanBits(tex); R_SetSpanColormap(colormap != NULL ? &colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap); - R_SetSpanSource(tex->GetPixels()); + R_SetSpanSource(tex); if (ds_xbits != 0) { scalex = double(1u << (32 - ds_xbits)) / scalex; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0e8dd3dec9..026bbc63bb 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1375,17 +1375,16 @@ void D3DFB::Draw3DPart(bool copy3d) D3DCOLOR color0, color1; if (Accel2D) { - if (realfixedcolormap == NULL) + auto &map = swrenderer::realfixedcolormap; + if (map == NULL) { color0 = 0; color1 = 0xFFFFFFF; } else { - color0 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeStart[0]/2, - realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); - color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, - realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); + color0 = D3DCOLOR_COLORVALUE(map->ColorizeStart[0] / 2, map->ColorizeStart[1] / 2, map->ColorizeStart[2] / 2, 0); + color1 = D3DCOLOR_COLORVALUE(map->ColorizeEnd[0] / 2, map->ColorizeEnd[1] / 2, map->ColorizeEnd[2] 
/ 2, 1); SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } }