From 3ff91807b87d615819a090858ed9618a1cdd8911 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 7 Dec 2016 09:34:49 +0100 Subject: [PATCH 1/9] Move swrenderer into a namespace, add multithreading framework, and move drawers to commands --- src/CMakeLists.txt | 4 +- src/r_3dfloors.cpp | 8 +- src/r_3dfloors.h | 8 +- src/r_bsp.cpp | 11 +- src/r_bsp.h | 8 +- src/r_defs.h | 2 +- src/r_draw.cpp | 4112 +++++++++++++---------------------------- src/r_draw.h | 469 ++--- src/r_draw_pal.cpp | 2593 ++++++++++++++++++++++++++ src/r_draw_pal.h | 333 ++++ src/r_drawt_pal.cpp | 867 +++++++++ src/r_main.cpp | 75 +- src/r_main.h | 12 +- src/r_plane.cpp | 283 +-- src/r_plane.h | 5 + src/r_segs.cpp | 27 +- src/r_segs.h | 5 + src/r_state.h | 2 +- src/r_swrenderer.cpp | 9 + src/r_things.cpp | 34 +- src/r_things.h | 11 +- src/r_thread.cpp | 297 +++ src/r_thread.h | 235 +++ src/v_draw.cpp | 8 +- src/win32/fb_d3d9.cpp | 9 +- 25 files changed, 5955 insertions(+), 3472 deletions(-) create mode 100644 src/r_draw_pal.cpp create mode 100644 src/r_draw_pal.h create mode 100644 src/r_drawt_pal.cpp create mode 100644 src/r_thread.cpp create mode 100644 src/r_thread.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 83dad3b9d1..e164a338cb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -883,7 +883,9 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw.cpp - r_drawt.cpp + r_draw_pal.cpp + r_drawt_pal.cpp + r_thread.cpp r_main.cpp r_plane.cpp r_segs.cpp diff --git a/src/r_3dfloors.cpp b/src/r_3dfloors.cpp index 61a23187d4..87c8af618e 100644 --- a/src/r_3dfloors.cpp +++ b/src/r_3dfloors.cpp @@ -15,6 +15,11 @@ #include "c_cvars.h" #include "r_3dfloors.h" +CVAR(Int, r_3dfloors, true, 0); + +namespace swrenderer +{ + // external variables int fake3D; F3DFloor *fakeFloor; @@ -28,8 +33,6 @@ HeightLevel *height_cur = NULL; int CurrentMirror = 0; int CurrentSkybox = 0; -CVAR(Int, r_3dfloors, true, 0); - // private variables int height_max = -1; TArray toplist; @@ -160,3 
+163,4 @@ void R_3D_LeaveSkybox() CurrentSkybox--; } +} diff --git a/src/r_3dfloors.h b/src/r_3dfloors.h index cacb974443..a703ae19a4 100644 --- a/src/r_3dfloors.h +++ b/src/r_3dfloors.h @@ -3,6 +3,11 @@ #include "p_3dfloors.h" +EXTERN_CVAR(Int, r_3dfloors); + +namespace swrenderer +{ + // special types struct HeightLevel @@ -57,7 +62,6 @@ extern HeightLevel *height_top; extern HeightLevel *height_cur; extern int CurrentMirror; extern int CurrentSkybox; -EXTERN_CVAR(Int, r_3dfloors); // functions void R_3D_DeleteHeights(); @@ -67,4 +71,6 @@ void R_3D_ResetClip(); void R_3D_EnterSkybox(); void R_3D_LeaveSkybox(); +} + #endif diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 8d423b3b31..91fb86e928 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -58,6 +58,13 @@ #include "po_man.h" #include "r_data/colormaps.h" +CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); + +namespace swrenderer +{ + using namespace drawerargs; + seg_t* curline; side_t* sidedef; line_t* linedef; @@ -104,8 +111,6 @@ TArray WallPortals(1000); // note: this array needs to go away as subsector_t *InSubsector; -CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs? -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); void R_StoreWallRange (int start, int stop); @@ -1396,3 +1401,5 @@ void R_RenderBSPNode (void *node) } R_Subsector ((subsector_t *)((BYTE *)node - 1)); } + +} diff --git a/src/r_bsp.h b/src/r_bsp.h index 48ca7565bb..e4d70c4cf1 100644 --- a/src/r_bsp.h +++ b/src/r_bsp.h @@ -27,6 +27,11 @@ #include #include "r_defs.h" +EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? + +namespace swrenderer +{ + // The 3072 below is just an arbitrary value picked to avoid // drawing lines the player is too close to that would overflow // the texture calculations. @@ -109,8 +114,6 @@ extern WORD MirrorFlags; typedef void (*drawfunc_t) (int start, int stop); -EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs? - // BSP? 
void R_ClearClipSegs (short left, short right); void R_ClearDrawSegs (); @@ -119,5 +122,6 @@ void R_RenderBSPNode (void *node); // killough 4/13/98: fake floors/ceilings for deep water / fake ceilings: sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool); +} #endif diff --git a/src/r_defs.h b/src/r_defs.h index a7de16c597..6f3b925c77 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -55,7 +55,7 @@ enum SIL_BOTH }; -extern size_t MaxDrawSegs; +namespace swrenderer { extern size_t MaxDrawSegs; } struct FDisplacement; // diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 6f58ec2a3a..a2bf412e8b 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1,27 +1,3 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// $Log:$ -// -// DESCRIPTION: -// The actual span/column drawing functions. -// Here find the main potential for optimization, -// e.g. inline assembly, different algorithms. 
-// -//----------------------------------------------------------------------------- #include @@ -38,2850 +14,1312 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw.h" +#include "r_draw_pal.h" +#include "r_thread.h" -#include "gi.h" -#include "stats.h" -#include "x86.h" - -#undef RANGECHECK - -// status bar height at bottom of screen -// [RH] status bar position at bottom of screen -extern int ST_Y; - -// -// All drawing to the view buffer is accomplished in this file. -// The other refresh files only know about ccordinates, -// not the architecture of the frame buffer. -// Conveniently, the frame buffer is a linear one, -// and we need only the base address, -// and the total size == width*height*depth/8., -// - -BYTE* viewimage; -extern "C" { -int ylookup[MAXHEIGHT]; -BYTE *dc_destorg; -} -int scaledviewwidth; - -// [RH] Pointers to the different column drawers. -// These get changed depending on the current -// screen depth and asm/no asm. -void (*R_DrawColumnHoriz)(void); -void (*R_DrawTranslatedColumn)(void); -void (*R_DrawShadedColumn)(void); -void (*R_DrawSpan)(void); -void (*R_DrawSpanMasked)(void); - -// -// R_DrawColumn -// Source is the top of the column to scale. 
-// -double dc_texturemid; -extern "C" { -int dc_pitch=0xABadCafe; // [RH] Distance between rows - -lighttable_t* dc_colormap; -int dc_x; -int dc_yl; -int dc_yh; -fixed_t dc_iscale; -fixed_t dc_texturefrac; -int dc_color; // [RH] Color for column filler -DWORD dc_srccolor; -DWORD *dc_srcblend; // [RH] Source and destination -DWORD *dc_destblend; // blending lookups - -// first pixel in a column (possibly virtual) -const BYTE* dc_source; - -BYTE* dc_dest; -int dc_count; - -DWORD vplce[4]; -DWORD vince[4]; -BYTE* palookupoffse[4]; -const BYTE* bufplce[4]; -const BYTE* bufplce2[4]; -uint32_t bufheight[4]; - -// just for profiling -int dccount; -} - -int dc_fillcolor; -BYTE *dc_translation; -BYTE shadetables[NUMCOLORMAPS*16*256]; -FDynamicColormap ShadeFakeColormap[16]; -BYTE identitymap[256]; - -EXTERN_CVAR (Int, r_columnmethod) - - -void R_InitShadeMaps() +namespace swrenderer { - int i,j; - // set up shading tables for shaded columns - // 16 colormap sets, progressing from full alpha to minimum visible alpha + // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) + extern "C" short spanend[MAXHEIGHT]; + extern float rw_light; + extern float rw_lightstep; + extern int wallshade; - BYTE *table = shadetables; + double dc_texturemid; - // Full alpha - for (i = 0; i < 16; ++i) + int ylookup[MAXHEIGHT]; + uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; + FDynamicColormap ShadeFakeColormap[16]; + uint8_t identitymap[256]; + FDynamicColormap identitycolormap; + int fuzzoffset[FUZZTABLE + 1]; + int fuzzpos; + int fuzzviewheight; + + namespace drawerargs { - ShadeFakeColormap[i].Color = ~0u; - ShadeFakeColormap[i].Desaturate = ~0u; - ShadeFakeColormap[i].Next = NULL; - ShadeFakeColormap[i].Maps = table; + int dc_pitch; + lighttable_t *dc_colormap; + int dc_x; + int dc_yl; + int dc_yh; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_textureheight; + int dc_color; + uint32_t dc_srccolor; + uint32_t dc_srccolor_bgra; + uint32_t *dc_srcblend; + 
uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_texturefracx; + uint8_t *dc_translation; + uint8_t *dc_dest; + uint8_t *dc_destorg; + int dc_destheight; + int dc_count; + uint32_t vplce[4]; + uint32_t vince[4]; + uint8_t *palookupoffse[4]; + fixed_t palookuplight[4]; + const uint8_t *bufplce[4]; + const uint8_t *bufplce2[4]; + uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; + int vlinebits; + int mvlinebits; + int tmvlinebits; + int ds_y; + int ds_x1; + int ds_x2; + lighttable_t * ds_colormap; + dsfixed_t ds_light; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + fixed_t ds_alpha; + double ds_lod; + const uint8_t *ds_source; + int ds_color; + unsigned int dc_tspans[4][MAXHEIGHT]; + unsigned int *dc_ctspan[4]; + unsigned int *horizspan[4]; + } - for (j = 0; j < NUMCOLORMAPS; ++j) + void R_InitColumnDrawers() + { + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + + void R_InitShadeMaps() + { + int i, j; + // set up shading tables for shaded columns + // 16 colormap sets, progressing from full alpha to minimum visible alpha + + uint8_t *table = shadetables; + + // Full alpha + for (i = 0; i < 16; ++i) { - int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16-i); - for (int k = 0; k < 256; ++k) + ShadeFakeColormap[i].Color = ~0u; + ShadeFakeColormap[i].Desaturate = ~0u; + ShadeFakeColormap[i].Next = NULL; + ShadeFakeColormap[i].Maps = table; + + for (j = 0; j < NUMCOLORMAPS; ++j) { - BYTE v = (((k+2) * a) + 256) >> 14; - table[k] = MIN (v, 64); - } - table += 256; - } - } - for (i = 0; i < NUMCOLORMAPS*16*256; ++i) - { - assert(shadetables[i] <= 64); - } - - // Set up a guaranteed identity map - for (i = 0; i 
< 256; ++i) - { - identitymap[i] = i; - } -} - -/************************************/ -/* */ -/* Palettized drawers (C versions) */ -/* */ -/************************************/ - -// -// A column is a vertical slice/span from a wall texture that, -// given the DOOM style restrictions on the view orientation, -// will always have constant z depth. -// Thus a special case loop for very fast rendering can -// be used. It has also been used with Wolfenstein 3D. -// -void R_DrawColumn (void) -{ - int count; - BYTE* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = dc_dest; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. - do - { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. 
- *dest = colormap[source[frac>>FRACBITS]]; - - dest += pitch; - frac += fracstep; - - } while (--count); - } -} - - -// [RH] Just fills a column with a color -void R_FillColumnP (void) -{ - int count; - BYTE* dest; - - count = dc_count; - - if (count <= 0) - return; - - dest = dc_dest; - - { - int pitch = dc_pitch; - BYTE color = dc_color; - - do - { - *dest = color; - dest += pitch; - } while (--count); - } -} - -void R_FillAddColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillAddClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD a = fg + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillSubClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor | 0x40100400; - int pitch = dc_pitch; - - do - { - DWORD a = fg - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -void R_FillRevSubClampColumn (void) -{ - int count; - BYTE *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - int pitch = dc_pitch; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - 
fg; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - } while (--count); - -} - -// -// Spectre/Invisibility. -// -#define FUZZTABLE 50 - -extern "C" -{ -int fuzzoffset[FUZZTABLE+1]; // [RH] +1 for the assembly routine -int fuzzpos = 0; -int fuzzviewheight; -} -/* - FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, - FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, - FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, - FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF -*/ - -static const signed char fuzzinit[FUZZTABLE] = { - 1,-1, 1,-1, 1, 1,-1, - 1, 1,-1, 1, 1, 1,-1, - 1, 1, 1,-1,-1,-1,-1, - 1,-1,-1, 1, 1, 1, 1,-1, - 1,-1, 1, 1,-1,-1, 1, - 1,-1,-1,-1,-1, 1, 1, - 1, 1,-1, 1, 1,-1, 1 -}; - -void R_InitFuzzTable (int fuzzoff) -{ - int i; - - for (i = 0; i < FUZZTABLE; i++) - { - fuzzoffset[i] = fuzzinit[i] * fuzzoff; - } -} - -// -// Creates a fuzzy image by copying pixels from adjacent ones above and below. -// Used with an all black colormap, this could create the SHADOW effect, -// i.e. spectres and invisible players. -// -void R_DrawFuzzColumn (void) -{ - int count; - BYTE *dest; - - // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; - - // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - count = dc_yh - dc_yl; - - // Zero length. - if (count < 0) - return; - - count++; - - dest = ylookup[dc_yl] + dc_x + dc_destorg; - - // colormap #6 is used for shading (of 0-31, a bit brighter than average) - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. 
- int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - BYTE *map = &NormalLight.Maps[6*256]; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) - { - cnt = MIN(FUZZTABLE-fuzz,count); - count -= cnt; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do + int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16 - i); + for (int k = 0; k < 256; ++k) { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -} - -// -// R_DrawTranlucentColumn -// - -/* -[RH] This translucency algorithm is based on DOSDoom 0.65's, but uses -a 32k RGB table instead of an 8k one. At least on my machine, it's -slightly faster (probably because it uses only one shift instead of -two), and it looks considerably less green at the ends of the -translucency range. The extra size doesn't appear to be an issue. - -The following note is from DOSDoom 0.65: - -New translucency algorithm, by Erik Sandberg: - -Basically, we compute the red, green and blue values for each pixel, and -then use a RGB table to check which one of the palette colours that best -represents those RGB values. The RGB table is 8k big, with 4 R-bits, -5 G-bits and 4 B-bits. A 4k table gives a bit too bad precision, and a 32k -table takes up more memory and results in more cache misses, so an 8k -table seemed to be quite ultimate. - -The computation of the RGB for each pixel is accelerated by using two -1k tables for each translucency level. 
-The xth element of one of these tables contains the r, g and b values for -the colour x, weighted for the current translucency level (for example, -the weighted rgb values for background colour at 75% translucency are 1/4 -of the original rgb values). The rgb values are stored as three -low-precision fixed point values, packed into one long per colour: -Bit 0-4: Frac part of blue (5 bits) -Bit 5-8: Int part of blue (4 bits) -Bit 9-13: Frac part of red (5 bits) -Bit 14-17: Int part of red (4 bits) -Bit 18-22: Frac part of green (5 bits) -Bit 23-27: Int part of green (5 bits) -Bit 28-31: All zeros (4 bits) - -The point of this format is that the two colours now can be added, and -then be converted to a RGB table index very easily: First, we just set -all the frac bits and the four upper zero bits to 1. It's now possible -to get the RGB table index by anding the current value >> 5 with the -current value >> 19. When asm-optimised, this should be the fastest -algorithm that uses RGB tables. - -*/ - -void R_DrawAddColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[source[frac>>FRACBITS]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// -// R_DrawTranslatedColumn -// Used to draw player sprites with the green colorramp mapped to others. -// Could be used with different translation tables, e.g. the lighter colored -// version of the BaronOfHell, the HellKnight, uses identical sprites, kinda -// brightened up. 
-// - -void R_DrawTranslatedColumnP_C (void) -{ - int count; - BYTE* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *colormap = dc_colormap; - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - *dest = colormap[translation[source[frac>>FRACBITS]]]; - dest += pitch; - - frac += fracstep; - } while (--count); - } -} - -// Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Draw a column whose "color" values are actually translucency -// levels for a base color stored in dc_color. 
-void R_DrawShadedColumnP_C (void) -{ - int count; - BYTE *dest; - fixed_t frac, fracstep; - - count = dc_count; - - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - DWORD *fgstart = &Col2RGB8[0][dc_color]; - - do - { - DWORD val = colormap[source[frac>>FRACBITS]]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Add source to destination, clamping it to white -void R_DrawAddClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Add translated source to destination, clamping it to white -void R_DrawAddClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = fg2rgb[colormap[translation[source[frac>>FRACBITS]]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - 
a |= b; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract destination from source, clamping it to black -void R_DrawSubClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract destination from source, clamping it to black -void R_DrawSubClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[translation[source[frac>>FRACBITS]]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract source from destination, clamping it to black -void R_DrawRevSubClampColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD 
*fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - -// Subtract source from destination, clamping it to black -void R_DrawRevSubClampTranslatedColumnP_C () -{ - int count; - BYTE *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - - - -// -// R_DrawSpan -// With DOOM style restrictions on view orientation, -// the floors and ceilings consist of horizontal slices -// or spans with constant z depth. -// However, rotation around the world z axis is possible, -// thus this mapping, while simpler and faster than -// perspective correct texture mapping, has to traverse -// the texture at an angle in all but a few cases. -// In consequence, flats are not stored by column (like walls), -// and the inner loop has to step in texture space u and v. -// -// [RH] I'm not sure who wrote this, but floor/ceiling mapping -// *is* perspective correct for spans of constant z depth, which -// Doom guarantees because it does not let you change your pitch. -// Also, because of the new texture system, flats *are* stored by -// column to make it easy to use them on walls too. 
To accomodate -// this, the use of x/u and y/v in R_DrawSpan just needs to be -// swapped. -// -extern "C" { -int ds_color; // [RH] color for non-textured spans - -int ds_y; -int ds_x1; -int ds_x2; - -lighttable_t* ds_colormap; - -dsfixed_t ds_xfrac; -dsfixed_t ds_yfrac; -dsfixed_t ds_xstep; -dsfixed_t ds_ystep; -int ds_xbits; -int ds_ybits; - -// start of a floor/ceiling tile image -const BYTE* ds_source; - -// just for profiling -int dscount; - -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif -} - -//========================================================================== -// -// R_SetSpanSource -// -// Sets the source bitmap for the span drawing routines. -// -//========================================================================== - -void R_SetSpanSource(const BYTE *pixels) -{ - ds_source = pixels; -#ifdef X86_ASM - if (ds_cursource != ds_source) - { - R_SetSpanSource_ASM(pixels); - } -#endif -} - -//========================================================================== -// -// R_SetSpanColormap -// -// Sets the colormap for the span drawing routines. -// -//========================================================================== - -void R_SetSpanColormap(BYTE *colormap) -{ - ds_colormap = colormap; -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - { - R_SetSpanColormap_ASM (ds_colormap); - } -#endif -} - -//========================================================================== -// -// R_SetupSpanBits -// -// Sets the texture size for the span drawing routines. 
-// -//========================================================================== - -void R_SetupSpanBits(FTexture *tex) -{ - tex->GetWidth (); - ds_xbits = tex->WidthBits; - ds_ybits = tex->HeightBits; - if ((1 << ds_xbits) > tex->GetWidth()) - { - ds_xbits--; - } - if ((1 << ds_ybits) > tex->GetHeight()) - { - ds_ybits--; - } -#ifdef X86_ASM - R_SetSpanSize_ASM (ds_xbits, ds_ybits); -#endif -} - -// -// Draws the actual span. -//#ifndef X86_ASM -void R_DrawSpanP_C (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error ("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } -// dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - // Current texture index in u,v. - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = colormap[source[spot]]; - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = colormap[source[spot]]; - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -// [RH] Draw a span with holes -void R_DrawSpanMaskedP_C (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - int texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - *dest = colormap[texdata]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - int texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - *dest = colormap[texdata]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} -//#endif - -void R_DrawSpanTranslucent (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanMaskedTranslucent (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - BYTE texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - DWORD fg = colormap[texdata]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanAddClamp (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -void R_DrawSpanMaskedAddClamp (void) -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - BYTE* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - BYTE texdata; - - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - texdata = source[spot]; - if (texdata != 0) - { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -// [RH] Just fill a span with a color -void R_FillSpan (void) -{ - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); -} - -// Draw a voxel slab -// -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. - -// Actually, this is just R_DrawColumn with an extra width parameter. 
- -#ifndef X86_ASM -static const BYTE *slabcolormap; - -extern "C" void R_SetupDrawSlabC(const BYTE *colormap) -{ - slabcolormap = colormap; -} - -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) -{ - int x; - const BYTE *colormap = slabcolormap; - int pitch = dc_pitch; - - assert(dx > 0); - - if (dx == 1) - { - while (dy > 0) - { - *p = colormap[vptr[v >> FRACBITS]]; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 2) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 3) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p[2] = color; - p += pitch; - v += vi; - dy--; - } - } - else if (dx == 4) - { - while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - p[0] = color; - p[1] = color; - p[2] = color; - p[3] = color; - p += pitch; - v += vi; - dy--; - } - } - else while (dy > 0) - { - BYTE color = colormap[vptr[v >> FRACBITS]]; - // The optimizer will probably turn this into a memset call. - // Since dx is not likely to be large, I'm not sure that's a good thing, - // hence the alternatives above. 
- for (x = 0; x < dx; x++) - { - p[x] = color; - } - p += pitch; - v += vi; - dy--; - } -} -#endif - - -/****************************************************/ -/****************************************************/ - -// wallscan stuff, in C - -#ifndef X86_ASM -static DWORD vlinec1 (); -static int vlinebits; - -DWORD (*dovline1)() = vlinec1; -DWORD (*doprevline1)() = vlinec1; - -#ifdef X64_ASM -extern "C" void vlinetallasm4(); -#define dovline4 vlinetallasm4 -extern "C" void setupvlinetallasm (int); -#else -static void vlinec4 (); -void (*dovline4)() = vlinec4; -#endif - -static DWORD mvlinec1(); -static void mvlinec4(); -static int mvlinebits; - -DWORD (*domvline1)() = mvlinec1; -void (*domvline4)() = mvlinec4; - -#else - -extern "C" -{ -DWORD vlineasm1 (); -DWORD prevlineasm1 (); -DWORD vlinetallasm1 (); -DWORD prevlinetallasm1 (); -void vlineasm4 (); -void vlinetallasmathlon4 (); -void vlinetallasm4 (); -void setupvlineasm (int); -void setupvlinetallasm (int); - -DWORD mvlineasm1(); -void mvlineasm4(); -void setupmvlineasm (int); -} - -DWORD (*dovline1)() = vlinetallasm1; -DWORD (*doprevline1)() = prevlinetallasm1; -void (*dovline4)() = vlinetallasm4; - -DWORD (*domvline1)() = mvlineasm1; -void (*domvline4)() = mvlineasm4; -#endif - -void setupvline (int fracbits) -{ -#ifdef X86_ASM - if (CPU.Family <= 5) - { - if (fracbits >= 24) - { - setupvlineasm (fracbits); - dovline4 = vlineasm4; - dovline1 = vlineasm1; - doprevline1 = prevlineasm1; - } - else - { - setupvlinetallasm (fracbits); - dovline1 = vlinetallasm1; - doprevline1 = prevlinetallasm1; - dovline4 = vlinetallasm4; - } - } - else - { - setupvlinetallasm (fracbits); - if (CPU.bIsAMD && CPU.AMDFamily >= 7) - { - dovline4 = vlinetallasmathlon4; - } - } -#else - vlinebits = fracbits; -#ifdef X64_ASM - setupvlinetallasm(fracbits); -#endif -#endif -} - -#if !defined(X86_ASM) -DWORD vlinec1 () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; 
- const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = vlinebits; - int pitch = dc_pitch; - - do - { - *dest = colormap[source[frac>>bits]]; - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -#ifndef _M_X64 -void vlinec4 () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - - do - { - dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; - dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; - dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; - dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; - dest += dc_pitch; - } while (--count); -} -#else -// Optimized version for 64 bit. In 64 bit mode, accessing global variables is very expensive so even though -// this exceeds the register count, loading all those values into a local variable is faster than not loading all of them. 
-void vlinec4() -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - auto pal0 = palookupoffse[0]; - auto pal1 = palookupoffse[1]; - auto pal2 = palookupoffse[2]; - auto pal3 = palookupoffse[3]; - auto buf0 = bufplce[0]; - auto buf1 = bufplce[1]; - auto buf2 = bufplce[2]; - auto buf3 = bufplce[3]; - const auto vince0 = vince[0]; - const auto vince1 = vince[1]; - const auto vince2 = vince[2]; - const auto vince3 = vince[3]; - auto vplce0 = vplce[0]; - auto vplce1 = vplce[1]; - auto vplce2 = vplce[2]; - auto vplce3 = vplce[3]; - - do - { - dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; - dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; - dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; - dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; - dest += dc_pitch; - } while (--count); -} -#endif - -#endif - -void setupmvline (int fracbits) -{ -#if defined(X86_ASM) - setupmvlineasm (fracbits); - domvline1 = mvlineasm1; - domvline4 = mvlineasm4; -#else - mvlinebits = fracbits; -#endif -} - -#if !defined(X86_ASM) -DWORD mvlinec1 () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = mvlinebits; - int pitch = dc_pitch; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - *dest = colormap[pix]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void mvlinec4 () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = mvlinebits; - DWORD place; - - do - { - BYTE pix; - - pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; - pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; - pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = 
palookupoffse[2][pix]; vplce[2] = place+vince[2]; - pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; - dest += dc_pitch; - } while (--count); -} -#endif - -extern "C" short spanend[MAXHEIGHT]; -extern float rw_light; -extern float rw_lightstep; -extern int wallshade; - -static void R_DrawFogBoundarySection (int y, int y2, int x1) -{ - BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; - - for (; y < y2; ++y) - { - int x2 = spanend[y]; - int x = x1; - do - { - dest[x] = colormap[dest[x]]; - } while (++x <= x2); - dest += dc_pitch; - } -} - -static void R_DrawFogBoundaryLine (int y, int x) -{ - int x2 = spanend[y]; - BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; - do - { - dest[x] = colormap[dest[x]]; - } while (++x <= x2); -} - -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) -{ - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. - - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2-x1-1); - int x = x2-1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort (spanend+t2, b2-t2, x); - } - - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x+1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. 
- R_DrawFogBoundarySection (t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort (spanend+t2, b2-t2, x); - } - rcolormap = lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - } - else - { - if (dc_colormap != basecolormapdata) - { - stop = MIN (t1, b2); - while (t2 < stop) - { - R_DrawFogBoundaryLine (t2++, xr); + uint8_t v = (((k + 2) * a) + 256) >> 14; + table[k] = MIN(v, 64); } - stop = MAX (b1, t2); - while (b2 > stop) + table += 256; + } + } + for (i = 0; i < NUMCOLORMAPS * 16 * 256; ++i) + { + assert(shadetables[i] <= 64); + } + + // Set up a guaranteed identity map + for (i = 0; i < 256; ++i) + { + identitymap[i] = i; + } + } + + void R_InitFuzzTable(int fuzzoff) + { + /* + FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF + */ + + static const int8_t fuzzinit[FUZZTABLE] = { + 1,-1, 1,-1, 1, 1,-1, + 1, 1,-1, 1, 1, 1,-1, + 1, 1, 1,-1,-1,-1,-1, + 1,-1,-1, 1, 1, 1, 1,-1, + 1,-1, 1, 1,-1,-1, 1, + 1,-1,-1,-1,-1, 1, 1, + 1, 1,-1, 1, 1,-1, 1 + }; + + for (int i = 0; i < FUZZTABLE; i++) + { + fuzzoffset[i] = fuzzinit[i] * fuzzoff; + } + } + + namespace + { + bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + { + using namespace drawerargs; + + // r_drawtrans is a seriously bad thing to turn off. I wonder if I should + // just remove it completely. 
+ if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) + { + if (flags & STYLEF_ColorIsFixed) { - R_DrawFogBoundaryLine (--b2, xr); + colfunc = R_FillColumn; + hcolfunc_post1 = rt_copy1col; + hcolfunc_post4 = rt_copy4cols; + } + else if (dc_translation == NULL) + { + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + else + { + colfunc = transcolfunc; + hcolfunc_post1 = rt_tlate1col; + hcolfunc_post4 = rt_tlate4cols; + } + return true; + } + if (flags & STYLEF_InvertSource) + { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) + { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else + { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + switch (op) + { + case STYLEOP_Add: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (fglevel + bglevel <= FRACUNIT) + { // Colors won't overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else + { + colfunc = R_DrawTlatedAddColumn; + hcolfunc_post1 = rt_tlateadd1col; + hcolfunc_post4 = rt_tlateadd4cols; + } + } + else + { // Colors might overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddClampColumn; + hcolfunc_post1 = rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddClampColumn; + hcolfunc_post1 = 
rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else + { + colfunc = R_DrawAddClampTranslatedColumn; + hcolfunc_post1 = rt_tlateaddclamp1col; + hcolfunc_post4 = rt_tlateaddclamp4cols; + } + } + return true; + + case STYLEOP_Sub: + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else + { + colfunc = R_DrawSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlatesubclamp1col; + hcolfunc_post4 = rt_tlatesubclamp4cols; + } + return true; + + case STYLEOP_RevSub: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillRevSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawRevSubClampColumn; + hcolfunc_post1 = rt_revsubclamp1col; + hcolfunc_post4 = rt_revsubclamp4cols; + } + else + { + colfunc = R_DrawRevSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlaterevsubclamp1col; + hcolfunc_post4 = rt_tlaterevsubclamp4cols; + } + return true; + + default: + return false; + } + } + + fixed_t GetAlpha(int type, fixed_t alpha) + { + switch (type) + { + case STYLEALPHA_Zero: return 0; + case STYLEALPHA_One: return OPAQUE; + case STYLEALPHA_Src: return alpha; + case STYLEALPHA_InvSrc: return OPAQUE - alpha; + default: return 0; + } + } + + FDynamicColormap *basecolormapsave; + } + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) + { + using namespace drawerargs; + + fixed_t fglevel, bglevel; + + style.CheckFuzz(); + + if (style.BlendOp == STYLEOP_Shadow) + { + style = LegacyRenderStyles[STYLE_TranslucentStencil]; + alpha = TRANSLUC33; + color = 0; + } + + if (style.Flags & STYLEF_TransSoulsAlpha) + { + alpha = 
fixed_t(transsouls * OPAQUE); + } + else if (style.Flags & STYLEF_Alpha1) + { + alpha = FRACUNIT; + } + else + { + alpha = clamp(alpha, 0, OPAQUE); + } + + if (translation != -1) + { + dc_translation = NULL; + if (translation != 0) + { + FRemapTable *table = TranslationToTable(translation); + if (table != NULL && !table->Inactive) + { + dc_translation = table->Remap; } } - else + } + basecolormapsave = basecolormap; + hcolfunc_pre = R_DrawColumnHoriz; + + // Check for special modes + if (style.BlendOp == STYLEOP_Fuzz) + { + colfunc = fuzzcolfunc; + return DoDraw0; + } + else if (style == LegacyRenderStyles[STYLE_Shaded]) + { + // Shaded drawer only gets 16 levels of alpha because it saves memory. + if ((alpha >>= 12) == 0) + return DontDraw; + colfunc = R_DrawShadedColumn; + hcolfunc_post1 = rt_shaded1col; + hcolfunc_post4 = rt_shaded4cols; + dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; + dc_colormap = (basecolormap = &ShadeFakeColormap[16 - alpha])->Maps; + if (fixedlightlev >= 0 && fixedcolormap == NULL) { - t2 = MAX (t2, MIN (t1, b2)); - b2 = MIN (b2, MAX (b1, t2)); + dc_colormap += fixedlightlev; } + return r_columnmethod ? DoDraw1 : DoDraw0; + } - stop = MIN (t2, b1); - while (t1 < stop) + fglevel = GetAlpha(style.SrcAlpha, alpha); + bglevel = GetAlpha(style.DestAlpha, alpha); + + if (style.Flags & STYLEF_ColorIsFixed) + { + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); + // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. + dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + if (style.Flags & STYLEF_InvertSource) { - spanend[t1++] = x; - } - stop = MAX (b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; + r = 255 - r; + g = 255 - g; + b = 255 - b; } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; + // dc_srccolor is used by the R_Fill* routines. 
It is premultiplied + // with the alpha. + dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; + hcolfunc_pre = R_FillColumnHoriz; + R_SetColorMapLight(identitycolormap.Maps, 0, 0); } - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection (t2, b2, x1); - } -} - -int tmvlinebits; - -void setuptmvline (int bits) -{ - tmvlinebits = bits; -} - -fixed_t tmvline1_add () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) + if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) { - DWORD fg = fg2rgb[colormap[pix]]; - DWORD bg = bg2rgb[*dest]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_add () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD fg = fg2rgb[palookupoffse[i][pix]]; - DWORD bg = bg2rgb[dest[i]]; - fg = (fg+bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -fixed_t tmvline1_addclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = 
fg2rgb[colormap[pix]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_addclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -fixed_t tmvline1_subclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_subclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += 
dc_pitch; - } while (--count); -} - -fixed_t tmvline1_revsubclamp () -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - BYTE *dest = dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - -void tmvline4_revsubclamp () -{ - BYTE *dest = dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - -void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0 = bufplce[0]; - int textureheight0 = bufheight[0]; - - int32_t frac = vplce[0]; - int32_t fracstep = vince[0]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) - { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg 
= source0[sample_index]; - - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; - } -} - -void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0[4] = { bufplce[0], bufplce[1], bufplce[2], bufplce[3] }; - int textureheight0 = bufheight[0]; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; - int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = 
RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - } - - // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - 
c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - output[col] = source0[col][sample_index]; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Fade bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - } -} - -void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - 
const uint8_t *source0 = bufplce[0]; - const uint8_t *source1 = bufplce2[0]; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; - - int32_t frac = vplce[0]; - int32_t fracstep = vince[0]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - - for (int index = 0; index < count; index++) - { - uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[sample_index2]; - } - - int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); - int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); - - if (alpha_top == 256 && alpha_bottom == 256) - { - *dest = fg; - } - else - { - int inv_alpha_top = 256 - alpha_top; - int inv_alpha_bottom = 256 - alpha_bottom; - - const auto &c = GPalette.BaseColors[fg]; - int c_red = c.r; - int c_green = c.g; - int c_blue = c.b; - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - } - - frac += fracstep; - dest += pitch; - } -} - -void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) -{ - uint8_t *dest = dc_dest; - int count = dc_count; - int pitch = dc_pitch; - const uint8_t *source0[4] = { bufplce[0], 
bufplce[1], bufplce[2], bufplce[3] }; - const uint8_t *source1[4] = { bufplce2[0], bufplce2[1], bufplce2[2], bufplce2[3] }; - int textureheight0 = bufheight[0]; - uint32_t maxtextureheight1 = bufheight[1] - 1; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)vplce[0], (int32_t)vplce[1], (int32_t)vplce[2], (int32_t)vplce[3] }; - int32_t fracstep[4] = { (int32_t)vince[0], (int32_t)vince[1], (int32_t)vince[2], (int32_t)vince[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)]; - uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / 
fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - // Top solid color: - for (int index = 0; index < start_fadetop_y; index++) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - } - - // Top fade: - for (int index = start_fadetop_y; index < end_fadetop_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Textured center: - for (int index = end_fadetop_y; index < start_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Fade 
bottom: - for (int index = start_fadebottom_y; index < end_fadebottom_y; index++) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - } - - // Bottom solid color: - for (int index = end_fadebottom_y; index < count; index++) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - } -} - -//========================================================================== -// -// R_GetColumn -// -//========================================================================== - -const BYTE *R_GetColumn (FTexture *tex, int col) -{ - int width; - - // If the texture's width isn't a power of 2, then we need to make it a - // positive offset for proper clamping. 
- if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) - { - col = width + (col % width); - } - return tex->GetColumn (col, NULL); -} - - -// [RH] Initialize the column drawer pointers -void R_InitColumnDrawers () -{ -#ifdef X86_ASM - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_ASM; - R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; -#else - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_C; -#endif -} - -// [RH] Choose column drawers in a single place -EXTERN_CVAR (Int, r_drawfuzz) -EXTERN_CVAR (Bool, r_drawtrans) -EXTERN_CVAR (Float, transsouls) - -static FDynamicColormap *basecolormapsave; - -static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) -{ - // r_drawtrans is a seriously bad thing to turn off. I wonder if I should - // just remove it completely. 
- if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) - { - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillColumnP; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; - } - else if (dc_translation == NULL) - { - colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - } - else - { - colfunc = transcolfunc; - hcolfunc_post1 = rt_tlate1col; - hcolfunc_post4 = rt_tlate4cols; - } - return true; - } - if (flags & STYLEF_InvertSource) - { - dc_srcblend = Col2RGB8_Inverse[fglevel>>10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; - } - else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) - { - dc_srcblend = Col2RGB8[fglevel>>10]; - dc_destblend = Col2RGB8[bglevel>>10]; - } - else - { - dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10]; - dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; - } - switch (op) - { - case STYLEOP_Add: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (fglevel + bglevel <= FRACUNIT) - { // Colors won't overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawAddColumnP_C; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; - } - else - { - colfunc = R_DrawTlatedAddColumnP_C; - hcolfunc_post1 = rt_tlateadd1col; - hcolfunc_post4 = rt_tlateadd4cols; - } - } - else - { // Colors might overflow when added - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawAddClampColumnP_C; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; - } - else - { - colfunc = R_DrawAddClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlateaddclamp1col; - hcolfunc_post4 = 
rt_tlateaddclamp4cols; - } - } - return true; - - case STYLEOP_Sub: - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawSubClampColumnP_C; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else - { - colfunc = R_DrawSubClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlatesubclamp1col; - hcolfunc_post4 = rt_tlatesubclamp4cols; - } - return true; - - case STYLEOP_RevSub: - if (fglevel == 0 && bglevel == FRACUNIT) - { - return false; - } - if (flags & STYLEF_ColorIsFixed) - { - colfunc = R_FillRevSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; - } - else if (dc_translation == NULL) - { - colfunc = R_DrawRevSubClampColumnP_C; - hcolfunc_post1 = rt_revsubclamp1col; - hcolfunc_post4 = rt_revsubclamp4cols; - } - else - { - colfunc = R_DrawRevSubClampTranslatedColumnP_C; - hcolfunc_post1 = rt_tlaterevsubclamp1col; - hcolfunc_post4 = rt_tlaterevsubclamp4cols; - } - return true; - - default: - return false; - } -} - -static fixed_t GetAlpha(int type, fixed_t alpha) -{ - switch (type) - { - case STYLEALPHA_Zero: return 0; - case STYLEALPHA_One: return OPAQUE; - case STYLEALPHA_Src: return alpha; - case STYLEALPHA_InvSrc: return OPAQUE - alpha; - default: return 0; - } -} - -ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color) -{ - fixed_t fglevel, bglevel; - - style.CheckFuzz(); - - if (style.BlendOp == STYLEOP_Shadow) - { - style = LegacyRenderStyles[STYLE_TranslucentStencil]; - alpha = TRANSLUC33; - color = 0; - } - - if (style.Flags & STYLEF_TransSoulsAlpha) - { - alpha = fixed_t(transsouls * OPAQUE); - } - else if (style.Flags & STYLEF_Alpha1) - { - alpha = FRACUNIT; - } - else - { - alpha = clamp (alpha, 0, OPAQUE); - } - - dc_translation = NULL; - if (translation != 0) - { - FRemapTable *table = 
TranslationToTable(translation); - if (table != NULL && !table->Inactive) - { - dc_translation = table->Remap; - } - } - basecolormapsave = basecolormap; - hcolfunc_pre = R_DrawColumnHoriz; - - // Check for special modes - if (style.BlendOp == STYLEOP_Fuzz) - { - colfunc = fuzzcolfunc; - return DoDraw0; - } - else if (style == LegacyRenderStyles[STYLE_Shaded]) - { - // Shaded drawer only gets 16 levels of alpha because it saves memory. - if ((alpha >>= 12) == 0) return DontDraw; - colfunc = R_DrawShadedColumn; - hcolfunc_post1 = rt_shaded1col; - hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; - if (fixedlightlev >= 0 && fixedcolormap == NULL) - { - dc_colormap += fixedlightlev; } return r_columnmethod ? DoDraw1 : DoDraw0; } - fglevel = GetAlpha(style.SrcAlpha, alpha); - bglevel = GetAlpha(style.DestAlpha, alpha); - - if (style.Flags & STYLEF_ColorIsFixed) + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) { - int x = fglevel >> 10; - int r = RPART(color); - int g = GPART(color); - int b = BPART(color); - // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. - dc_color = RGB32k.RGB[r>>3][g>>3][b>>3]; - if (style.Flags & STYLEF_InvertSource) + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); + } + + void R_FinishSetPatchStyle() + { + basecolormap = basecolormapsave; + } + + const uint8_t *R_GetColumn(FTexture *tex, int col) + { + int width; + + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. + if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) { - r = 255 - r; - g = 255 - g; - b = 255 - b; + col = width + (col % width); } - // dc_srccolor is used by the R_Fill* routines. It is premultiplied - // with the alpha. 
- dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHorizP; - dc_colormap = identitymap; + + return tex->GetColumn(col, nullptr); } - if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()) { - return DontDraw; + if (colfunc == R_DrawAddColumn) + { + *tmvline1 = tmvline1_add; + *tmvline4 = tmvline4_add; + return true; + } + if (colfunc == R_DrawAddClampColumn) + { + *tmvline1 = tmvline1_addclamp; + *tmvline4 = tmvline4_addclamp; + return true; + } + if (colfunc == R_DrawSubClampColumn) + { + *tmvline1 = tmvline1_subclamp; + *tmvline4 = tmvline4_subclamp; + return true; + } + if (colfunc == R_DrawRevSubClampColumn) + { + *tmvline1 = tmvline1_revsubclamp; + *tmvline4 = tmvline4_revsubclamp; + return true; + } + return false; + } + + void setupvline(int fracbits) + { + drawerargs::vlinebits = fracbits; + } + + void setupmvline(int fracbits) + { + drawerargs::mvlinebits = fracbits; + } + + void setuptmvline(int fracbits) + { + drawerargs::tmvlinebits = fracbits; + } + + void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade) + { + using namespace drawerargs; + + dc_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + + void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade) + { + using namespace drawerargs; + + ds_colormap = base_colormap + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + + void R_SetTranslationMap(lighttable_t *translation) + { + using namespace drawerargs; + + dc_colormap = translation; + } + + void rt_initcols(uint8_t *buffer) + { + using namespace drawerargs; + + for (int y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buffer); + } + + void rt_span_coverage(int x, int start, int stop) + { + using namespace drawerargs; + + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] 
= start; + (*tspan)[1] = stop; + *tspan += 2; + } + + void rt_flip_posts() + { + using namespace drawerargs; + + unsigned int *front = horizspan[dc_x & 3]; + unsigned int *back = dc_ctspan[dc_x & 3] - 2; + + while (front < back) + { + swapvalues(front[0], back[0]); + swapvalues(front[1], back[1]); + front += 2; + back -= 2; + } + } + + void rt_draw4cols(int sx) + { + using namespace drawerargs; + + int x, bad; + unsigned int maxtop, minbot, minnexttop; + + // Place a dummy "span" in each column. These don't get + // drawn. They're just here to avoid special cases in the + // max/min calculations below. + for (x = 0; x < 4; ++x) + { + dc_ctspan[x][0] = screen->GetHeight()+1; + dc_ctspan[x][1] = screen->GetHeight(); + } + + for (;;) + { + // If a column is out of spans, mark it as such + bad = 0; + minnexttop = 0xffffffff; + for (x = 0; x < 4; ++x) + { + if (horizspan[x] >= dc_ctspan[x]) + { + bad |= 1 << x; + } + else if ((horizspan[x]+2)[0] < minnexttop) + { + minnexttop = (horizspan[x]+2)[0]; + } + } + // Once all columns are out of spans, we're done + if (bad == 15) + { + return; + } + + // Find the largest shared area for the spans in each column + maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), + MAX (horizspan[2][0], horizspan[3][0])); + minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), + MIN (horizspan[2][1], horizspan[3][1])); + + // If there is no shared area with these spans, draw each span + // individually and advance to the next spans until we reach a shared area. + // However, only draw spans down to the highest span in the next set of + // spans. If we allow the entire height of a span to be drawn, it could + // prevent any more shared areas from being drawn in these four columns. 
+ // + // Example: Suppose we have the following arrangement: + // A CD + // A CD + // B D + // B D + // aB D + // aBcD + // aBcD + // aBc + // + // If we draw the entire height of the spans, we end up drawing this first: + // A CD + // A CD + // B D + // B D + // B D + // B D + // B D + // B D + // B + // + // This leaves only the "a" and "c" columns to be drawn, and they are not + // part of a shared area, but if we can include B and D with them, we can + // get a shared area. So we cut off everything in the first set just + // above the "a" column and end up drawing this first: + // A CD + // A CD + // B D + // B D + // + // Then the next time through, we have the following arrangement with an + // easily shared area to draw: + // aB D + // aBcD + // aBcD + // aBc + if (bad != 0 || maxtop > minbot) + { + int drawcount = 0; + for (x = 0; x < 4; ++x) + { + if (!(bad & 1)) + { + if (horizspan[x][1] < minnexttop) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); + horizspan[x] += 2; + drawcount++; + } + else if (minnexttop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); + horizspan[x][0] = minnexttop; + drawcount++; + } + } + bad >>= 1; + } + // Drawcount *should* always be non-zero. The reality is that some situations + // can make this not true. Unfortunately, I'm not sure what those situations are. + if (drawcount == 0) + { + return; + } + continue; + } + + // Draw any span fragments above the shared area. + for (x = 0; x < 4; ++x) + { + if (maxtop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); + } + } + + // Draw the shared area. + hcolfunc_post4 (sx, maxtop, minbot); + + // For each column, if part of the span is past the shared area, + // set its top to just below the shared area. Otherwise, advance + // to the next span in that column. 
+ for (x = 0; x < 4; ++x) + { + if (minbot < horizspan[x][1]) + { + horizspan[x][0] = minbot+1; + } + else + { + horizspan[x] += 2; + } + } + } + } + + void R_SetupSpanBits(FTexture *tex) + { + using namespace drawerargs; + + tex->GetWidth(); + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + if ((1 << ds_xbits) > tex->GetWidth()) + { + ds_xbits--; + } + if ((1 << ds_ybits) > tex->GetHeight()) + { + ds_ybits--; + } + } + + void R_SetSpanColormap(lighttable_t *colormap) + { + using namespace drawerargs; + + ds_colormap = colormap; + } + + void R_SetSpanSource(FTexture *tex) + { + using namespace drawerargs; + + ds_source = tex->GetPixels(); + } + + ///////////////////////////////////////////////////////////////////////// + + void R_FillColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + // Copies one span at hx to the screen at sx. + void rt_copy1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Copies all four spans to the screen starting at sx. + void rt_copy4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Maps one span at hx to the screen at sx. + void rt_map1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Maps all four spans to the screen starting at sx. + void rt_map4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates one span at hx to the screen at sx. 
+ void rt_tlate1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_map1col(hx, sx, yl, yh); + } + + // Translates all four spans to the screen starting at sx. + void rt_tlate4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_map4cols(sx, yl, yh); + } + + // Adds one span at hx to the screen at sx without clamping. + void rt_add1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx without clamping. + void rt_add4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx without clamping. + void rt_tlateadd1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_add1col(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx without clamping. + void rt_tlateadd4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_add4cols(sx, yl, yh); + } + + // Shades one span at hx to the screen at sx. + void rt_shaded1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Shades all four spans to the screen starting at sx. + void rt_shaded4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Adds one span at hx to the screen at sx with clamping. + void rt_addclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx with clamping. + void rt_addclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx with clamping. 
+ void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_addclamp1col(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx with clamping. + void rt_tlateaddclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_addclamp4cols(sx, yl, yh); + } + + // Subtracts one span at hx to the screen at sx with clamping. + void rt_subclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans to the screen starting at sx with clamping. + void rt_subclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx to the screen at sx with clamping. + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_subclamp1col(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans to the screen starting at sx with clamping. + void rt_tlatesubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_subclamp4cols(sx, yl, yh); + } + + // Subtracts one span at hx from the screen at sx with clamping. + void rt_revsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans from the screen starting at sx with clamping. + void rt_revsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx from the screen at sx with clamping. + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_revsubclamp1col(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans from the screen starting at sx with clamping. 
+ void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_revsubclamp4cols(sx, yl, yh); + } + + uint32_t vlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void vlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + uint32_t mvlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void mvlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_revsubclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_revsubclamp() + { + using namespace 
drawerargs; + + DrawerCommandQueue::QueueCommand(); + + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillRevSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawFuzzColumn() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + dc_yl = MAX(dc_yl, 1); + dc_yh = MIN(dc_yh, fuzzviewheight); + if (dc_yl <= dc_yh) + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; + } + + void R_DrawAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTlatedAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawShadedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampTranslatedColumn() + { + 
DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMasked() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + { + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } + + void R_DrawColoredSpan(int y, int x1, int x2) + { + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + + namespace + { + const uint8_t *slab_colormap; + } + + void R_SetupDrawSlab(uint8_t *colormap) + { + slab_colormap = colormap; + } + + void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) + { + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_colormap); + } + + void R_DrawFogBoundarySection(int y, int y2, int x1) + { + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + } + + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) + { + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. 
+ + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + uint8_t *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); + + uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. + R_DrawFogBoundarySection(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap->Maps, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection(t2, b2, x1); + } + } + + void R_DrawParticle(vissprite_t *sprite) + { + R_DrawParticle_C(sprite); } - return r_columnmethod ? 
DoDraw1 : DoDraw0; } - -void R_FinishSetPatchStyle () -{ - basecolormap = basecolormapsave; -} - -bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) -{ - if (colfunc == R_DrawAddColumnP_C) - { - *tmvline1 = tmvline1_add; - *tmvline4 = tmvline4_add; - return true; - } - if (colfunc == R_DrawAddClampColumnP_C) - { - *tmvline1 = tmvline1_addclamp; - *tmvline4 = tmvline4_addclamp; - return true; - } - if (colfunc == R_DrawSubClampColumnP_C) - { - *tmvline1 = tmvline1_subclamp; - *tmvline4 = tmvline4_subclamp; - return true; - } - if (colfunc == R_DrawRevSubClampColumnP_C) - { - *tmvline1 = tmvline1_revsubclamp; - *tmvline4 = tmvline4_revsubclamp; - return true; - } - return false; -} - diff --git a/src/r_draw.h b/src/r_draw.h index 6713d40915..40b3328964 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -1,287 +1,208 @@ -// Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// System specific interface stuff. 
-// -//----------------------------------------------------------------------------- - -#ifndef __R_DRAW__ -#define __R_DRAW__ +#pragma once #include "r_defs.h" -extern "C" int ylookup[MAXHEIGHT]; +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Int, r_drawfuzz); +EXTERN_CVAR(Bool, r_drawtrans); +EXTERN_CVAR(Float, transsouls); +EXTERN_CVAR(Int, r_columnmethod); -extern "C" int dc_pitch; // [RH] Distance between rows - -extern "C" lighttable_t*dc_colormap; -extern "C" int dc_x; -extern "C" int dc_yl; -extern "C" int dc_yh; -extern "C" fixed_t dc_iscale; -extern double dc_texturemid; -extern "C" fixed_t dc_texturefrac; -extern "C" int dc_color; // [RH] For flat colors (no texturing) -extern "C" DWORD dc_srccolor; -extern "C" DWORD *dc_srcblend; -extern "C" DWORD *dc_destblend; - -// first pixel in a column -extern "C" const BYTE* dc_source; - -extern "C" BYTE *dc_dest, *dc_destorg; -extern "C" int dc_count; - -extern "C" DWORD vplce[4]; -extern "C" DWORD vince[4]; -extern "C" BYTE* palookupoffse[4]; -extern "C" const BYTE* bufplce[4]; -extern "C" const BYTE* bufplce2[4]; -extern "C" uint32_t bufheight[4]; - -// [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; -extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; -extern "C" unsigned int *dc_ctspan[4]; -extern "C" unsigned int horizspans[4]; - - -// [RH] Pointers to the different column and span drawers... - -// The span blitting interface. -// Hook in assembler or system specific BLT here. - -extern DWORD (*dovline1) (); -extern DWORD (*doprevline1) (); -#ifdef X64_ASM -#define dovline4 vlinetallasm4 -extern "C" void vlinetallasm4(); -#else -extern void (*dovline4) (); -#endif -extern void setupvline (int); - -extern DWORD (*domvline1) (); -extern void (*domvline4) (); -extern void setupmvline (int); - -extern void setuptmvline (int); - -// The Spectre/Invisibility effect. 
-extern void R_DrawFuzzColumn(void); - -// [RH] Draw shaded column -extern void (*R_DrawShadedColumn)(void); - -// Draw with color translation tables, for player sprite rendering, -// Green/Red/Blue/Indigo shirts. -extern void (*R_DrawTranslatedColumn)(void); - -// Span drawing for rows, floor/ceiling. No Spectre effect needed. -extern void (*R_DrawSpan)(void); -void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(BYTE *colormap); -void R_SetSpanSource(const BYTE *pixels); - -// Span drawing for masked textures. -extern void (*R_DrawSpanMasked)(void); - -// Span drawing for translucent textures. -void R_DrawSpanTranslucent(void); - -// Span drawing for masked, translucent textures. -void R_DrawSpanMaskedTranslucent(void); - -// Span drawing for translucent, additive textures. -void R_DrawSpanAddClamp(void); - -// Span drawing for masked, translucent, additive textures. -void R_DrawSpanMaskedAddClamp(void); - -// [RH] Span blit into an interleaved intermediate buffer -extern void (*R_DrawColumnHoriz)(void); - -// [RH] Initialize the above pointers -void R_InitColumnDrawers (); - -// [RH] Moves data from the temporary buffer to the screen. 
- -void rt_copy1col(int hx, int sx, int yl, int yh); -void rt_copy4cols(int sx, int yl, int yh); -void rt_map4cols(int sx, int yl, int yh); - -extern "C" +namespace swrenderer { + struct vissprite_t; -void rt_shaded1col (int hx, int sx, int yl, int yh); -void rt_shaded4cols_c (int sx, int yl, int yh); -void rt_shaded4cols_asm (int sx, int yl, int yh); + extern double dc_texturemid; -void rt_map1col (int hx, int sx, int yl, int yh); -void rt_add1col (int hx, int sx, int yl, int yh); -void rt_addclamp1col (int hx, int sx, int yl, int yh); -void rt_subclamp1col (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col (int hx, int sx, int yl, int yh); + namespace drawerargs + { + extern int dc_pitch; + extern lighttable_t *dc_colormap; + extern int dc_x; + extern int dc_yl; + extern int dc_yh; + extern fixed_t dc_iscale; + extern fixed_t dc_texturefrac; + extern uint32_t dc_textureheight; + extern int dc_color; + extern uint32_t dc_srccolor; + extern uint32_t dc_srccolor_bgra; + extern uint32_t *dc_srcblend; + extern uint32_t *dc_destblend; + extern fixed_t dc_srcalpha; + extern fixed_t dc_destalpha; + extern const uint8_t *dc_source; + extern const uint8_t *dc_source2; + extern uint32_t dc_texturefracx; + extern uint8_t *dc_translation; + extern uint8_t *dc_dest; + extern uint8_t *dc_destorg; + extern int dc_destheight; + extern int dc_count; -void rt_tlate1col (int hx, int sx, int yl, int yh); -void rt_tlateadd1col (int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); + extern uint32_t vplce[4]; + extern uint32_t vince[4]; + extern uint8_t *palookupoffse[4]; + extern fixed_t palookuplight[4]; + extern const uint8_t *bufplce[4]; + extern const uint8_t *bufplce2[4]; + extern uint32_t buftexturefracx[4]; + extern uint32_t bufheight[4]; + extern int vlinebits; + extern int mvlinebits; + extern int 
tmvlinebits; -void rt_add4cols_c (int sx, int yl, int yh); -void rt_addclamp4cols_c (int sx, int yl, int yh); -void rt_subclamp4cols (int sx, int yl, int yh); -void rt_revsubclamp4cols (int sx, int yl, int yh); + extern int ds_y; + extern int ds_x1; + extern int ds_x2; + extern lighttable_t * ds_colormap; + extern dsfixed_t ds_light; + extern dsfixed_t ds_xfrac; + extern dsfixed_t ds_yfrac; + extern dsfixed_t ds_xstep; + extern dsfixed_t ds_ystep; + extern int ds_xbits; + extern int ds_ybits; + extern fixed_t ds_alpha; + extern double ds_lod; + extern const uint8_t *ds_source; + extern int ds_color; -void rt_tlate4cols (int sx, int yl, int yh); -void rt_tlateadd4cols (int sx, int yl, int yh); -void rt_tlateaddclamp4cols (int sx, int yl, int yh); -void rt_tlatesubclamp4cols (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); + extern unsigned int dc_tspans[4][MAXHEIGHT]; + extern unsigned int *dc_ctspan[4]; + extern unsigned int *horizspan[4]; + } -void rt_add4cols_asm (int sx, int yl, int yh); -void rt_addclamp4cols_asm (int sx, int yl, int yh); + extern int ylookup[MAXHEIGHT]; + extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; + extern FDynamicColormap ShadeFakeColormap[16]; + extern uint8_t identitymap[256]; + extern FDynamicColormap identitycolormap; + + // Spectre/Invisibility. 
+ #define FUZZTABLE 50 + extern int fuzzoffset[FUZZTABLE + 1]; + extern int fuzzpos; + extern int fuzzviewheight; + + void R_InitColumnDrawers(); + void R_InitShadeMaps(); + void R_InitFuzzTable(int fuzzoff); + + enum ESPSResult + { + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 + }; + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); + void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()); + + const uint8_t *R_GetColumn(FTexture *tex, int col); + void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + + void rt_initcols(uint8_t *buffer = nullptr); + void rt_span_coverage(int x, int start, int stop); + void rt_draw4cols(int sx); + void rt_flip_posts(); + void rt_copy1col(int hx, int sx, int yl, int yh); + void rt_copy4cols(int sx, int yl, int yh); + void rt_shaded1col(int hx, int sx, int yl, int yh); + void rt_shaded4cols(int sx, int yl, int yh); + void rt_map1col(int hx, int sx, int yl, int yh); + void rt_add1col(int hx, int sx, int yl, int yh); + void rt_addclamp1col(int hx, int sx, int yl, int yh); + void rt_subclamp1col(int hx, int sx, int yl, int yh); + void rt_revsubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlate1col(int hx, int sx, 
int yl, int yh); + void rt_tlateadd1col(int hx, int sx, int yl, int yh); + void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); + void rt_map4cols(int sx, int yl, int yh); + void rt_add4cols(int sx, int yl, int yh); + void rt_addclamp4cols(int sx, int yl, int yh); + void rt_subclamp4cols(int sx, int yl, int yh); + void rt_revsubclamp4cols(int sx, int yl, int yh); + void rt_tlate4cols(int sx, int yl, int yh); + void rt_tlateadd4cols(int sx, int yl, int yh); + void rt_tlateaddclamp4cols(int sx, int yl, int yh); + void rt_tlatesubclamp4cols(int sx, int yl, int yh); + void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); + void R_DrawColumnHoriz(); + void R_DrawColumn(); + void R_DrawFuzzColumn(); + void R_DrawTranslatedColumn(); + void R_DrawShadedColumn(); + void R_FillColumn(); + void R_FillAddColumn(); + void R_FillAddClampColumn(); + void R_FillSubClampColumn(); + void R_FillRevSubClampColumn(); + void R_DrawAddColumn(); + void R_DrawTlatedAddColumn(); + void R_DrawAddClampColumn(); + void R_DrawAddClampTranslatedColumn(); + void R_DrawSubClampColumn(); + void R_DrawSubClampTranslatedColumn(); + void R_DrawRevSubClampColumn(); + void R_DrawRevSubClampTranslatedColumn(); + void R_DrawSpan(); + void R_DrawSpanMasked(); + void R_DrawSpanTranslucent(); + void R_DrawSpanMaskedTranslucent(); + void R_DrawSpanAddClamp(); + void R_DrawSpanMaskedAddClamp(); + void R_FillSpan(); + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void R_DrawColoredSpan(int y, int x1, int x2); + void R_SetupDrawSlab(uint8_t *colormap); + void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + 
uint32_t vlinec1(); + void vlinec4(); + uint32_t mvlinec1(); + void mvlinec4(); + fixed_t tmvline1_add(); + void tmvline4_add(); + fixed_t tmvline1_addclamp(); + void tmvline4_addclamp(); + fixed_t tmvline1_subclamp(); + void tmvline4_subclamp(); + fixed_t tmvline1_revsubclamp(); + void tmvline4_revsubclamp(); + void R_FillColumnHoriz(); + void R_FillSpan(); + + inline uint32_t dovline1() { return vlinec1(); } + inline void dovline4() { vlinec4(); } + inline uint32_t domvline1() { return mvlinec1(); } + inline void domvline4() { mvlinec4(); } + + void setupvline(int fracbits); + void setupmvline(int fracbits); + void setuptmvline(int fracbits); + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + + void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade); + void R_SetTranslationMap(lighttable_t *translation); + + void R_SetupSpanBits(FTexture *tex); + void R_SetSpanColormap(lighttable_t *colormap); + void R_SetSpanSource(FTexture *tex); + + void R_MapTiltedPlane(int y, int x1); + void R_MapColoredPlane(int y, int x1); + void R_DrawParticle(vissprite_t *); } - - -#ifdef X86_ASM -#define rt_shaded4cols rt_shaded4cols_asm -#define rt_add4cols rt_add4cols_asm -#define rt_addclamp4cols rt_addclamp4cols_asm -#else -#define rt_shaded4cols rt_shaded4cols_c -#define rt_add4cols rt_add4cols_c -#define rt_addclamp4cols rt_addclamp4cols_c -#endif - -void rt_flip_posts(); -void rt_draw4cols (int sx); - -// [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (BYTE *buffer=NULL); - -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); - - -#ifdef X86_ASM - - void R_DrawShadedColumnP_C (void); -extern "C" void R_DrawSpanP_ASM (void); -extern "C" void R_DrawSpanMaskedP_ASM (void); - -void R_DrawColumnHorizP_C(void); - -#else - -void R_DrawShadedColumnP_C (void); -void R_DrawSpanP_C (void); -void R_DrawSpanMaskedP_C (void); - -#endif - -void R_DrawColumn(); -void R_DrawColumnHorizP_C(void); -void R_DrawTranslatedColumnP_C(void); -void R_DrawSpanTranslucent (void); -void R_DrawSpanMaskedTranslucent (void); - -void R_DrawTlatedLucentColumnP_C (void); -#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C - -void R_FillColumnP (void); -void R_FillColumnHorizP (void); -void R_FillSpan (void); - -#ifdef X86_ASM -#define R_SetupDrawSlab R_SetupDrawSlabA -#define R_DrawSlab R_DrawSlabA -#else -#define R_SetupDrawSlab R_SetupDrawSlabC -#define R_DrawSlab R_DrawSlabC -#endif - -extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); - -extern "C" int ds_y; -extern "C" int ds_x1; -extern "C" int ds_x2; - -extern "C" lighttable_t* ds_colormap; - -extern "C" dsfixed_t ds_xfrac; -extern "C" dsfixed_t ds_yfrac; -extern "C" dsfixed_t ds_xstep; -extern "C" dsfixed_t ds_ystep; -extern "C" int ds_xbits; -extern "C" int ds_ybits; -extern "C" fixed_t ds_alpha; - -// start of a 64*64 tile image -extern "C" const BYTE* ds_source; - -extern "C" int ds_color; // [RH] For flat color (no texturing) - -extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/]; -extern FDynamicColormap ShadeFakeColormap[16]; -extern BYTE identitymap[256]; -extern BYTE *dc_translation; - -// [RH] Added for muliresolution support -void R_InitShadeMaps(); -void R_InitFuzzTable (int fuzzoff); - -// [RH] Consolidate column drawer selection -enum ESPSResult -{ - DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - 
DoDraw1, // draw this as if r_columnmethod is 1 -}; -ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color); -inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) -{ - return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); -} - -// Call this after finished drawing the current thing, in case its -// style was STYLE_Shade -void R_FinishSetPatchStyle (); - -// transmaskwallscan calls this to find out what column drawers to use -bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); - -// Retrieve column data for wallscan. Should probably be removed -// to just use the texture's GetColumn() method. It just exists -// for double-layer skies. -const BYTE *R_GetColumn (FTexture *tex, int col); -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
-void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// transmaskwallscan is like maskwallscan, but it can also blend to the background -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); -void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - -#endif diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp new file mode 100644 index 0000000000..0264dcbf9e --- /dev/null +++ b/src/r_draw_pal.cpp @@ -0,0 +1,2593 @@ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +/* + [RH] This translucency algorithm is based on DOSDoom 0.65's, but uses + a 32k RGB table instead of an 8k one. At least on my machine, it's + slightly faster (probably because it uses only one shift instead of + two), and it looks considerably less green at the ends of the + translucency range. The extra size doesn't appear to be an issue. + + The following note is from DOSDoom 0.65: + + New translucency algorithm, by Erik Sandberg: + + Basically, we compute the red, green and blue values for each pixel, and + then use a RGB table to check which one of the palette colours that best + represents those RGB values. The RGB table is 8k big, with 4 R-bits, + 5 G-bits and 4 B-bits. A 4k table gives a bit too bad precision, and a 32k + table takes up more memory and results in more cache misses, so an 8k + table seemed to be quite ultimate. 
+ + The computation of the RGB for each pixel is accelerated by using two + 1k tables for each translucency level. + The xth element of one of these tables contains the r, g and b values for + the colour x, weighted for the current translucency level (for example, + the weighted rgb values for background colour at 75% translucency are 1/4 + of the original rgb values). The rgb values are stored as three + low-precision fixed point values, packed into one long per colour: + Bit 0-4: Frac part of blue (5 bits) + Bit 5-8: Int part of blue (4 bits) + Bit 9-13: Frac part of red (5 bits) + Bit 14-17: Int part of red (4 bits) + Bit 18-22: Frac part of green (5 bits) + Bit 23-27: Int part of green (5 bits) + Bit 28-31: All zeros (4 bits) + + The point of this format is that the two colours now can be added, and + then be converted to a RGB table index very easily: First, we just set + all the frac bits and the four upper zero bits to 1. It's now possible + to get the RGB table index by anding the current value >> 5 with the + current value >> 19. When asm-optimised, this should be the fastest + algorithm that uses RGB tables. 
+*/ + +namespace swrenderer +{ + PalWall1Command::PalWall1Command() + { + using namespace drawerargs; + + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _colormap = dc_colormap; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + _pitch = dc_pitch; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + } + + PalWall4Command::PalWall4Command() + { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _vlinebits = vlinebits; + _mvlinebits = mvlinebits; + _tmvlinebits = tmvlinebits; + for (int col = 0; col < 4; col++) + { + _palookupoffse[col] = palookupoffse[col]; + _bufplce[col] = bufplce[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } + _srcblend = dc_srcblend; + _destblend = dc_destblend; + } + + void DrawWall1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _vlinebits; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + *dest = colormap[source[frac >> bits]]; + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWall4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _vlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = 
_vince[2]; + auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + dest[0] = pal0[buf0[(place = vplce0) >> bits]]; vplce0 = place + vince0; + dest[1] = pal1[buf1[(place = vplce1) >> bits]]; vplce1 = place + vince1; + dest[2] = pal2[buf2[(place = vplce2) >> bits]]; vplce2 = place + vince2; + dest[3] = pal3[buf3[(place = vplce3) >> bits]]; vplce3 = place + vince3; + dest += pitch; + } while (--count); + } + + void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _mvlinebits; + int pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _mvlinebits; + uint32_t place; + auto pal0 = _palookupoffse[0]; + auto pal1 = _palookupoffse[1]; + auto pal2 = _palookupoffse[2]; + auto pal3 = _palookupoffse[3]; + 
auto buf0 = _bufplce[0]; + auto buf1 = _bufplce[1]; + auto buf2 = _bufplce[2]; + auto buf3 = _bufplce[3]; + auto vince0 = _vince[0]; + auto vince1 = _vince[1]; + auto vince2 = _vince[2]; + auto vince3 = _vince[3]; + auto vplce0 = _vplce[0]; + auto vplce1 = _vplce[1]; + auto vplce2 = _vplce[2]; + auto vplce3 = _vplce[3]; + auto pitch = _pitch; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + vplce0 += vince0 * skipped; + vplce1 += vince1 * skipped; + vplce2 += vince2 * skipped; + vplce3 += vince3 * skipped; + vince0 *= thread->num_cores; + vince1 *= thread->num_cores; + vince2 *= thread->num_cores; + vince3 *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix; + + pix = buf0[(place = vplce0) >> bits]; if (pix) dest[0] = pal0[pix]; vplce0 = place + vince0; + pix = buf1[(place = vplce1) >> bits]; if (pix) dest[1] = pal1[pix]; vplce1 = place + vince1; + pix = buf2[(place = vplce2) >> bits]; if (pix) dest[2] = pal2[pix]; vplce2 = place + vince2; + pix = buf3[(place = vplce3) >> bits]; if (pix) dest[3] = pal3[pix]; vplce3 = place + vince3; + dest += pitch; + } while (--count); + } + + void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[colormap[pix]]; + 
uint32_t bg = bg2rgb[*dest]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[_palookupoffse[i][pix]]; + uint32_t bg = bg2rgb[dest[i]]; + fg = (fg + bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 
0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[_palookupoffse[i][pix]] + bg2rgb[dest[i]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[colormap[pix]] | 
0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (fg2rgb[_palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += pitch; + } while (--count); + } + + void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) + { + uint32_t fracstep = _iscale; + uint32_t frac = _texturefrac; + uint8_t *colormap = _colormap; + int count = _count; + const uint8_t *source = _source; + uint8_t *dest = _dest; + int bits = _tmvlinebits; + int pitch = _pitch; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + do + { + uint8_t pix = source[frac >> bits]; + if 
(pix != 0) + { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + + void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int bits = _tmvlinebits; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + uint32_t vplce[4] = { _vplce[0], _vplce[1], _vplce[2], _vplce[3] }; + uint32_t vince[4] = { _vince[0], _vince[1], _vince[2], _vince[3] }; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + for (int i = 0; i < 4; i++) + { + vplce[i] += vince[i] * skipped; + vince[i] *= thread->num_cores; + } + pitch *= thread->num_cores; + + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_palookupoffse[i][pix]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a >> 15)]; + } + vplce[i] += vince[i]; + } + dest += _pitch; + } while (--count); + } + + ///////////////////////////////////////////////////////////////////////// + + PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom) + { + using namespace drawerargs; + + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + for (int col = 0; col < 4; col++) + { + _bufplce[col] = bufplce[col]; + _bufplce2[col] = bufplce2[col]; + _bufheight[col] = bufheight[col]; + _vince[col] = vince[col]; + _vplce[col] = vplce[col]; + } + } + + void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; 
+ int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + int textureheight0 = _bufheight[0]; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + for (int index = 0; index < count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } + }
+
+    // Draws four horizontally adjacent columns of a single-layer sky.
+    //
+    // The column is split into five vertical bands (solid top colour, top
+    // fade, plain texture, bottom fade, solid bottom colour) so that the
+    // blending arithmetic only runs where it is needed. Each row is written
+    // as one 32-bit store covering the four columns.
+    //
+    // `index` counts rows of the whole column: it starts at the value returned
+    // by skipped_by_thread() and advances by num_cores, so the band limits are
+    // compared against the full (unsplit) row count.
+    void DrawSingleSky4PalCommand::Execute(DrawerThread *thread)
+    {
+        uint8_t *dest = _dest;
+        int count = _count;
+        int pitch = _pitch;
+        const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] };
+        int textureheight0 = _bufheight[0];
+        const uint32_t *palette = (const uint32_t *)GPalette.BaseColors;
+        int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] };
+        int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] };
+        uint8_t output[4];
+
+        int start_fade = 2; // How fast it should fade out
+
+        int solid_top_r = RPART(solid_top);
+        int solid_top_g = GPART(solid_top);
+        int solid_top_b = BPART(solid_top);
+        int solid_bottom_r = RPART(solid_bottom);
+        int solid_bottom_g = GPART(solid_bottom);
+        int solid_bottom_b = BPART(solid_bottom);
+        uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)];
+        uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)];
+        // Replicate the palette index into all four bytes so that a single
+        // 32-bit store paints the four columns.
+        solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill;
+        solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill;
+
+        // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+        int fade_length = (1 << (24 - start_fade));
+        int start_fadetop_y = (-frac[0]) / fracstep[0];
+        int end_fadetop_y = (fade_length - frac[0]) / fracstep[0];
+        int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0];
+        int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0];
+        // Every column has its own start position and step, so widen each
+        // band to the min/max over all four columns.
+        for (int col = 1; col < 4; col++)
+        {
+            start_fadetop_y = MIN(start_fadetop_y, (-frac[col]) / fracstep[col]);
+            end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[col]) / fracstep[col]);
+            start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[col]) / fracstep[col]);
+            end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[col]) / fracstep[col]);
+        }
+        start_fadetop_y = clamp(start_fadetop_y, 0, count);
+        end_fadetop_y = clamp(end_fadetop_y, 0, count);
+        start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+        end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+        int skipped = thread->skipped_by_thread(_dest_y);
+        dest = thread->dest_for_thread(_dest_y, pitch, dest);
+        for (int col = 0; col < 4; col++)
+        {
+            frac[col] += fracstep[col] * skipped;
+            fracstep[col] *= thread->num_cores;
+        }
+        pitch *= thread->num_cores;
+        int num_cores = thread->num_cores;
+        int index = skipped;
+
+        // Top solid color:
+        while (index < start_fadetop_y)
+        {
+            *((uint32_t*)dest) = solid_top_fill;
+            dest += pitch;
+            // Keep the texture positions in step for the bands that follow.
+            for (int col = 0; col < 4; col++)
+                frac[col] += fracstep[col];
+            index += num_cores;
+        }
+
+        // Top fade:
+        while (index < end_fadetop_y)
+        {
+            for (int col = 0; col < 4; col++)
+            {
+                uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+                uint8_t fg = source0[col][sample_index];
+
+                uint32_t c = palette[fg];
+                int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0);
+                int inv_alpha_top = 256 - alpha_top;
+                int c_red = RPART(c);
+                int c_green = GPART(c);
+                int c_blue = BPART(c);
+                c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8;
+                c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8;
+                c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8;
+                output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)];
+
+                frac[col] += fracstep[col];
+            }
+            *((uint32_t*)dest) = *((uint32_t*)output);
+            dest += pitch;
+            index += num_cores;
+        }
+
+        // Textured center:
+        while (index < start_fadebottom_y)
+        {
+            for (int col = 0; col < 4; col++)
+            {
+                uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+                output[col] = source0[col][sample_index];
+
+                frac[col] += fracstep[col];
+            }
+
+            *((uint32_t*)dest) = *((uint32_t*)output);
+            dest += pitch;
+            index += num_cores;
+        }
+
+        // Fade bottom:
+        while (index < end_fadebottom_y)
+        {
+            for (int col = 0; col < 4; col++)
+            {
+                uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+                uint8_t fg = source0[col][sample_index];
+
+                uint32_t c = palette[fg];
+                int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0);
+                int inv_alpha_bottom = 256 - alpha_bottom;
+                int c_red = RPART(c);
+                int c_green = GPART(c);
+                int c_blue = BPART(c);
+                c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8;
+                c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8;
+                c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8;
+                output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)];
+
+                frac[col] += fracstep[col];
+            }
+            *((uint32_t*)dest) = *((uint32_t*)output);
+            dest += pitch;
+            index += num_cores;
+        }
+
+        // Bottom solid color:
+        while (index < count)
+        {
+            *((uint32_t*)dest) = solid_bottom_fill;
+            dest += pitch;
+            index += num_cores;
+        }
+    }
+
+ void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) + { + uint8_t *dest = _dest; + int count = _count; + int pitch = _pitch; + const uint8_t *source0 = _bufplce[0]; + const uint8_t *source1 = _bufplce2[0]; + int textureheight0 = _bufheight[0]; + uint32_t maxtextureheight1 = _bufheight[1] - 1; + + int32_t frac = _vplce[0]; + int32_t fracstep = _vince[0]; + + int start_fade = 2; // How fast it should fade out + + int solid_top_r = RPART(solid_top); + int solid_top_g = GPART(solid_top); + int solid_top_b = BPART(solid_top); + int solid_bottom_r = RPART(solid_bottom); + int solid_bottom_g = GPART(solid_bottom); + int solid_bottom_b = BPART(solid_bottom); + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int skipped = thread->skipped_by_thread(_dest_y); + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * skipped; + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + for (int index = 0; index
< count; index++) + { + uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; + uint8_t fg = source0[sample_index]; + if (fg == 0) + { + uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); + fg = source1[sample_index2]; + } + + int alpha_top = MAX(MIN(frac >> (16 - start_fade), 256), 0); + int alpha_bottom = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0); + + if (alpha_top == 256 && alpha_bottom == 256) + { + *dest = fg; + } + else + { + int inv_alpha_top = 256 - alpha_top; + int inv_alpha_bottom = 256 - alpha_bottom; + + const auto &c = GPalette.BaseColors[fg]; + int c_red = c.r; + int c_green = c.g; + int c_blue = c.b; + c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; + c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; + c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; + c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; + c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; + c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; + *dest = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)]; + } + + frac += fracstep; + dest += pitch; + } + }
+
+    // Draws four horizontally adjacent columns of a two-layer sky.
+    //
+    // Wherever the front texture (source0) yields palette index 0, the texel
+    // is taken from the back texture (source1) instead, with the sample index
+    // limited to maxtextureheight1. The column is split into the same five
+    // bands as the single-layer drawer: solid top colour, top fade, plain
+    // texture, bottom fade, solid bottom colour. Each row is written as one
+    // 32-bit store covering the four columns.
+    //
+    // `index` counts rows of the whole column: it starts at the value returned
+    // by skipped_by_thread() and advances by num_cores, so the band limits are
+    // compared against the full (unsplit) row count.
+    void DrawDoubleSky4PalCommand::Execute(DrawerThread *thread)
+    {
+        uint8_t *dest = _dest;
+        int count = _count;
+        int pitch = _pitch;
+        const uint8_t *source0[4] = { _bufplce[0], _bufplce[1], _bufplce[2], _bufplce[3] };
+        const uint8_t *source1[4] = { _bufplce2[0], _bufplce2[1], _bufplce2[2], _bufplce2[3] };
+        int textureheight0 = _bufheight[0];
+        uint32_t maxtextureheight1 = _bufheight[1] - 1;
+        const uint32_t *palette = (const uint32_t *)GPalette.BaseColors;
+        int32_t frac[4] = { (int32_t)_vplce[0], (int32_t)_vplce[1], (int32_t)_vplce[2], (int32_t)_vplce[3] };
+        int32_t fracstep[4] = { (int32_t)_vince[0], (int32_t)_vince[1], (int32_t)_vince[2], (int32_t)_vince[3] };
+        uint8_t output[4];
+
+        int start_fade = 2; // How fast it should fade out
+
+        int solid_top_r = RPART(solid_top);
+        int solid_top_g = GPART(solid_top);
+        int solid_top_b = BPART(solid_top);
+        int solid_bottom_r = RPART(solid_bottom);
+        int solid_bottom_g = GPART(solid_bottom);
+        int solid_bottom_b = BPART(solid_bottom);
+        uint32_t solid_top_fill = RGB32k.RGB[(solid_top_r >> 3)][(solid_top_g >> 3)][(solid_top_b >> 3)];
+        uint32_t solid_bottom_fill = RGB32k.RGB[(solid_bottom_r >> 3)][(solid_bottom_g >> 3)][(solid_bottom_b >> 3)];
+        // Replicate the palette index into all four bytes so that a single
+        // 32-bit store paints the four columns.
+        solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill;
+        solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill;
+
+        // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+        int fade_length = (1 << (24 - start_fade));
+        int start_fadetop_y = (-frac[0]) / fracstep[0];
+        int end_fadetop_y = (fade_length - frac[0]) / fracstep[0];
+        int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0];
+        int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0];
+        // Every column has its own start position and step, so widen each
+        // band to the min/max over all four columns.
+        for (int col = 1; col < 4; col++)
+        {
+            start_fadetop_y = MIN(start_fadetop_y, (-frac[col]) / fracstep[col]);
+            end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[col]) / fracstep[col]);
+            start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[col]) / fracstep[col]);
+            end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[col]) / fracstep[col]);
+        }
+        start_fadetop_y = clamp(start_fadetop_y, 0, count);
+        end_fadetop_y = clamp(end_fadetop_y, 0, count);
+        start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+        end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+        int skipped = thread->skipped_by_thread(_dest_y);
+        dest = thread->dest_for_thread(_dest_y, pitch, dest);
+        for (int col = 0; col < 4; col++)
+        {
+            frac[col] += fracstep[col] * skipped;
+            fracstep[col] *= thread->num_cores;
+        }
+        pitch *= thread->num_cores;
+        int num_cores = thread->num_cores;
+        int index = skipped;
+
+        // Top solid color:
+        while (index < start_fadetop_y)
+        {
+            *((uint32_t*)dest) = solid_top_fill;
+            dest += pitch;
+            // Keep the texture positions in step for the bands that follow.
+            for (int col = 0; col < 4; col++)
+                frac[col] += fracstep[col];
+            index += num_cores;
+        }
+
+        // Top fade:
+        while (index < end_fadetop_y)
+        {
+            for (int col = 0; col < 4; col++)
+            {
+                uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+                uint8_t fg = source0[col][sample_index];
+                if (fg == 0)
+                {
+                    uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+                    fg = source1[col][sample_index2];
+                }
+
+                uint32_t c = palette[fg];
+                int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0);
+                int inv_alpha_top = 256 - alpha_top;
+                int c_red = RPART(c);
+                int c_green = GPART(c);
+                int c_blue = BPART(c);
+                c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8;
+                c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8;
+                c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8;
+                output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)];
+
+                frac[col] += fracstep[col];
+            }
+            *((uint32_t*)dest) = *((uint32_t*)output);
+            dest += pitch;
+            index += num_cores;
+        }
+
+        // Textured center:
+        while (index < start_fadebottom_y)
+        {
+            for (int col = 0; col < 4; col++)
+            {
+                uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+                uint8_t fg = source0[col][sample_index];
+                if (fg == 0)
+                {
+                    uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+                    fg = source1[col][sample_index2];
+                }
+                output[col] = fg;
+
+                frac[col] += fracstep[col];
+            }
+
+            *((uint32_t*)dest) = *((uint32_t*)output);
+            dest += pitch;
+            index += num_cores;
+        }
+
+        // Fade bottom:
+        while (index < end_fadebottom_y)
+        {
+            for (int col = 0; col < 4; col++)
+            {
+                uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+                uint8_t fg = source0[col][sample_index];
+                if (fg == 0)
+                {
+                    uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+                    fg = source1[col][sample_index2];
+                }
+
+                uint32_t c = palette[fg];
+                int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0);
+                int inv_alpha_bottom = 256 - alpha_bottom;
+                int c_red = RPART(c);
+                int c_green = GPART(c);
+                int c_blue = BPART(c);
+                c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8;
+                c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8;
+                c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8;
+                output[col] = RGB32k.RGB[(c_red >> 3)][(c_green >> 3)][(c_blue >> 3)];
+
+                frac[col] += fracstep[col];
+            }
+            *((uint32_t*)dest) = *((uint32_t*)output);
+            dest += pitch;
+            index += num_cores;
+        }
+
+        // Bottom solid color:
+        while (index < count)
+        {
+            *((uint32_t*)dest) = solid_bottom_fill;
+            dest += pitch;
+            index += num_cores;
+        }
+    }
+
+    /////////////////////////////////////////////////////////////////////////
+
+    // Snapshots the current column drawing arguments (destination, count,
+    // pitch, texture stepping, colormap, source, translation, colour and the
+    // blend tables) into the command's members.
+    PalColumnCommand::PalColumnCommand()
+    {
+        using namespace drawerargs;
+
+        _count = dc_count;
+        _dest = dc_dest;
+        _pitch = dc_pitch;
+        _iscale = dc_iscale;
+        _texturefrac = dc_texturefrac;
+        _colormap = dc_colormap;
+        _source = dc_source;
+        _translation = dc_translation;
+        _color = dc_color;
+        _srcblend = dc_srcblend;
+        _destblend = dc_destblend;
+        _srccolor = dc_srccolor;
+    }
+
+ void DrawColumnPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + + // Framebuffer destination address. + dest = _dest; + + // Determine scaling, + // which is the only mapping to be done.
+ fracstep = _iscale; + frac = _texturefrac; + + count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-like scaling. + // This is as fast as it gets. + do + { + // Re-map color indices from wall texture column + // using a lighting/special effects LUT. + *dest = colormap[source[frac >> FRACBITS]]; + + dest += pitch; + frac += fracstep; + + } while (--count);
+    }
+
+    // Stores the palette index _color into every row of the column that the
+    // DrawerThread helpers hand to this thread.
+    void FillColumnPalCommand::Execute(DrawerThread *thread)
+    {
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        stride *= thread->num_cores;
+
+        const uint8_t fill = _color;
+        for (; rows > 0; --rows)
+        {
+            *out = fill;
+            out += stride;
+        }
+    }
+
+    // Additive fill: adds the pre-blended colour _srccolor to the blended
+    // background (_destblend lookup) of each row and maps the packed sum back
+    // to a palette index through RGB32k.
+    void FillColumnAddPalCommand::Execute(DrawerThread *thread)
+    {
+        int rows = _count;
+        uint8_t *out = _dest;
+        uint32_t *bg2rgb = _destblend;
+        uint32_t fg = _srccolor;
+        int stride = _pitch;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        stride *= thread->num_cores;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t sum = (fg + bg2rgb[*out]) | 0x1f07c1f;
+            *out = RGB32k.All[sum & (sum >> 15)];
+            out += stride;
+        }
+    }
+
+    // Additive fill with saturation: same as the plain additive fill, but a
+    // field whose carry reached one of the 0x40100400 guard bits is forced to
+    // all ones.
+    void FillColumnAddClampPalCommand::Execute(DrawerThread *thread)
+    {
+        int rows = _count;
+        uint8_t *out = _dest;
+        uint32_t *bg2rgb = _destblend;
+        uint32_t fg = _srccolor;
+        int stride = _pitch;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        stride *= thread->num_cores;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t sum = fg + bg2rgb[*out];
+
+            // Guard bits that received a carry become a mask over the five
+            // bits below them.
+            uint32_t saturate = sum & 0x40100400;
+            saturate = saturate - (saturate >> 5);
+
+            const uint32_t packed = ((sum | 0x01f07c1f) & 0x3fffffff) | saturate;
+            *out = RGB32k.All[packed & (packed >> 15)];
+            out += stride;
+        }
+    }
+
+    // Subtractive fill with clamping: foreground colour minus blended
+    // background; fields that borrowed from their guard bit are zeroed.
+    void FillColumnSubClampPalCommand::Execute(DrawerThread *thread)
+    {
+        int rows = _count;
+        uint8_t *out = _dest;
+        uint32_t *bg2rgb = _destblend;
+        uint32_t fg = _srccolor | 0x40100400;
+        int stride = _pitch;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        stride *= thread->num_cores;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t diff = fg - bg2rgb[*out];
+
+            // Surviving guard bits become a keep-mask for their field.
+            uint32_t keep = diff & 0x40100400;
+            keep = keep - (keep >> 5);
+
+            const uint32_t packed = (diff & keep) | 0x01f07c1f;
+            *out = RGB32k.All[packed & (packed >> 15)];
+            out += stride;
+        }
+    }
+
+    // Reverse-subtractive fill with clamping: blended background minus the
+    // foreground colour; fields that borrowed from their guard bit are zeroed.
+    void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread)
+    {
+        int rows = _count;
+        // Bail out on an empty column before consulting the thread helpers.
+        if (rows <= 0)
+            return;
+
+        uint8_t *out = _dest;
+        uint32_t *bg2rgb = _destblend;
+        uint32_t fg = _srccolor;
+        int stride = _pitch;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        stride *= thread->num_cores;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t diff = (bg2rgb[*out] | 0x40100400) - fg;
+
+            // Surviving guard bits become a keep-mask for their field.
+            uint32_t keep = diff & 0x40100400;
+            keep = keep - (keep >> 5);
+
+            const uint32_t packed = (diff & keep) | 0x01f07c1f;
+            *out = RGB32k.All[packed & (packed >> 15)];
+            out += stride;
+        }
+    }
+
+ void DrawColumnAddPalCommand::Execute(DrawerThread *thread) + { + int count; + uint8_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = _count; + dest = _dest; + + fracstep = _iscale; + frac = _texturefrac; + +
count = thread->count_for_thread(_dest_y, count); + if (count <= 0) + return; + + int pitch = _pitch; + dest = thread->dest_for_thread(_dest_y, pitch, dest); + frac += fracstep * thread->skipped_by_thread(_dest_y); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + const uint8_t *colormap = _colormap; + const uint8_t *source = _source; + + do + { + uint32_t fg = colormap[source[frac >> FRACBITS]]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + }
+
+    // Opaque column drawer: every texel goes through the translation table
+    // and then the colormap before it is stored.
+    void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        // Local copies of the lookup tables used in the loop.
+        const uint8_t *colormap = _colormap;
+        const uint8_t *translation = _translation;
+        const uint8_t *source = _source;
+
+        for (; rows > 0; --rows)
+        {
+            *out = colormap[translation[source[pos >> FRACBITS]]];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Translated column drawer with additive blending: the translated,
+    // colormapped texel and the destination pixel are looked up in the blend
+    // tables, summed, and mapped back to a palette index through RGB32k.
+    void DrawColumnTlatedAddPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+        const uint8_t *translation = _translation;
+        const uint8_t *colormap = _colormap;
+        const uint8_t *source = _source;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t texel = colormap[translation[source[pos >> FRACBITS]]];
+            const uint32_t under = *out;
+
+            const uint32_t sum = (fg2rgb[texel] + bg2rgb[under]) | 0x1f07c1f;
+            *out = RGB32k.All[sum & (sum >> 15)];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Shaded column drawer: the colormapped texel value selects the row of
+    // Col2RGB8 used for the fixed colour _color, and (64 - value) selects the
+    // row used for the destination pixel; the two are summed.
+    void DrawColumnShadedPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        const uint8_t *source = _source;
+        const uint8_t *colormap = _colormap;
+        uint32_t *fgstart = &Col2RGB8[0][_color];
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t level = colormap[source[pos >> FRACBITS]];
+            const uint32_t fg = fgstart[level << 8];
+            const uint32_t mixed = (Col2RGB8[64 - level][*out] + fg) | 0x1f07c1f;
+            *out = RGB32k.All[mixed & (mixed >> 15)];
+
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Column drawer with saturating additive blending: fields whose carry
+    // reached a 0x40100400 guard bit are forced to all ones.
+    void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        const uint8_t *colormap = _colormap;
+        const uint8_t *source = _source;
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t sum = fg2rgb[colormap[source[pos >> FRACBITS]]] + bg2rgb[*out];
+
+            uint32_t saturate = sum & 0x40100400;
+            saturate = saturate - (saturate >> 5);
+
+            const uint32_t packed = ((sum | 0x01f07c1f) & 0x3fffffff) | saturate;
+            *out = RGB32k.All[packed & (packed >> 15)];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Translated variant of the saturating additive column drawer.
+    void DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        const uint8_t *translation = _translation;
+        const uint8_t *colormap = _colormap;
+        const uint8_t *source = _source;
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t sum = fg2rgb[colormap[translation[source[pos >> FRACBITS]]]] + bg2rgb[*out];
+
+            uint32_t saturate = sum & 0x40100400;
+            saturate = saturate - (saturate >> 5);
+
+            const uint32_t packed = ((sum | 0x01f07c1f) & 0x3fffffff) | saturate;
+            *out = RGB32k.All[(packed >> 15) & packed];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Column drawer with clamped subtractive blending: foreground minus
+    // background; fields that borrowed from their guard bit are zeroed.
+    void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        const uint8_t *colormap = _colormap;
+        const uint8_t *source = _source;
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t diff = (fg2rgb[colormap[source[pos >> FRACBITS]]] | 0x40100400) - bg2rgb[*out];
+
+            uint32_t keep = diff & 0x40100400;
+            keep = keep - (keep >> 5);
+
+            const uint32_t packed = (diff & keep) | 0x01f07c1f;
+            *out = RGB32k.All[packed & (packed >> 15)];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Translated variant of the clamped subtractive column drawer.
+    void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        const uint8_t *translation = _translation;
+        const uint8_t *colormap = _colormap;
+        const uint8_t *source = _source;
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t diff = (fg2rgb[colormap[translation[source[pos >> FRACBITS]]]] | 0x40100400) - bg2rgb[*out];
+
+            uint32_t keep = diff & 0x40100400;
+            keep = keep - (keep >> 5);
+
+            const uint32_t packed = (diff & keep) | 0x01f07c1f;
+            *out = RGB32k.All[(packed >> 15) & packed];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Column drawer with clamped reverse-subtractive blending: background
+    // minus foreground; fields that borrowed from their guard bit are zeroed.
+    void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        const uint8_t *colormap = _colormap;
+        const uint8_t *source = _source;
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t diff = (bg2rgb[*out] | 0x40100400) - fg2rgb[colormap[source[pos >> FRACBITS]]];
+
+            uint32_t keep = diff & 0x40100400;
+            keep = keep - (keep >> 5);
+
+            const uint32_t packed = (diff & keep) | 0x01f07c1f;
+            *out = RGB32k.All[packed & (packed >> 15)];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    // Translated variant of the clamped reverse-subtractive column drawer.
+    void DrawColumnRevSubClampTranslatedPalCommand::Execute(DrawerThread *thread)
+    {
+        fixed_t step = _iscale;
+        fixed_t pos = _texturefrac;
+        int rows = _count;
+        uint8_t *out = _dest;
+
+        rows = thread->count_for_thread(_dest_y, rows);
+        if (rows <= 0)
+            return;
+
+        int stride = _pitch;
+        out = thread->dest_for_thread(_dest_y, stride, out);
+        pos += step * thread->skipped_by_thread(_dest_y);
+        step *= thread->num_cores;
+        stride *= thread->num_cores;
+
+        const uint8_t *translation = _translation;
+        const uint8_t *colormap = _colormap;
+        const uint8_t *source = _source;
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+
+        for (; rows > 0; --rows)
+        {
+            const uint32_t diff = (bg2rgb[*out] | 0x40100400) - fg2rgb[colormap[translation[source[pos >> FRACBITS]]]];
+
+            uint32_t keep = diff & 0x40100400;
+            keep = keep - (keep >> 5);
+
+            const uint32_t packed = (diff & keep) | 0x01f07c1f;
+            *out = RGB32k.All[(packed >> 15) & packed];
+            out += stride;
+            pos += step;
+        }
+    }
+
+    /////////////////////////////////////////////////////////////////////////
+
+    // Snapshots the fuzz column arguments (vertical range, x, destination
+    // origin, pitch, fuzz table position and fuzz view height) into the
+    // command's members.
+    DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand()
+    {
+        using namespace drawerargs;
+
+        _yl = dc_yl;
+        _yh = dc_yh;
+        _x = dc_x;
+        _destorg = dc_destorg;
+        _pitch = dc_pitch;
+        _fuzzpos = fuzzpos;
+        _fuzzviewheight = fuzzviewheight;
+    }
+
+ void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread) + { + int yl = MAX(_yl, 1); + int yh = MIN(_yh, _fuzzviewheight); + + int count = thread->count_for_thread(yl, yh - yl + 1); + + // Zero length.
+ if (count <= 0) + return; + + uint8_t *map = &NormalLight.Maps[6 * 256]; + + uint8_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + _destorg); + + int pitch = _pitch * thread->num_cores; + int fuzzstep = thread->num_cores; + int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; + + yl += thread->skipped_by_thread(yl); + + // Handle the case where we would go out of bounds at the top: + if (yl < fuzzstep) + { + uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + + *dest = map[*srcdest]; + dest += pitch; + fuzz += fuzzstep; + fuzz %= FUZZTABLE; + + count--; + if (count == 0) + return; + } + + bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight); + if (lowerbounds) + count--; + + // Fuzz where fuzzoffset stays within bounds + while (count > 0) + { + int available = (FUZZTABLE - fuzz); + int next_wrap = available / fuzzstep; + if (available % fuzzstep != 0) + next_wrap++; + + int cnt = MIN(count, next_wrap); + count -= cnt; + do + { + uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep; + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + + *dest = map[*srcdest]; + dest += pitch; + fuzz += fuzzstep; + } while (--cnt); + + fuzz %= FUZZTABLE; + } + + // Handle the case where we would go out of bounds at the bottom + if (lowerbounds) + { + uint8_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; + //assert(static_cast((srcdest - (uint8_t*)dc_destorg) / (_pitch)) < viewheight); + + *dest = map[*srcdest]; + } + }
+
+    /////////////////////////////////////////////////////////////////////////
+
+    // Snapshots the current span drawing arguments (texture, colormap,
+    // start coordinates and steps, row and x range, destination origin,
+    // texture size bits, blend tables and colour) into the command's members.
+    PalSpanCommand::PalSpanCommand()
+    {
+        using namespace drawerargs;
+
+        _source = ds_source;
+        _colormap = ds_colormap;
+        _xfrac = ds_xfrac;
+        _yfrac = ds_yfrac;
+        _y = ds_y;
+        _x1 = ds_x1;
+        _x2 = ds_x2;
+        _destorg = dc_destorg;
+        _xstep = ds_xstep;
+        _ystep = ds_ystep;
+        _xbits = ds_xbits;
+        _ybits = ds_ybits;
+        _srcblend = dc_srcblend;
+        _destblend = dc_destblend;
+        _color = ds_color;
+    }
+
+    // Opaque horizontal span: walks the flat texture in u,v from _x1 to _x2
+    // on row _y and stores each colormapped texel. Does nothing when
+    // line_skipped_by_thread() reports the row as skipped.
+    void DrawSpanPalCommand::Execute(DrawerThread *thread)
+    {
+        if (thread->line_skipped_by_thread(_y))
+            return;
+
+        const uint8_t *source = _source;
+        const uint8_t *colormap = _colormap;
+
+        dsfixed_t u = _xfrac;
+        dsfixed_t v = _yfrac;
+        dsfixed_t du = _xstep;
+        dsfixed_t dv = _ystep;
+
+        uint8_t *out = ylookup[_y] + _x1 + _destorg;
+        int remaining = _x2 - _x1 + 1;
+        int texel;
+
+        if (_xbits == 6 && _ybits == 6)
+        {
+            // 64x64 is the most common case by far, so special case it.
+            do
+            {
+                // Current texture index in u,v.
+                texel = ((u >> (32 - 6 - 6))&(63 * 64)) + (v >> (32 - 6));
+
+                // Fetch the flat texel and re-index it through the colormap.
+                *out++ = colormap[source[texel]];
+
+                // Next step in u,v.
+                u += du;
+                v += dv;
+            } while (--remaining);
+        }
+        else
+        {
+            // General power-of-two texture size.
+            uint8_t yshift = 32 - _ybits;
+            uint8_t xshift = yshift - _xbits;
+            int xmask = ((1 << _xbits) - 1) << _ybits;
+
+            do
+            {
+                // Current texture index in u,v.
+                texel = ((u >> xshift) & xmask) + (v >> yshift);
+
+                // Fetch the flat texel and re-index it through the colormap.
+                *out++ = colormap[source[texel]];
+
+                // Next step in u,v.
+                u += du;
+                v += dv;
+            } while (--remaining);
+        }
+    }
+
+    // Masked horizontal span: like the opaque span, but texels with palette
+    // index 0 leave the destination pixel untouched.
+    void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread)
+    {
+        if (thread->line_skipped_by_thread(_y))
+            return;
+
+        const uint8_t *source = _source;
+        const uint8_t *colormap = _colormap;
+
+        dsfixed_t u = _xfrac;
+        dsfixed_t v = _yfrac;
+        dsfixed_t du = _xstep;
+        dsfixed_t dv = _ystep;
+
+        uint8_t *out = ylookup[_y] + _x1 + _destorg;
+        int remaining = _x2 - _x1 + 1;
+        int texel;
+
+        if (_xbits == 6 && _ybits == 6)
+        {
+            // 64x64 is the most common case by far, so special case it.
+            do
+            {
+                texel = ((u >> (32 - 6 - 6))&(63 * 64)) + (v >> (32 - 6));
+                int texdata = source[texel];
+                if (texdata != 0)
+                {
+                    *out = colormap[texdata];
+                }
+                out++;
+                u += du;
+                v += dv;
+            } while (--remaining);
+        }
+        else
+        {
+            // General power-of-two texture size.
+            uint8_t yshift = 32 - _ybits;
+            uint8_t xshift = yshift - _xbits;
+            int xmask = ((1 << _xbits) - 1) << _ybits;
+            do
+            {
+                texel = ((u >> xshift) & xmask) + (v >> yshift);
+                int texdata = source[texel];
+                if (texdata != 0)
+                {
+                    *out = colormap[texdata];
+                }
+                out++;
+                u += du;
+                v += dv;
+            } while (--remaining);
+        }
+    }
+
+    // Translucent horizontal span: the colormapped texel and the destination
+    // pixel are looked up in the blend tables, summed, and mapped back to a
+    // palette index through RGB32k.
+    void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread)
+    {
+        if (thread->line_skipped_by_thread(_y))
+            return;
+
+        const uint8_t *source = _source;
+        const uint8_t *colormap = _colormap;
+        uint32_t *fg2rgb = _srcblend;
+        uint32_t *bg2rgb = _destblend;
+
+        dsfixed_t u = _xfrac;
+        dsfixed_t v = _yfrac;
+        dsfixed_t du = _xstep;
+        dsfixed_t dv = _ystep;
+
+        uint8_t *out = ylookup[_y] + _x1 + _destorg;
+        int remaining = _x2 - _x1 + 1;
+        int texel;
+
+        if (_xbits == 6 && _ybits == 6)
+        {
+            // 64x64 is the most common case by far, so special case it.
+            do
+            {
+                texel = ((u >> (32 - 6 - 6))&(63 * 64)) + (v >> (32 - 6));
+                uint32_t fg = colormap[source[texel]];
+                uint32_t bg = *out;
+                fg = fg2rgb[fg];
+                bg = bg2rgb[bg];
+                fg = (fg + bg) | 0x1f07c1f;
+                *out++ = RGB32k.All[fg & (fg >> 15)];
+                u += du;
+                v += dv;
+            } while (--remaining);
+        }
+        else
+        {
+            // General power-of-two texture size.
+            uint8_t yshift = 32 - _ybits;
+            uint8_t xshift = yshift - _xbits;
+            int xmask = ((1 << _xbits) - 1) << _ybits;
+            do
+            {
+                texel = ((u >> xshift) & xmask) + (v >> yshift);
+                uint32_t fg = colormap[source[texel]];
+                uint32_t bg = *out;
+                fg = fg2rgb[fg];
+                bg = bg2rgb[bg];
+                fg = (fg + bg) | 0x1f07c1f;
+                *out++ = RGB32k.All[fg & (fg >> 15)];
+                u += du;
+                v += dv;
+            } while (--remaining);
+        }
+    }
+
+ void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it.
+ do + { + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = colormap[texdata]; + uint32_t bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a >> 15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint8_t *dest; + const uint8_t *source = _source; + const uint8_t *colormap = _colormap; + int count; + int spot; + uint32_t *fg2rgb = _srcblend; + uint32_t *bg2rgb = _destblend; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + _destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + uint8_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + uint8_t yshift = 32 - _ybits; + uint8_t xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint8_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } + + void FillSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(_y)) + return; + + memset(ylookup[_y] + _x1 + _destorg, _color, _x2 - _x1 + 1); + } + + ///////////////////////////////////////////////////////////////////////// + + DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + : y(y), x1(x1), x2(x2), plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy) + { + using namespace drawerargs; + + _colormap = ds_colormap; + _destorg = dc_destorg; + _ybits = ds_ybits; + _xbits = ds_xbits; + _source = ds_source; + basecolormapdata = basecolormap->Maps; + } + + void DrawTiltedSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + 
const uint8_t **tiltlighting = thread->tiltlighting; + + int width = x2 - x1; + double iz, uz, vz; + uint8_t *fb; + uint32_t u, v; + int i; + + iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (plane_shade) + { + uz = (iz + plane_sz[0] * width) * planelightfloat; + vz = iz * planelightfloat; + CalcTiltedLighting(vz, uz, width, thread); + } + else + { + for (int i = 0; i < width; ++i) + { + tiltlighting[i] = _colormap; + } + } + + uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); + vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); + + fb = ylookup[y] + x1 + _destorg; + + uint8_t vshift = 32 - _ybits; + uint8_t ushift = vshift - _xbits; + int umask = ((1 << _xbits) - 1) << _ybits; + + #if 0 + // The "perfect" reference version of this routine. Pretty slow. + // Use it only to see how things are supposed to look. + i = 0; + do + { + double z = 1.f / iz; + + u = int64_t(uz*z) + pviewx; + v = int64_t(vz*z) + pviewy; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += plane_sz[0]; + uz += plane_su[0]; + vz += plane_sv[0]; + } while (--width >= 0); + #else + //#define SPANSIZE 32 + //#define INVSPAN 0.03125f + //#define SPANSIZE 8 + //#define INVSPAN 0.125f + #define SPANSIZE 16 + #define INVSPAN 0.0625f + + double startz = 1.f / iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = plane_sz[0] * SPANSIZE; + uzstep = plane_su[0] * SPANSIZE; + vzstep = plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * INVSPAN); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * 
INVSPAN); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (i = SPANSIZE - 1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = (uint32_t)(int64_t(startu) + pviewx); // single leftover pixel: apply the view offset like the span loops do + v = (uint32_t)(int64_t(startv) + pviewy); + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += plane_sz[0] * left; + uz += plane_su[0] * left; + vz += plane_sv[0] * left; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f / left; + uint32_t stepu = (uint32_t)int64_t((endu - startu) * left); + uint32_t stepv = (uint32_t)int64_t((endv - startv) * left); + u = (uint32_t)(int64_t(startu) + pviewx); + v = (uint32_t)(int64_t(startv) + pviewy); + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + _source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } + #endif + } + + // Calculates the lighting for one row of a tilted plane. If the definition + // of GETPALOOKUP changes, this needs to change, too. + void DrawTiltedSpanPalCommand::CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread) + { + const uint8_t **tiltlighting = thread->tiltlighting; + + double lstep; + uint8_t *lightfiller; + int i = 0; + + if (width == 0 || lval == lend) + { // Constant lighting + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + } + else + { + lstep = (lend - lval) / width; + if (lval >= MAXLIGHTVIS) + { // lval starts "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); + for (; i <= width && lval >= MAXLIGHTVIS; ++i) + { + tiltlighting[i] = lightfiller; + lval += lstep; + } + } + if (lend >= MAXLIGHTVIS) + { // lend ends "too bright".
+ lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); + for (; width > i && lend >= MAXLIGHTVIS; --width) + { + tiltlighting[width] = lightfiller; + lend -= lstep; + } + } + if (width > 0) + { + lval = FIXED2DBL(planeshade) - lval; + lend = FIXED2DBL(planeshade) - lend; + lstep = (lend - lval) / width; + if (lstep < 0) + { // Going from dark to light + if (lval < 1.) + { // All bright + lightfiller = basecolormapdata; + } + else + { + if (lval >= NUMCOLORMAPS) + { // Starts beyond the dark end + uint8_t *clight = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + while (lval >= NUMCOLORMAPS && i <= width) + { + tiltlighting[i++] = clight; + lval += lstep; + } + if (i > width) + return; + } + while (i <= width && lval >= 0) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata; + } + } + else + { // Going from light to dark + if (lval >= (NUMCOLORMAPS - 1)) + { // All dark + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + else + { + while (lval < 0 && i <= width) + { + tiltlighting[i++] = basecolormapdata; + lval += lstep; + } + if (i > width) + return; + while (i <= width && lval < (NUMCOLORMAPS - 1)) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata + ((NUMCOLORMAPS - 1) << COLORMAPSHIFT); + } + } + } + } + for (; i <= width; i++) + { + tiltlighting[i] = lightfiller; + } + } + + ///////////////////////////////////////////////////////////////////////// + + DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + { + using namespace drawerargs; + color = ds_color; + destorg = dc_destorg; + } + + void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + memset(ylookup[y] + x1 + destorg, color, x2 - x1 + 1); + } + + 
///////////////////////////////////////////////////////////////////////// + + DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) + : _dx(dx), _v(v), _dy(dy), _vi(vi), _vptr(vptr), _p(p), _colormap(colormap) + { + using namespace drawerargs; + _pitch = dc_pitch; + _start_y = static_cast<int>((p - dc_destorg) / dc_pitch); // byte offset of p from dc_destorg, in whole rows of dc_pitch + } + + void DrawSlabPalCommand::Execute(DrawerThread *thread) + { + int count = _dy; + uint8_t *dest = _p; + int pitch = _pitch; + int width = _dx; + const uint8_t *colormap = _colormap; + const uint8_t *source = _vptr; + fixed_t fracpos = _v; + fixed_t iscale = _vi; + + count = thread->count_for_thread(_start_y, count); + dest = thread->dest_for_thread(_start_y, pitch, dest); + fracpos += iscale * thread->skipped_by_thread(_start_y); + iscale *= thread->num_cores; + pitch *= thread->num_cores; + + while (count > 0) + { + uint8_t color = colormap[source[fracpos >> FRACBITS]]; + + for (int x = 0; x < width; x++) + dest[x] = color; + + dest += pitch; + fracpos += iscale; + count--; + } + } + + ///////////////////////////////////////////////////////////////////////// + + DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(int y, int x1, int x2) : y(y), x1(x1), x2(x2) + { + using namespace drawerargs; + _colormap = dc_colormap; + _destorg = dc_destorg; + } + + void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread) + { + if (thread->line_skipped_by_thread(y)) + return; + + const uint8_t *colormap = _colormap; + uint8_t *dest = ylookup[y] + _destorg; + int x = x1; + do + { + dest[x] = colormap[dest[x]]; + } while (++x <= x2); + } +} diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h new file mode 100644 index 0000000000..a2400d2759 --- /dev/null +++ b/src/r_draw_pal.h @@ -0,0 +1,333 @@ + +#pragma once + +#include "r_draw.h" +#include "v_palette.h" +#include "r_thread.h" + +namespace swrenderer +{ + class PalWall1Command : public DrawerCommand + { + public:
PalWall1Command(); + FString DebugInfo() override { return "PalWallCommand"; } + + protected: + uint32_t _iscale; + uint32_t _texturefrac; + uint8_t *_colormap; + int _count; + const uint8_t *_source; + uint8_t *_dest; + int _vlinebits; + int _mvlinebits; + int _tmvlinebits; + int _pitch; + uint32_t *_srcblend; + uint32_t *_destblend; + }; + + class PalWall4Command : public DrawerCommand + { + public: + PalWall4Command(); + FString DebugInfo() override { return "PalWallCommand"; } + + protected: + uint8_t *_dest; + int _count; + int _pitch; + int _vlinebits; + int _mvlinebits; + int _tmvlinebits; + uint8_t *_palookupoffse[4]; + const uint8_t *_bufplce[4]; + uint32_t _vince[4]; + uint32_t _vplce[4]; + uint32_t *_srcblend; + uint32_t *_destblend; + }; + + class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: 
void Execute(DrawerThread *thread) override; }; + class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; + + class PalSkyCommand : public DrawerCommand + { + public: + PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom); + FString DebugInfo() override { return "PalSkyCommand"; } + + protected: + uint32_t solid_top; + uint32_t solid_bottom; + + uint8_t *_dest; + int _count; + int _pitch; + const uint8_t *_bufplce[4]; + const uint8_t *_bufplce2[4]; + int _bufheight[4]; + uint32_t _vince[4]; + uint32_t _vplce[4]; + }; + + class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; + + class PalColumnCommand : public DrawerCommand + { + public: + PalColumnCommand(); + FString DebugInfo() override { return "PalColumnCommand"; } + + protected: + int _count; + uint8_t *_dest; + int _pitch; + fixed_t _iscale; + fixed_t _texturefrac; + const uint8_t *_colormap; + const uint8_t *_source; + const uint8_t *_translation; + int _color; + uint32_t *_srcblend; + uint32_t *_destblend; + uint32_t _srccolor; + }; + + class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnAddClampPalCommand 
: public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnShadedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawFuzzColumnPalCommand : public DrawerCommand + { + public: + DrawFuzzColumnPalCommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; } + + private: + int _yl; + int _yh; + int _x; + uint8_t *_destorg; + int _pitch; + int _fuzzpos; + int _fuzzviewheight; + }; + + class PalSpanCommand : public DrawerCommand + { + public: + PalSpanCommand(); + 
FString DebugInfo() override { return "PalSpanCommand"; } + + protected: + const uint8_t *_source; + const uint8_t *_colormap; + dsfixed_t _xfrac; + dsfixed_t _yfrac; + int _y; + int _x1; + int _x2; + uint8_t *_destorg; + dsfixed_t _xstep; + dsfixed_t _ystep; + int _xbits; + int _ybits; + uint32_t *_srcblend; + uint32_t *_destblend; + int _color; + }; + + class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + class FillSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; }; + + class DrawTiltedSpanPalCommand : public DrawerCommand + { + public: + DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; } + + private: + void CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread); + + int y; + int x1; + int x2; + FVector3 plane_sz; + FVector3 plane_su; + FVector3 plane_sv; + bool plane_shade; + int planeshade; + float planelightfloat; + fixed_t pviewx; + fixed_t pviewy; + + const uint8_t *_colormap; + uint8_t *_destorg; + int _ybits; + int _xbits; + const uint8_t *_source; + uint8_t 
*basecolormapdata; + }; + + class DrawColoredSpanPalCommand : public PalSpanCommand + { + public: + DrawColoredSpanPalCommand(int y, int x1, int x2); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawColoredSpanPalCommand"; } + + private: + int y; + int x1; + int x2; + int color; + uint8_t *destorg; + }; + + class DrawSlabPalCommand : public PalSpanCommand + { + public: + DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap); + void Execute(DrawerThread *thread) override; + + private: + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const uint8_t *_vptr; + uint8_t *_p; + const uint8_t *_colormap; + int _pitch; + int _start_y; + }; + + class DrawFogBoundaryLinePalCommand : public PalSpanCommand + { + public: + DrawFogBoundaryLinePalCommand(int y, int x1, int x2); + void Execute(DrawerThread *thread) override; + + private: + int y, x1, x2; + const uint8_t *_colormap; + uint8_t *_destorg; + }; + + class RtInitColsPalCommand : public DrawerCommand + { + public: + RtInitColsPalCommand(uint8_t *buff); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "RtInitColsPalCommand"; } + + private: + uint8_t *buff; + }; + + class PalColumnHorizCommand : public DrawerCommand + { + public: + PalColumnHorizCommand(); + + protected: + const uint8_t *_source; + fixed_t _iscale; + fixed_t _texturefrac; + int _count; + int _color; + int _x; + int _yl; + }; + + class DrawColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawColumnHorizPalCommand"; } + }; + + class FillColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "FillColumnHorizPalCommand"; } + }; + + class PalRtCommand : public DrawerCommand + { + public: + PalRtCommand(int hx, int sx, 
int yl, int yh); + FString DebugInfo() override { return "PalRtCommand"; } + + protected: + int hx, sx, yl, yh; + uint8_t *_destorg; + int _pitch; + const uint8_t *_colormap; + const uint32_t *_srcblend; + const uint32_t *_destblend; + const uint8_t *_translation; + int _color; + }; + + class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4ShadedPalCommand 
: public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; 
void Execute(DrawerThread *thread) override; }; +} diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp new file mode 100644 index 0000000000..3356592d25 --- /dev/null +++ b/src/r_drawt_pal.cpp @@ -0,0 +1,867 @@ +/* +** r_drawt.cpp +** Faster column drawers for modern processors +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**--------------------------------------------------------------------------- +** +** These functions stretch columns into a temporary buffer and then +** map them to the screen. On modern machines, this is faster than drawing +** them directly to the screen. +** +** Will I be able to even understand any of this if I come back to it later? +** Let's hope so. :-) +*/ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" +#include "r_draw_pal.h" + +// I should have commented this stuff better. +// +// dc_temp is the buffer R_DrawColumnHoriz writes into. +// dc_tspans points into it. +// dc_ctspan points into dc_tspans. +// horizspan also points into dc_tspans. + +// dc_ctspan is advanced while drawing into dc_temp. +// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. + +namespace swrenderer +{ + RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff) + { + } + + void RtInitColsPalCommand::Execute(DrawerThread *thread) + { + thread->dc_temp = buff == nullptr ? 
thread->dc_temp_buff : buff; + } + + ///////////////////////////////////////////////////////////////////// + + PalColumnHorizCommand::PalColumnHorizCommand() + { + using namespace drawerargs; + + _source = dc_source; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _color = dc_color; + _x = dc_x; + _yl = dc_yl; + } + + void DrawColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t *dest; + fixed_t fracstep; + fixed_t frac; + + count = thread->count_for_thread(_yl, count); + if (count <= 0) + return; + + fracstep = _iscale; + frac = _texturefrac; + + const uint8_t *source = _source; + + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + frac += fracstep * thread->skipped_by_thread(_yl); + fracstep *= thread->num_cores; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + + void FillColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t color = _color; + uint8_t 
*dest; + + count = thread->count_for_thread(_yl, count); + if (count <= 0) + return; + + int x = _x & 3; + dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } + + ///////////////////////////////////////////////////////////////////// + + PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh) + { + using namespace drawerargs; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _translation = dc_translation; + _color = dc_color; + } + + void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) + { + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); + } + + void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread) + { + int *source; + int *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg); + source = (int *)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]); + pitch = _pitch*thread->num_cores/sizeof(int); + + if (count & 1) { + 
*dest = *source; + source += 4/sizeof(int); + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4/sizeof(int)]; + source += 8/sizeof(int); + dest += pitch*2; + } while (--count); + } + + void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx]; + pitch = _pitch*thread->num_cores; + + if (count & 1) { + *dest = colormap[*source]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[pitch] = colormap[source[4]]; + source += 8; + dest += pitch*2; + } while (--count); + } + + void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh - yl + 1; + + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch*thread->num_cores; + + if (count & 1) { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + dest[pitch] = colormap[source[4]]; + dest[pitch+1] = colormap[source[5]]; + dest[pitch+2] = colormap[source[6]]; + dest[pitch+3] = colormap[source[7]]; + source += 8; + dest += pitch*2; + } while (--count); + } + + void 
DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) + { + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + const uint8_t *translation = _translation; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + uint8_t b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. 
+ for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } + } + + void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) + { + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + const uint8_t *translation = _translation; + int c0, c1; + uint8_t b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. 
+ if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } + } + + void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t fg = colormap[*source]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t fg = colormap[source[0]]; + uint32_t bg = dest[0]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[0] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[1]]; + bg = dest[1]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[1] = RGB32k.All[fg & (fg>>15)]; + + + fg = colormap[source[2]]; + bg = dest[2]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[2] = 
RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[3]]; + bg = dest[3]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[3] = RGB32k.All[fg & (fg>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) + { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + + do { + uint32_t val = colormap[*source]; + uint32_t fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) + { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + + do { + uint32_t val; + + val = colormap[source[0]]; + val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; + dest[0] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[1]]; + val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; + dest[1] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[2]]; + val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; + dest[2] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[3]]; + val = (Col2RGB8[64-val][dest[3]] + 
fgstart[val<<8]) | 0x1f07c1f; + dest[3] = RGB32k.All[val & (val>>15)]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + + do { + uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b 
- (b >> 5); + a |= b; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + 
dest[1] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); + } + + void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) + { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int pitch; + + int count = yh - yl + 1; + count = thread->count_for_thread(yl, count); + if (count <= 0) + return; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; + source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; + pitch = _pitch * thread->num_cores; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = 
RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); + } +} diff --git a/src/r_main.cpp b/src/r_main.cpp index c69c22c7ba..a6ae47de1b 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -58,6 +58,38 @@ #include "v_font.h" #include "r_data/colormaps.h" #include "p_maputl.h" +#include "r_thread.h" + +CVAR (String, r_viewsize, "", CVAR_NOSET) +CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) + +CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) +{ + if (self != 0 && self != 1) + { + self = 1; + } + else + { // Trigger the change + setsizeneeded = true; + } +} + +CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) +CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) + +EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) + +extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; +extern cycle_t FrameCycles; + +extern bool r_showviewer; + +cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; + +namespace swrenderer +{ + using namespace drawerargs; // MACROS ------------------------------------------------------------------ @@ -88,7 +120,6 @@ extern short *openings; extern bool r_fakingunderwater; extern "C" int fuzzviewheight; extern subsector_t *InSubsector; -extern bool r_showviewer; // PRIVATE DATA DECLARATIONS ----------------------------------------------- @@ -100,9 +131,6 @@ bool r_dontmaplines; // PUBLIC DATA DEFINITIONS 
------------------------------------------------- -CVAR (String, r_viewsize, "", CVAR_NOSET) -CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) - double r_BaseVisibility; double r_WallVisibility; double r_FloorVisibility; @@ -157,8 +185,6 @@ void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); void (*hcolfunc_post4) (int sx, int yl, int yh); -cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; - // PRIVATE DATA DEFINITIONS ------------------------------------------------ static int lastcenteryfrac; @@ -361,26 +387,6 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, R_SetVisibility(R_GetVisibility()); } -//========================================================================== -// -// CVAR r_columnmethod -// -// Selects which version of the seg renderers to use. -// -//========================================================================== - -CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) -{ - if (self != 0 && self != 1) - { - self = 1; - } - else - { // Trigger the change - setsizeneeded = true; - } -} - //========================================================================== // // R_Init @@ -455,8 +461,6 @@ void R_CopyStackedViewParameters() // //========================================================================== -EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor) - void R_SetupColormap(player_t *player) { realfixedcolormap = NULL; @@ -574,9 +578,6 @@ void R_SetupFreelook() // //========================================================================== -CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE) -CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE) - void R_HighlightPortal (PortalDrawseg* pds) { // [ZZ] NO OVERFLOW CHECKS HERE @@ -853,10 +854,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = 
R_FillColumnHoriz; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; - colfunc = R_FillColumnP; + colfunc = R_FillColumn; spanfunc = R_FillSpan; } else @@ -950,6 +951,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, { const bool savedviewactive = viewactive; + R_BeginDrawerCommands(); + viewwidth = width; RenderTarget = canvas; bRenderingToCanvas = true; @@ -961,6 +964,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_RenderActorView (actor, dontmaplines); + R_EndDrawerCommands(); + RenderTarget = screen; bRenderingToCanvas = false; R_ExecuteSetViewSize (); @@ -991,8 +996,6 @@ void R_MultiresInit () // Displays statistics about rendering times // //========================================================================== -extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; -extern cycle_t FrameCycles; ADD_STAT (fps) { @@ -1072,3 +1075,5 @@ CCMD (clearscancycles) bestscancycles = HUGE_VAL; } #endif + +} \ No newline at end of file diff --git a/src/r_main.h b/src/r_main.h index 24103393d4..87b56163b0 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -28,23 +28,26 @@ #include "v_palette.h" #include "r_data/colormaps.h" +extern double ViewCos; +extern double ViewSin; +extern int viewwindowx; +extern int viewwindowy; typedef BYTE lighttable_t; // This could be wider for >8 bit display. +namespace swrenderer +{ + // // POV related. 
// extern bool bRenderingToCanvas; -extern double ViewCos; -extern double ViewSin; extern fixed_t viewingrangerecip; extern double FocalLengthX, FocalLengthY; extern double InvZtoScale; extern double WallTMapScale2; -extern int viewwindowx; -extern int viewwindowy; extern double CenterX; extern double CenterY; @@ -142,5 +145,6 @@ extern DAngle stacked_angle; extern void R_CopyStackedViewParameters(); +} #endif // __R_MAIN_H__ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 810aa0003c..8a5ee2263a 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -63,10 +63,14 @@ #pragma warning(disable:4244) #endif +CVAR(Bool, tilt, false, 0); +CVAR(Bool, r_skyboxes, true, 0) + EXTERN_CVAR(Int, r_skymode) -//EXTERN_CVAR (Int, tx) -//EXTERN_CVAR (Int, ty) +namespace swrenderer +{ + using namespace drawerargs; extern subsector_t *InSubsector; @@ -132,15 +136,12 @@ extern "C" { // spanend holds the end of a plane span in each screen row // short spanend[MAXHEIGHT]; -BYTE *tiltlighting[MAXWIDTH]; int planeshade; FVector3 plane_sz, plane_su, plane_sv; float planelightfloat; bool plane_shade; fixed_t pviewx, pviewy; - -void R_DrawTiltedPlane_ASM (int y, int x1); } float yslope[MAXHEIGHT]; @@ -148,13 +149,6 @@ static fixed_t xscale, yscale; static double xstepscale, ystepscale; static double basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -249,11 +243,6 @@ void R_MapPlane (int y, int x1) GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); } -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM 
(ds_colormap); -#endif - ds_y = y; ds_x1 = x1; ds_x2 = x2; @@ -261,239 +250,15 @@ void R_MapPlane (int y, int x1) spanfunc (); } -//========================================================================== -// -// R_CalcTiltedLighting -// -// Calculates the lighting for one row of a tilted plane. If the definition -// of GETPALOOKUP changes, this needs to change, too. -// -//========================================================================== - -extern "C" { -void R_CalcTiltedLighting (double lval, double lend, int width) -{ - double lstep; - BYTE *lightfiller; - BYTE *basecolormapdata = basecolormap->Maps; - int i = 0; - - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - } - else - { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) - { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) 
- { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS-1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS-1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - } - } - } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } -} -} // extern "C" - //========================================================================== // // R_MapTiltedPlane // //========================================================================== -void R_MapTiltedPlane(int y, int x1) +void R_MapTiltedPlane (int y, int x1) { - int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - BYTE *fb; - DWORD u, v; - int i; - - iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx); - - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) - { - uz = (iz + plane_sz[0] * width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting(vz, uz, width); - } - - uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx); - vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx); - - fb = ylookup[y] + x1 + dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif + R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -502,9 +267,9 @@ void R_MapTiltedPlane(int y, int x1) // //========================================================================== -void R_MapColoredPlane (int y, int x1) +void R_MapColoredPlane(int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); + R_DrawColoredSpan(y, x1, spanend[y]); } //========================================================================== @@ -1179,9 +944,6 @@ static void R_DrawSkyStriped (visplane_t *pl) // //========================================================================== -CVAR (Bool, tilt, false, 0); -//CVAR (Int, pa, 0, 0) - int R_DrawPlanes () { visplane_t *pl; @@ -1317,7 +1079,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske // 9. Put the camera back where it was to begin with. 
// //========================================================================== -CVAR (Bool, r_skyboxes, true, 0) static int numskyboxes; void R_DrawPortals () @@ -1665,13 +1426,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif - if (alpha <= 0) { return; @@ -1896,14 +1650,6 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t else ds_colormap = basecolormap->Maps, plane_shade = true; - if (!plane_shade) - { - for (int i = 0; i < viewwidth; ++i) - { - tiltlighting[i] = ds_colormap; - } - } - // Hack in support for 1 x Z and Z x 1 texture sizes if (ds_ybits == 0) { @@ -1913,13 +1659,8 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t { plane_su[2] = plane_su[1] = plane_su[0] = 0; } -#if defined(X86_ASM) - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM (ds_source); - R_MapVisPlane (pl, R_DrawTiltedPlane_ASM); -#else + R_MapVisPlane (pl, R_MapTiltedPlane); -#endif } //========================================================================== @@ -2023,3 +1764,5 @@ bool R_PlaneInitData () return true; } + +} \ No newline at end of file diff --git a/src/r_plane.h b/src/r_plane.h index d4db3dc09c..0e133a7cd2 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -27,6 +27,9 @@ class ASkyViewpoint; +namespace swrenderer +{ + // // The infamous visplane // @@ -113,4 +116,6 @@ bool R_PlaneInitData (void); extern visplane_t* floorplane; extern visplane_t* ceilingplane; +} + #endif // __R_PLANE_H__ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ac5683b9b2..d4520e91de 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -57,10 +57,13 @@ CVAR(Bool, r_np2, true, 0) +CVAR(Bool, r_fogboundary, true, 0) +CVAR(Bool, r_drawmirrors, true, 0) EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor); -//CVAR 
(Int, ty, 8, 0) -//CVAR (Int, tx, 8, 0) +namespace swrenderer +{ + using namespace drawerargs; #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -141,16 +144,6 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat); static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask); -//============================================================================= -// -// CVAR r_fogboundary -// -// If true, makes fog look more "real" by shading the walls separating two -// sectors with different fog. -//============================================================================= - -CVAR(Bool, r_fogboundary, true, 0) - inline bool IsFogBoundary (sector_t *front, sector_t *back) { return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade && @@ -158,14 +151,6 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back) (front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum); } -//============================================================================= -// -// CVAR r_drawmirrors -// -// Set to false to disable rendering of mirrors -//============================================================================= - -CVAR(Bool, r_drawmirrors, true, 0) // // R_RenderMaskedSegRange @@ -2994,3 +2979,5 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, done: WallC = savecoord; } + +} \ No newline at end of file diff --git a/src/r_segs.h b/src/r_segs.h index 1fc428c964..8610bc6f29 100644 --- a/src/r_segs.h +++ b/src/r_segs.h @@ -23,6 +23,9 @@ #ifndef __R_SEGS_H__ #define __R_SEGS_H__ +namespace swrenderer +{ + struct drawseg_t; void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2); @@ -70,4 +73,6 @@ extern int CurrentPortalUniq; extern bool 
CurrentPortalInSkybox; extern TArray WallPortals; +} + #endif diff --git a/src/r_state.h b/src/r_state.h index b66ad57eb7..cd4aee4be3 100644 --- a/src/r_state.h +++ b/src/r_state.h @@ -80,7 +80,7 @@ extern int numgamesubsectors; extern AActor* camera; // [RH] camera instead of viewplayer extern sector_t* viewsector; // [RH] keep track of sector viewing from -extern angle_t xtoviewangle[MAXWIDTH+1]; +namespace swrenderer { extern angle_t xtoviewangle[MAXWIDTH+1]; } extern DAngle FieldOfView; int R_FindSkin (const char *name, int pclass); // [RH] Find a skin diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 3c33134301..87bce4013a 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -42,13 +42,20 @@ #include "r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" +#include "r_thread.h" +namespace swrenderer +{ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio); void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +} + +using namespace swrenderer; + //========================================================================== // // DCanvas :: Init @@ -154,9 +161,11 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { + R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + R_EndDrawerCommands(); } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index e7d130fa85..4ba47d63d7 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -64,6 +64,21 @@ #include "r_data/voxels.h" #include "p_local.h" #include "p_maputl.h" +#include "r_thread.h" + +EXTERN_CVAR(Bool, st_scale) +EXTERN_CVAR(Bool, r_shadercolormaps) +EXTERN_CVAR(Int, r_drawfuzz) +EXTERN_CVAR(Bool, r_deathcamera); +EXTERN_CVAR(Bool, r_drawplayersprites) +EXTERN_CVAR(Bool, r_drawvoxels) + +CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +//CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE) + +namespace swrenderer +{ + using namespace drawerargs; // [RH] A c-buffer. Used for keeping track of offscreen voxel spans. @@ -95,12 +110,6 @@ extern float MaskedScaleY; #define BASEXCENTER (160) #define BASEYCENTER (100) -EXTERN_CVAR (Bool, st_scale) -EXTERN_CVAR(Bool, r_shadercolormaps) -EXTERN_CVAR(Int, r_drawfuzz) -EXTERN_CVAR(Bool, r_deathcamera); -CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // // Sprite rotation 0 is facing the viewer, // rotation 1 is one angle turn CLOCKWISE around the axis. 
@@ -132,9 +141,6 @@ FTexture *WallSpriteTile; short zeroarray[MAXWIDTH]; short screenheightarray[MAXWIDTH]; -EXTERN_CVAR (Bool, r_drawplayersprites) -EXTERN_CVAR (Bool, r_drawvoxels) - // // INITIALIZATION FUNCTIONS // @@ -639,7 +645,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP) + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } @@ -1758,8 +1764,6 @@ static int sd_comparex (const void *arg1, const void *arg2) return (*(drawseg_t **)arg2)->x2 - (*(drawseg_t **)arg1)->x2; } -CVAR (Bool, r_splitsprites, true, CVAR_ARCHIVE) - // Split up vissprites that intersect drawsegs void R_SplitVisSprites () { @@ -2628,7 +2632,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle (vissprite_t *vis) +void R_DrawParticle_C (vissprite_t *vis) { DWORD *bg2rgb; int spacing; @@ -2642,6 +2646,8 @@ void R_DrawParticle (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); + DrawerCommandQueue::WaitForWorkers(); + // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; @@ -3237,3 +3243,5 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) OffscreenBufferWidth = width; OffscreenBufferHeight = height; } + +} \ No newline at end of file diff --git a/src/r_things.h b/src/r_things.h index 53b887b181..bf32b655f2 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -25,6 +25,12 @@ #include "r_bsp.h" +struct particle_t; +struct FVoxel; + +namespace swrenderer +{ + // A vissprite_t is a thing // that will be drawn during a refresh. // I.e. a sprite object that is partly visible. 
@@ -95,9 +101,7 @@ struct vissprite_t vissprite_t() {} }; -struct particle_t; - -void R_DrawParticle (vissprite_t *); +void R_DrawParticle_C (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); extern int MaxVisSprites; @@ -146,5 +150,6 @@ void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); +} #endif diff --git a/src/r_thread.cpp b/src/r_thread.cpp new file mode 100644 index 0000000000..c96f14e74b --- /dev/null +++ b/src/r_thread.cpp @@ -0,0 +1,297 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#include +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_thread.h" + +CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +void R_BeginDrawerCommands() +{ + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); +} + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue *DrawerCommandQueue::Instance() +{ + static DrawerCommandQueue queue; + return &queue; +} + +DrawerCommandQueue::DrawerCommandQueue() +{ +} + +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + +void* DrawerCommandQueue::AllocMemory(size_t size) +{ + // Make sure allocations remain 16-byte aligned + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + +void DrawerCommandQueue::Finish() +{ + auto queue = Instance(); + if (queue->commands.empty()) + return; + + // Give worker threads something to do: + + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = (int)(queue->threads.size() + 1); + + struct TryCatchData + { + 
DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; + + data.queue = queue; + data.thread = &thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) + { + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + command->Execute(d->thread); + } + } + }, + [](void *data, const char *reason, bool fatal) + { + TryCatchData *d = (TryCatchData*)data; + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + }); + + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + + if (!queue->thread_error.IsEmpty()) + { + static bool first = true; + if (queue->thread_error_fatal) + I_FatalError("%s", queue->thread_error.GetChars()); + else if (first) + Printf("%s\n", queue->thread_error.GetChars()); + first = false; + } + + // Clean up batch: + + for (auto &command : queue->active_commands) + command->~DrawerCommand(); + queue->active_commands.clear(); + queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = 
num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + + struct TryCatchData + { + DrawerCommandQueue *queue; + DrawerThread *thread; + size_t command_index; + } data; + + data.queue = queue; + data.thread = thread; + data.command_index = 0; + VectoredTryCatch(&data, + [](void *data) + { + TryCatchData *d = (TryCatchData*)data; + + for (int pass = 0; pass < d->queue->num_passes; pass++) + { + d->thread->pass_start_y = pass * d->queue->rows_in_pass; + d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass; + if (pass + 1 == d->queue->num_passes) + d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT); + + size_t size = d->queue->active_commands.size(); + for (d->command_index = 0; d->command_index < size; d->command_index++) + { + auto &command = d->queue->active_commands[d->command_index]; + command->Execute(d->thread); + } + } + }, + [](void *data, const char *reason, bool fatal) + { + TryCatchData *d = (TryCatchData*)data; + ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal); + }); + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; +} + +void DrawerCommandQueue::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal) +{ + if 
(worker_thread) + { + std::unique_lock end_lock(Instance()->end_mutex); + if (Instance()->thread_error.IsEmpty() || (!Instance()->thread_error_fatal && fatal)) + { + Instance()->thread_error = reason + (FString)": " + command->DebugInfo(); + Instance()->thread_error_fatal = fatal; + } + } + else + { + static bool first = true; + if (fatal) + I_FatalError("%s: %s", reason, command->DebugInfo().GetChars()); + else if (first) + Printf("%s: %s\n", reason, command->DebugInfo().GetChars()); + first = false; + } +} + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)) +{ + tryBlock(data); +} diff --git a/src/r_thread.h b/src/r_thread.h new file mode 100644 index 0000000000..7962dfc208 --- /dev/null +++ b/src/r_thread.h @@ -0,0 +1,235 @@ +/* +** Renderer multithreading framework +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "r_draw.h" +#include +#include +#include +#include +#include + +// Use multiple threads when drawing +EXTERN_CVAR(Bool, r_multithreaded) + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + // Working buffer used by Rt drawers + uint8_t dc_temp_buff[MAXHEIGHT * 4]; + uint8_t *dc_temp = nullptr; + + // Working buffer used by Rt drawers, true color edition + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba = nullptr; + + // Working buffer used by the tilted (sloped) span drawer + const uint8_t *tiltlighting[MAXWIDTH]; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + template + T *dest_for_thread(int first_line, int pitch, T *dest) + { + return dest + 
skipped_by_thread(first_line) * pitch; + } + + // The first line in the dc_temp buffer used this thread + int temp_line_for_thread(int first_line) + { + return (first_line + skipped_by_thread(first_line)) / num_cores; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int _dest_y; + + void DetectRangeError(uint32_t *&dest, int &dest_y, int &count) + { +#if defined(_MSC_VER) && defined(_DEBUG) + if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::drawerargs::dc_destheight) + __debugbreak(); // Buffer overrun detected! +#endif + + if (dest_y < 0) + { + count += dest_y; + dest_y = 0; + dest = (uint32_t*)swrenderer::drawerargs::dc_destorg; + } + else if (dest_y >= swrenderer::drawerargs::dc_destheight) + { + dest_y = 0; + count = 0; + } + + if (count < 0 || count > MAXHEIGHT) count = 0; + if (dest_y + count >= swrenderer::drawerargs::dc_destheight) + count = swrenderer::drawerargs::dc_destheight - dest_y; + } + +public: + DrawerCommand() + { + _dest_y = static_cast((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch)); + } + + virtual ~DrawerCommand() { } + + virtual void Execute(DrawerThread *thread) = 0; + virtual FString DebugInfo() = 0; +}; + +void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal)); + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 16 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads = 0; + FString thread_error; + bool thread_error_fatal = false; + + int threaded_render = 0; + 
DrawerThread single_core_thread; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal); + + DrawerCommandQueue(); + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0 || !r_multithreaded) + { + T command(std::forward(args)...); + VectoredTryCatch(&command, + [](void *data) + { + T *c = (T*)data; + c->Execute(&Instance()->single_core_thread); + }, + [](void *data, const char *reason, bool fatal) + { + T *c = (T*)data; + ReportDrawerError(c, false, reason, fatal); + }); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 8483b9844a..f86a94bcdf 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -132,6 +132,9 @@ void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, .. 
void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { #ifndef NO_SWRENDER + using namespace swrenderer; + using namespace drawerargs; + FTexture::Span unmaskedSpan[2]; const FTexture::Span **spanptr, *spans; static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; @@ -1285,6 +1288,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, FDynamicColormap *colormap, int lightlevel, int bottomclip) { #ifndef NO_SWRENDER + using namespace swrenderer; + using namespace drawerargs; + // Use an equation similar to player sprites to determine shade fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT; float topy, boty, leftx, rightx; @@ -1352,7 +1358,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. R_SetupSpanBits(tex); R_SetSpanColormap(colormap != NULL ? &colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap); - R_SetSpanSource(tex->GetPixels()); + R_SetSpanSource(tex); if (ds_xbits != 0) { scalex = double(1u << (32 - ds_xbits)) / scalex; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0e8dd3dec9..026bbc63bb 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1375,17 +1375,16 @@ void D3DFB::Draw3DPart(bool copy3d) D3DCOLOR color0, color1; if (Accel2D) { - if (realfixedcolormap == NULL) + auto &map = swrenderer::realfixedcolormap; + if (map == NULL) { color0 = 0; color1 = 0xFFFFFFF; } else { - color0 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeStart[0]/2, - realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); - color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, - realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); + color0 = D3DCOLOR_COLORVALUE(map->ColorizeStart[0] / 2, map->ColorizeStart[1] / 2, map->ColorizeStart[2] / 2, 0); + color1 = D3DCOLOR_COLORVALUE(map->ColorizeEnd[0] / 2, map->ColorizeEnd[1] / 2, map->ColorizeEnd[2] 
/ 2, 1); SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } } From 1e42c6f227aac0c2248abf9d7a4910caddee354f Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 7 Dec 2016 11:40:59 +0100 Subject: [PATCH 2/9] - added copyright headers to two files missing them. --- src/r_draw.cpp | 33 +++++++++++++++++++++++++++++++++ src/r_draw_pal.cpp | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index a2bf412e8b..52f5f24e16 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1,3 +1,36 @@ +/* +** r_draw.cpp +** +**--------------------------------------------------------------------------- +** Copyright 1998-2016 Randy Heit +** Copyright 2016 Magnus Norddahl +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +*/ #include diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 0264dcbf9e..b508dd221c 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -1,3 +1,36 @@ +/* +** r_draw_pal.cpp +** +**--------------------------------------------------------------------------- +** Copyright 1998-2016 Randy Heit +** Copyright 2016 Magnus Norddahl +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +*/ #include "templates.h" #include "doomtype.h" From 42346c58d3587fec0fdebd6a29c63b130049ead8 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 7 Dec 2016 12:31:43 +0100 Subject: [PATCH 3/9] - disabled assembly entirely to make the MT drawer submission compile. This still requires a review of the two non-drawer functions that get 'lost'. --- src/CMakeLists.txt | 138 --------------------------------------------- src/doomtype.h | 51 ----------------- src/r_main.cpp | 3 - src/v_video.h | 4 -- 4 files changed, 196 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e164a338cb..3f54e0fcf9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,12 +14,6 @@ include( CheckIncludeFiles ) include( CheckLibraryExists ) include( FindPkgConfig ) -if( NOT APPLE ) - option( NO_ASM "Disable assembly code" OFF ) -else() - # At the moment asm code doesn't work with OS X, so disable by default - option( NO_ASM "Disable assembly code" ON ) -endif() if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) option( NO_STRIP "Do not strip Release or MinSizeRel builds" ) # At least some versions of Xcode fail if you strip with the linker @@ -114,7 +108,6 @@ if( WIN32 ) ) set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) - set( NASM_NAMES nasmw nasm ) find_path( D3D_INCLUDE_DIR d3d9.h PATHS ENV DXSDK_DIR @@ -239,7 +232,6 @@ 
else() endif() endif() endif() - set( NASM_NAMES nasm ) if( NO_GTK ) add_definitions( -DNO_GTK ) @@ -379,105 +371,6 @@ endif() find_package( FluidSynth ) -# Search for NASM - -if( NOT NO_ASM ) - if( UNIX AND X64 ) - find_program( GAS_PATH as ) - - if( GAS_PATH ) - set( ASSEMBLER ${GAS_PATH} ) - else() - message( STATUS "Could not find as. Disabling assembly code." ) - set( NO_ASM ON ) - endif() - else() - find_program( NASM_PATH NAMES ${NASM_NAMES} ) - find_program( YASM_PATH yasm ) - - if( X64 ) - if( YASM_PATH ) - set( ASSEMBLER ${YASM_PATH} ) - else() - message( STATUS "Could not find YASM. Disabling assembly code." ) - set( NO_ASM ON ) - endif() - else() - if( NASM_PATH ) - set( ASSEMBLER ${NASM_PATH} ) - else() - message( STATUS "Could not find NASM. Disabling assembly code." ) - set( NO_ASM ON ) - endif() - endif() - endif() - - # I think the only reason there was a version requirement was because the - # executable name for Windows changed from 0.x to 2.0, right? This is - # how to do it in case I need to do something similar later. - - # execute_process( COMMAND ${NASM_PATH} -v - # OUTPUT_VARIABLE NASM_VER_STRING ) - # string( REGEX REPLACE ".*version ([0-9]+[.][0-9]+).*" "\\1" NASM_VER "${NASM_VER_STRING}" ) - # if( NOT NASM_VER LESS 2 ) - # message( SEND_ERROR "NASM version should be 2 or later. (Installed version is ${NASM_VER}.)" ) - # endif() -endif() - -if( NOT NO_ASM ) - # Valgrind support is meaningless without assembly code. - if( VALGRIND ) - add_definitions( -DVALGRIND_AWARE=1 ) - # If you're Valgrinding, you probably want to keep symbols around. 
- set( NO_STRIP ON ) - endif() - - # Tell CMake how to assemble our files - if( UNIX ) - set( ASM_OUTPUT_EXTENSION .o ) - if( X64 ) - set( ASM_FLAGS ) - set( ASM_SOURCE_EXTENSION .s ) - else() - if( APPLE ) - set( ASM_FLAGS -fmacho -DM_TARGET_MACHO ) - else() - set( ASM_FLAGS -felf -DM_TARGET_LINUX ) - endif() - set( ASM_FLAGS "${ASM_FLAGS}" -i${CMAKE_CURRENT_SOURCE_DIR}/ ) - set( ASM_SOURCE_EXTENSION .asm ) - endif() - else() - set( ASM_OUTPUT_EXTENSION .obj ) - set( ASM_SOURCE_EXTENSION .asm ) - if( X64 ) - set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 ) - else() - set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ ) - endif() - endif() - if( WIN32 AND NOT X64 ) - set( FIXRTEXT fixrtext ) - else() - set( FIXRTEXT "" ) - endif() - message( STATUS "Selected assembler: ${ASSEMBLER}" ) - MACRO( ADD_ASM_FILE indir infile ) - set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" ) - if( WIN32 AND NOT X64 ) - set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" ) - else() - set( FIXRTEXT_${infile} COMMAND "" ) - endif() - add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}} - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir} - COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}" - ${FIXRTEXT_${infile}} - DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} ) - set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" ) - ENDMACRO() -endif() - # Decide on SSE setup set( SSE_MATTERS NO ) @@ -756,24 +649,6 @@ else() set( OTHER_SYSTEM_SOURCES ${PLAT_WIN32_SOURCES} ${PLAT_OSX_SOURCES} ${PLAT_COCOA_SOURCES} ) endif() -if( NOT ASM_SOURCES ) - set( ASM_SOURCES "" ) -endif() - -if( NO_ASM ) - add_definitions( -DNOASM ) -else() - if( X64 ) - ADD_ASM_FILE( asm_x86_64 tmap3 ) - else() - ADD_ASM_FILE( asm_ia32 a ) - ADD_ASM_FILE( asm_ia32 misc ) - ADD_ASM_FILE( asm_ia32 tmap ) - 
ADD_ASM_FILE( asm_ia32 tmap2 ) - ADD_ASM_FILE( asm_ia32 tmap3 ) - endif() -endif() - add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y ) @@ -866,16 +741,6 @@ set( NOT_COMPILED_SOURCE_FILES scripting/zscript/zcc-parse.lemon zcc-parse.c zcc-parse.h - - # We could have the ASM macro add these files, but it wouldn't add all - # platforms. - asm_ia32/a.asm - asm_ia32/misc.asm - asm_ia32/tmap.asm - asm_ia32/tmap2.asm - asm_ia32/tmap3.asm - asm_x86_64/tmap3.asm - asm_x86_64/tmap3.s ) set( FASTMATH_PCH_SOURCES @@ -1208,7 +1073,6 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE ${HEADER_FILES} ${NOT_COMPILED_SOURCE_FILES} __autostart.cpp - ${ASM_SOURCES} ${SYSTEM_SOURCES} ${X86_SOURCES} ${FASTMATH_SOURCES} @@ -1371,8 +1235,6 @@ install(TARGETS zdoom DESTINATION ${INSTALL_PATH} COMPONENT "Game executable") -source_group("Assembly Files\\ia32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_ia32/.+") -source_group("Assembly Files\\x86_64" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_x86_64/.+") source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+") source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+") source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h) diff --git a/src/doomtype.h b/src/doomtype.h index a9818df78c..264713d1b7 100644 --- a/src/doomtype.h +++ b/src/doomtype.h @@ -48,57 +48,6 @@ class PClassActor; typedef TMap FClassMap; -// Since this file is included by everything, it seems an appropriate place -// to check the NOASM/USEASM macros. 
- -// There are three assembly-related macros: -// -// NOASM - Assembly code is disabled -// X86_ASM - Using ia32 assembly code -// X64_ASM - Using amd64 assembly code -// -// Note that these relate only to using the pure assembly code. Inline -// assembly may still be used without respect to these macros, as -// deemed appropriate. - -#ifndef NOASM -// Select the appropriate type of assembly code to use. - -#if defined(_M_IX86) || defined(__i386__) - -#define X86_ASM -#ifdef X64_ASM -#undef X64_ASM -#endif - -#elif defined(_M_X64) || defined(__amd64__) - -#define X64_ASM -#ifdef X86_ASM -#undef X86_ASM -#endif - -#else - -#define NOASM - -#endif - -#endif - -#ifdef NOASM -// Ensure no assembly macros are defined if NOASM is defined. - -#ifdef X86_ASM -#undef X86_ASM -#endif - -#ifdef X64_ASM -#undef X64_ASM -#endif - -#endif - #if defined(_MSC_VER) #define NOVTABLE __declspec(novtable) diff --git a/src/r_main.cpp b/src/r_main.cpp index a6ae47de1b..0ee075140d 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -812,9 +812,6 @@ void R_SetupBuffer () { dc_pitch = pitch; R_InitFuzzTable (pitch); -#if defined(X86_ASM) || defined(X64_ASM) - ASM_PatchPitch (); -#endif } dc_destorg = lineptr; for (int i = 0; i < RenderTarget->GetHeight(); i++) diff --git a/src/v_video.h b/src/v_video.h index 971aa6c13d..b72f670947 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -516,10 +516,6 @@ void V_RefreshViewBorder (); void V_SetBorderNeedRefresh(); -#if defined(X86_ASM) || defined(X64_ASM) -extern "C" void ASM_PatchPitch (void); -#endif - int CheckRatio (int width, int height, int *trueratio=NULL); static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); } inline bool IsRatioWidescreen(int ratio) { return (ratio & 3) != 0; } From 5910067c4473a682727d8e1e7cdd92f0ea060260 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 7 Dec 2016 14:26:26 +0100 Subject: [PATCH 4/9] - discontinue using the MMX assembly version of 
DoBlending. Some benchmarking shows that on SSE systems it only harms performance and compared to the intrinsics version the gains are too marginal for something this infrequently called. Doing 100000 calls of DoBlending results in a 5 ms decrease of using assembly vs intrinsics on a 3.4 GHz Core i7, meaning that even on a computer that is 10x slower you can still do 1000 or so blends per frame without a speed hit. --- src/v_palette.cpp | 8 +++++--- src/x86.cpp | 23 +++-------------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/src/v_palette.cpp b/src/v_palette.cpp index 934a57dd3c..49fbd6cb6f 100644 --- a/src/v_palette.cpp +++ b/src/v_palette.cpp @@ -384,8 +384,8 @@ void InitPalette () R_InitColormaps (); } -extern "C" void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); -extern void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); +void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); +void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) { @@ -395,6 +395,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in { memcpy (to, from, count * sizeof(DWORD)); } + return; } else if (a == 256) { @@ -405,6 +406,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in { to[i] = t; } + return; } #if defined(_M_X64) || defined(_M_IX86) || defined(__i386__) || defined(__amd64__) else if (CPU.bSSE2) @@ -423,7 +425,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in } } #endif -#ifdef X86_ASM +#if defined(_M_IX86) || defined(__i386__) else if (CPU.bMMX) { if (count >= 4) diff --git a/src/x86.cpp b/src/x86.cpp index f6c878da61..17c946ac0f 100644 --- a/src/x86.cpp +++ b/src/x86.cpp @@ -227,10 +227,9 @@ 
void DumpCPUInfo(const CPUInfo *cpu) } } -#if 0 -// Compiler output for this function is crap compared to the assembly -// version, which is why it isn't used. -void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) +#if !defined(__amd64__) && !defined(_M_X64) + +void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) { __m64 blendcolor; __m64 blendalpha; @@ -272,9 +271,6 @@ void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g } #endif -#ifdef X86_ASM -extern "C" void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); -#endif void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) { @@ -288,17 +284,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g unaligned = ((size_t)from | (size_t)to) & 0xF; -#ifdef X86_ASM - // For unaligned accesses, the assembly MMX version is slightly faster. - // Note that using unaligned SSE loads and stores is still faster than - // the compiler-generated MMX version. - if (unaligned) - { - DoBlending_MMX(from, to, count, r, g, b, a); - return; - } -#endif - #if defined(__amd64__) || defined(_M_X64) long long color; @@ -326,7 +311,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g zero = _mm_setzero_si128(); -#ifndef X86_ASM if (unaligned) { for (count >>= 2; count > 0; --count) @@ -346,7 +330,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g } } else -#endif { for (count >>= 2; count > 0; --count) { From ed141943e1391e4abc2949e5f01feaf6ad53145a Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 7 Dec 2016 14:39:15 +0100 Subject: [PATCH 5/9] - removed use of BestColor_MMX because there is no measurable improvement at all on a modern system. On top of that this function does not get called nearly often enough to justify the hassle. 
Like DoBlending this would require hundreds of calls per frame to make any impact that would be measurable. --- src/v_palette.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/v_palette.cpp b/src/v_palette.cpp index 49fbd6cb6f..aa39ba7913 100644 --- a/src/v_palette.cpp +++ b/src/v_palette.cpp @@ -106,20 +106,11 @@ CCMD (bumpgamma) /* Palette management stuff */ /****************************/ -extern "C" BYTE BestColor_MMX (DWORD rgb, const DWORD *pal); - int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num) { -#ifdef X86_ASM - if (CPU.bMMX) - { - int pre = 256 - num - first; - return BestColor_MMX (((first+pre)<<24)|(r<<16)|(g<<8)|b, pal_in-pre) - pre; - } -#endif const PalEntry *pal = (const PalEntry *)pal_in; int bestcolor = first; - int bestdist = 257*257+257*257+257*257; + int bestdist = 257 * 257 + 257 * 257 + 257 * 257; for (int color = first; color < num; color++) { From a118903e3ef88e093ab5cfb3babef03684cd9608 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 7 Dec 2016 14:41:21 +0100 Subject: [PATCH 6/9] - complete removal of assembly stuff. Nothing of this gets used anymore. 
--- src/asm_ia32/a.asm | 812 ------------------------------ src/asm_ia32/misc.asm | 200 -------- src/asm_ia32/tmap.asm | 1002 -------------------------------------- src/asm_ia32/tmap2.asm | 643 ------------------------ src/asm_ia32/tmap3.asm | 344 ------------- src/asm_x86_64/tmap3.asm | 150 ------ src/asm_x86_64/tmap3.s | 141 ------ 7 files changed, 3292 deletions(-) delete mode 100644 src/asm_ia32/a.asm delete mode 100644 src/asm_ia32/misc.asm delete mode 100644 src/asm_ia32/tmap.asm delete mode 100644 src/asm_ia32/tmap2.asm delete mode 100644 src/asm_ia32/tmap3.asm delete mode 100644 src/asm_x86_64/tmap3.asm delete mode 100644 src/asm_x86_64/tmap3.s diff --git a/src/asm_ia32/a.asm b/src/asm_ia32/a.asm deleted file mode 100644 index 786396d4a4..0000000000 --- a/src/asm_ia32/a.asm +++ /dev/null @@ -1,812 +0,0 @@ -; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -; Ken Silverman's official web site: "http://www.advsys.net/ken" -; See the included license file "BUILDLIC.TXT" for license info. 
-; This file has been modified from Ken Silverman's original release - -%include "valgrind.inc" - - SECTION .data - -%ifndef M_TARGET_LINUX -%define ylookup _ylookup -%define vince _vince -%define vplce _vplce -%define palookupoffse _palookupoffse -%define bufplce _bufplce -%define dc_iscale _dc_iscale -%define dc_colormap _dc_colormap -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_source _dc_source -%define dc_texturefrac _dc_texturefrac - -%define setupvlineasm _setupvlineasm -%define prevlineasm1 _prevlineasm1 -%define vlineasm1 _vlineasm1 -%define vlineasm4 _vlineasm4 - -%define setupmvlineasm _setupmvlineasm -%define mvlineasm1 _mvlineasm1 -%define mvlineasm4 _mvlineasm4 - -%define R_SetupDrawSlabA _R_SetupDrawSlabA -%define R_DrawSlabA _R_DrawSlabA -%endif - -EXTERN ylookup ; near - -EXTERN vplce ; near -EXTERN vince ; near -EXTERN palookupoffse ; near -EXTERN bufplce ; near - -EXTERN dc_iscale -EXTERN dc_colormap -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_source -EXTERN dc_texturefrac - - SECTION .text - -ALIGN 16 -GLOBAL setvlinebpl_ -setvlinebpl_: - mov [fixchain1a+2], eax - mov [fixchain1b+2], eax - mov [fixchain2a+2], eax - mov [fixchain1m+2], eax - mov [fixchain2ma+2], eax - mov [fixchain2mb+2], eax - selfmod fixchain1a, fixchain2mb+6 - -setdrawslabbpl: - mov dword [voxbpl1+2], eax - mov dword [voxbpl2+2], eax - mov dword [voxbpl3+2], eax - mov dword [voxbpl4+2], eax - mov dword [voxbpl5+2], eax - mov dword [voxbpl6+2], eax - mov dword [voxbpl7+2], eax - mov dword [voxbpl8+2], eax - selfmod voxbpl1, voxpl8+6 - ret - - SECTION .data - -lastslabcolormap: - dd 4 - - SECTION .text - -GLOBAL R_SetupDrawSlabA -GLOBAL @R_SetupDrawSlabA@4 -R_SetupDrawSlabA: - mov ecx, [esp+4] -@R_SetupDrawSlabA@4: - cmp [lastslabcolormap], ecx - je .done - mov [lastslabcolormap], ecx - mov dword [voxpal1+2], ecx - mov dword [voxpal2+2], ecx - mov dword [voxpal3+2], ecx - mov dword [voxpal4+2], ecx - mov dword [voxpal5+2], ecx - mov dword [voxpal6+2], ecx - 
mov dword [voxpal7+2], ecx - mov dword [voxpal8+2], ecx -.done ret - - -; pass it log2(texheight) - -ALIGN 16 -GLOBAL setupvlineasm -setupvlineasm: - mov ecx, [esp+4] - - ;First 2 lines for VLINEASM1, rest for VLINEASM4 - mov byte [premach3a+2], cl - mov byte [mach3a+2], cl - - mov byte [machvsh1+2], cl ;32-shy - mov byte [machvsh3+2], cl ;32-shy - mov byte [machvsh5+2], cl ;32-shy - mov byte [machvsh6+2], cl ;32-shy - mov ch, cl - sub ch, 16 - mov byte [machvsh8+2], ch ;16-shy - neg cl - mov byte [machvsh7+2], cl ;shy - mov byte [machvsh9+2], cl ;shy - mov byte [machvsh10+2], cl ;shy - mov byte [machvsh11+2], cl ;shy - mov byte [machvsh12+2], cl ;shy - mov eax, 1 - shl eax, cl - dec eax - mov dword [machvsh2+2], eax ;(1<>sh) -;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh)) -machvsh5: shl esi, 88h ;32-sh - mov eax, edx -machvsh6: shl ebp, 88h ;32-sh - and edx, 0000ffffh -machvsh7: shr eax, 88h ;sh - add esi, eax -machvsh8: shl edx, 88h ;16-sh - add ebp, edx - mov dword [vplce+12], esi - mov dword [vplce+4], ebp - - pop edi - pop esi - pop ebx - pop ebp - ret - -;************************************************************************* -;************************* Masked Vertical Lines ************************* -;************************************************************************* - -; pass it log2(texheight) - -ALIGN 16 -GLOBAL setupmvlineasm -setupmvlineasm: - mov ecx, dword [esp+4] - mov byte [maskmach3a+2], cl - mov byte [machmv13+2], cl - - mov byte [machmv14+2], cl - mov byte [machmv15+2], cl - mov byte [machmv16+2], cl - selfmod maskmach3a, machmv13+6 - ret - -ALIGN 16 -GLOBAL mvlineasm1 ;Masked vline -mvlineasm1: - push ebx - push edi - push esi - push ebp - mov ecx, [dc_count] - mov ebp, [dc_colormap] - mov edi, [dc_dest] - mov eax, [dc_iscale] - mov edx, [dc_texturefrac] - mov esi, [dc_source] -beginmvline: - mov ebx, edx -maskmach3a: shr ebx, 32 - movzx ebx, byte [esi+ebx] - cmp ebx, 0 - je short skipmask1 -maskmach3c: mov bl, byte [ebp+ebx] - mov 
[edi], bl -skipmask1: add edx, eax -fixchain1m: add edi, 320 - dec ecx - jnz short beginmvline - - pop ebp - pop esi - pop edi - pop ebx - mov eax, edx - ret - -ALIGN 16 -GLOBAL mvlineasm4 -mvlineasm4: - push ebx - push esi - push edi - push ebp - - mov ecx,[dc_count] - mov edi,[dc_dest] - - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov [machmv1+3], eax - mov [machmv4+3], ebx - mov eax, [bufplce+8] - mov ebx, [bufplce+12] - mov [machmv7+3], eax - mov [machmv10+3], ebx - - mov eax, [palookupoffse] - mov ebx, [palookupoffse+4] - mov [machmv2+2], eax - mov [machmv5+2], ebx - mov eax, [palookupoffse+8] - mov ebx, [palookupoffse+12] - mov [machmv8+2], eax - mov [machmv11+2], ebx - - mov eax, [vince] ;vince - mov ebx, [vince+4] - xor bl, bl - mov [machmv3+2], eax - mov [machmv6+2], ebx - mov eax, [vince+8] - mov ebx, [vince+12] - mov [machmv9+2], eax - mov [machmv12+2], ebx - - inc ecx - push ecx - mov ecx, [vplce+0] - mov edx, [vplce+4] - mov esi, [vplce+8] - mov ebp, [vplce+12] -fixchain2ma: sub edi, 320 - - selfmod beginmvlineasm4, machmv2+6 - jmp short beginmvlineasm4 -ALIGN 16 -beginmvlineasm4: - dec dword [esp] - jz near endmvlineasm4 - - mov eax, ebp - mov ebx, esi -machmv16: shr eax, 32 -machmv12: add ebp, 0x88888888 ;vince[3] -machmv15: shr ebx, 32 -machmv9: add esi, 0x88888888 ;vince[2] -machmv10: movzx eax, byte [eax+0x88888888];bufplce[3] -machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2] - cmp eax, 1 - adc dl, dl - cmp ebx, 1 - adc dl, dl -machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2] -machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3] - - mov eax, edx -machmv6: add edx, 0x88888888 ;vince[1] -machmv14: shr eax, 32 - shl ebx, 16 -machmv4: movzx eax, byte [eax+0x88888888];bufplce[1] - cmp eax, 1 - adc dl, dl -machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1] - - mov eax, ecx -machmv3: add ecx, 0x88888888 ;vince[0] -machmv13: shr eax, 32 -machmv1: movzx eax, byte [eax+0x88888888];bufplce[0] - cmp eax, 1 - adc dl, dl -machmv2: mov bl, 
[eax+0x88888888] ;palookupoffs[0] - - xor eax, eax - shl dl, 4 -fixchain2mb: add edi, 320 - mov al, dl - add eax, mvcase15 - jmp eax ;16 byte cases - -ALIGN 16 -endmvlineasm4: - mov [vplce], ecx - mov [vplce+4], edx - mov [vplce+8], esi - mov [vplce+12], ebp - pop ecx - pop ebp - pop edi - pop esi - pop ebx - ret - - ;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7 -ALIGN 16 -mvcase15: mov [edi], ebx - jmp beginmvlineasm4 -ALIGN 16 -mvcase14: mov [edi+1], bh - shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase13: mov [edi], bl - shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase12: shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase11: mov [edi], bx - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase10: mov [edi+1], bh - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase9: mov [edi], bl - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase8: shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase7: mov [edi], bx - shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase6: shr ebx, 8 - mov [edi+1], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase5: mov [edi], bl - shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase4: shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase3: mov [edi], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase2: mov [edi+1], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase1: mov [edi], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase0: jmp beginmvlineasm4 - -align 16 - - -;************************************************************************* -;***************************** Voxel Slabs ******************************* -;************************************************************************* - -GLOBAL R_DrawSlabA -R_DrawSlabA: - push ebx - push ebp - push esi - push edi - - mov eax, [esp+5*4+0] - mov ebx, [esp+5*4+4] - mov ecx, [esp+5*4+8] - mov edx, [esp+5*4+12] - mov esi, [esp+5*4+16] - mov edi, 
[esp+5*4+20] - - cmp eax, 2 - je voxbegdraw2 - ja voxskip2 - xor eax, eax -voxbegdraw1: - mov ebp, ebx - shr ebp, 16 - add ebx, edx - dec ecx - mov al, byte [esi+ebp] -voxpal1: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl1: lea edi, [edi+88888888h] - jnz voxbegdraw1 - jmp voxskipslab5 - -voxbegdraw2: - mov ebp, ebx - shr ebp, 16 - add ebx, edx - xor eax, eax - dec ecx - mov al, byte [esi+ebp] -voxpal2: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl2: lea edi, [edi+88888888h] - jnz voxbegdraw2 - jmp voxskipslab5 - -voxskip2: - cmp eax, 4 - jne voxskip4 - xor eax, eax -voxbegdraw4: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal3: mov al, byte [eax+88888888h] - mov ah, al - shl eax, 8 - mov al, ah - shl eax, 8 - mov al, ah - mov dword [edi], eax -voxbpl3: add edi, 88888888h - dec ecx - jnz voxbegdraw4 - jmp voxskipslab5 - -voxskip4: - add eax, edi - - test edi, 1 - jz voxskipslab1 - cmp edi, eax - je voxskipslab1 - - push eax - push ebx - push ecx - push edi -voxbegslab1: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal4: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl4: add edi, 88888888h - dec ecx - jnz voxbegslab1 - pop edi - pop ecx - pop ebx - pop eax - inc edi - -voxskipslab1: - push eax - test edi, 2 - jz voxskipslab2 - dec eax - cmp edi, eax - jge voxskipslab2 - - push ebx - push ecx - push edi -voxbegslab2: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal5: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl5: add edi, 88888888h - dec ecx - jnz voxbegslab2 - pop edi - pop ecx - pop ebx - add edi, 2 - -voxskipslab2: - mov eax, [esp] - - sub eax, 3 - cmp edi, eax - jge voxskipslab3 - -voxprebegslab3: - push ebx - push ecx - push edi -voxbegslab3: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal6: mov al, byte [eax+88888888h] 
- mov ah, al - shl eax, 8 - mov al, ah - shl eax, 8 - mov al, ah - mov dword [edi], eax -voxbpl6: add edi, 88888888h - dec ecx - jnz voxbegslab3 - pop edi - pop ecx - pop ebx - add edi, 4 - - mov eax, [esp] - - sub eax, 3 - cmp edi, eax - jl voxprebegslab3 - -voxskipslab3: - mov eax, [esp] - - dec eax - cmp edi, eax - jge voxskipslab4 - - push ebx - push ecx - push edi -voxbegslab4: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal7: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl7: add edi, 88888888h - dec ecx - jnz voxbegslab4 - pop edi - pop ecx - pop ebx - add edi, 2 - -voxskipslab4: - pop eax - - cmp edi, eax - je voxskipslab5 - -voxbegslab5: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal8: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl8: add edi, 88888888h - dec ecx - jnz voxbegslab5 - -voxskipslab5: - pop edi - pop esi - pop ebp - pop ebx - ret - -align 16 - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_a_end -_rtext_a_end: -%endif diff --git a/src/asm_ia32/misc.asm b/src/asm_ia32/misc.asm deleted file mode 100644 index b825a4d02a..0000000000 --- a/src/asm_ia32/misc.asm +++ /dev/null @@ -1,200 +0,0 @@ -;* -;* misc.nas -;* Miscellaneous assembly functions -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. -;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. 
The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* - -BITS 32 - -%ifndef M_TARGET_LINUX - -%define DoBlending_MMX _DoBlending_MMX -%define BestColor_MMX _BestColor_MMX - -%endif - -%ifdef M_TARGET_WATCOM - SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32 - SEGMENT DATA -%else - SECTION .data -%endif - -Blending256: - dd 0x01000100,0x00000100 - -%ifdef M_TARGET_WATCOM - SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32 - SEGMENT CODE -%else - SECTION .text -%endif - -;----------------------------------------------------------- -; -; DoBlending_MMX -; -; MMX version of DoBlending -; -; (DWORD *from, DWORD *to, count, tor, tog, tob, toa) -;----------------------------------------------------------- - -GLOBAL DoBlending_MMX - -DoBlending_MMX: - pxor mm0,mm0 ; mm0 = 0 - mov eax,[esp+4*4] - shl eax,16 - mov edx,[esp+4*5] - shl edx,8 - or eax,[esp+4*6] - or eax,edx - mov ecx,[esp+4*3] ; ecx = count - movd mm1,eax ; mm1 = 00000000 00RRGGBB - mov eax,[esp+4*7] - shl eax,16 - mov edx,[esp+4*7] - shl edx,8 - or eax,[esp+4*7] - or eax,edx - mov edx,[esp+4*2] ; edx = dest - movd 
mm6,eax ; mm6 = 00000000 00AAAAAA - punpcklbw mm1,mm0 ; mm1 = 000000RR 00GG00BB - movq mm7,[Blending256] - punpcklbw mm6,mm0 ; mm6 = 000000AA 00AA00AA - mov eax,[esp+4*1] ; eax = source - pmullw mm1,mm6 ; mm1 = 000000RR 00GG00BB (multiplied by alpha) - psubusw mm7,mm6 ; mm7 = 000000aa 00aa00aa (one minus alpha) - nop ; Does this actually pair on a Pentium? - -; Do four colors per iteration: Count must be a multiple of four. - -.loop movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1 - add eax,8 - movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1 - punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1 - punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2 - pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb - add edx,8 - pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb - sub ecx,2 - paddusw mm2,mm1 - psrlw mm2,8 - paddusw mm3,mm1 - psrlw mm3,8 - packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1 - movq [edx-8],mm2 - - movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1 - add eax,8 - movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1 - punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1 - punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2 - pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb - add edx,8 - pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb - sub ecx,2 - paddusw mm2,mm1 - psrlw mm2,8 - paddusw mm3,mm1 - psrlw mm3,8 - packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1 - movq [edx-8],mm2 - - jnz .loop - - emms - ret - -;----------------------------------------------------------- -; -; BestColor_MMX -; -; Picks the closest matching color from a palette -; -; Passed FFRRGGBB and palette array in same format -; FF is the index of the first palette entry to consider -; -;----------------------------------------------------------- - -GLOBAL BestColor_MMX -GLOBAL @BestColor_MMX@8 - -BestColor_MMX: - mov ecx,[esp+4] - mov edx,[esp+8] -@BestColor_MMX@8: - pxor mm0,mm0 - movd mm1,ecx ; mm1 = color searching for - mov eax,257*257+257*257+257*257 ;eax = bestdist - push ebx - punpcklbw mm1,mm0 - mov ebx,ecx ; ebx = best color - shr ecx,24 ; ecx = count - and ebx,0xffffff - push esi - push ebp - 
-.loop movd mm2,[edx+ecx*4] ; mm2 = color considering now - inc ecx - punpcklbw mm2,mm0 - movq mm3,mm1 - psubsw mm3,mm2 - pmullw mm3,mm3 ; mm3 = color distance squared - - movd ebp,mm3 ; add the three components - psrlq mm3,32 ; into ebp to get the real - mov esi,ebp ; (squared) distance - shr esi,16 - and ebp,0xffff - add ebp,esi - movd esi,mm3 - add ebp,esi - - jz .perf ; found a perfect match - cmp eax,ebp - jb .skip - mov eax,ebp - lea ebx,[ecx-1] -.skip cmp ecx,256 - jne .loop - mov eax,ebx - pop ebp - pop esi - pop ebx - emms - ret - -.perf lea eax,[ecx-1] - pop ebp - pop esi - pop ebx - emms - ret diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm deleted file mode 100644 index 2096b92229..0000000000 --- a/src/asm_ia32/tmap.asm +++ /dev/null @@ -1,1002 +0,0 @@ -;* -;* tmap.nas -;* The texture-mapping inner loops in pure assembly language. -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. -;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* - -BITS 32 - -%include "valgrind.inc" - -; Segment/section definition macros. - - SECTION .data - -%define SPACEFILLER4 (0x44444444) - -; If you change this in r_draw.c, be sure to change it here, too! -FUZZTABLE equ 50 - -%ifndef M_TARGET_LINUX - -%define ylookup _ylookup -%define centery _centery -%define fuzzpos _fuzzpos -%define fuzzoffset _fuzzoffset -%define NormalLight _NormalLight -%define viewheight _viewheight -%define fuzzviewheight _fuzzviewheight -%define CPU _CPU - -%define dc_pitch _dc_pitch -%define dc_colormap _dc_colormap -%define dc_color _dc_color -%define dc_iscale _dc_iscale -%define dc_texturefrac _dc_texturefrac -%define dc_srcblend _dc_srcblend -%define dc_destblend _dc_destblend -%define dc_source _dc_source -%define dc_yl _dc_yl -%define dc_yh _dc_yh -%define dc_x _dc_x -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_destorg _dc_destorg - -%define Col2RGB8 _Col2RGB8 -%define RGB32k _RGB32k - -%define dc_ctspan _dc_ctspan -%define dc_temp _dc_temp - -%define ds_xstep _ds_xstep -%define ds_ystep _ds_ystep -%define ds_colormap _ds_colormap -%define ds_source _ds_source -%define ds_x1 _ds_x1 -%define ds_x2 _ds_x2 -%define ds_xfrac _ds_xfrac -%define ds_yfrac _ds_yfrac -%define ds_y _ds_y - -%define ds_cursource _ds_cursource -%define ds_curcolormap _ds_curcolormap - -%define R_SetSpanSource_ASM _R_SetSpanSource_ASM -%define 
R_SetSpanSize_ASM _R_SetSpanSize_ASM -%define R_SetSpanColormap_ASM _R_SetSpanColormap_ASM -%define R_SetupShadedCol _R_SetupShadedCol -%define R_SetupAddCol _R_SetupAddCol -%define R_SetupAddClampCol _R_SetupAddClampCol - -%endif - -EXTERN ylookup -EXTERN centery -EXTERN fuzzpos -EXTERN fuzzoffset -EXTERN NormalLight -EXTERN viewheight -EXTERN fuzzviewheight -EXTERN CPU - -EXTERN dc_pitch -EXTERN dc_colormap -EXTERN dc_color -EXTERN dc_iscale -EXTERN dc_texturefrac -EXTERN dc_srcblend -EXTERN dc_destblend -EXTERN dc_source -EXTERN dc_yl -EXTERN dc_yh -EXTERN dc_x -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_destorg - -EXTERN dc_ctspan -EXTERN dc_temp - -EXTERN Col2RGB8 -EXTERN RGB32k - -EXTERN ds_xstep -EXTERN ds_ystep -EXTERN ds_colormap -EXTERN ds_source -EXTERN ds_x1 -EXTERN ds_x2 -EXTERN ds_xfrac -EXTERN ds_yfrac -EXTERN ds_y - -GLOBAL ds_cursource -GLOBAL ds_curcolormap - - -ds_cursource: - DD 0 - -ds_curcolormap: - DD 0 - - -; Local stuff: -lastAddress DD 0 -pixelcount DD 0 - - SECTION .text - - -GLOBAL @R_SetSpanSource_ASM@4 -GLOBAL R_SetSpanSource_ASM - -R_SetSpanSource_ASM: - mov ecx,[esp+4] - -@R_SetSpanSource_ASM@4: - mov [spreada+2],ecx - mov [spreadb+2],ecx - mov [spreadc+2],ecx - mov [spreadd+2],ecx - mov [spreade+2],ecx - mov [spreadf+2],ecx - mov [spreadg+2],ecx - - mov [mspreada+2],ecx - mov [mspreadb+2],ecx - mov [mspreadc+2],ecx - mov [mspreadd+2],ecx - mov [mspreade+2],ecx - mov [mspreadf+2],ecx - mov [mspreadg+2],ecx - - selfmod spreada, mspreadg+6 - - mov [ds_cursource],ecx - ret - -GLOBAL @R_SetSpanColormap_ASM@4 -GLOBAL R_SetSpanColormap_ASM - -R_SetSpanColormap_ASM: - mov ecx,[esp+4] - -@R_SetSpanColormap_ASM@4: - mov [spmapa+2],ecx - mov [spmapb+2],ecx - mov [spmapc+2],ecx - mov [spmapd+2],ecx - mov [spmape+2],ecx - mov [spmapf+2],ecx - mov [spmapg+2],ecx - - mov [mspmapa+2],ecx - mov [mspmapb+2],ecx - mov [mspmapc+2],ecx - mov [mspmapd+2],ecx - mov [mspmape+2],ecx - mov [mspmapf+2],ecx - mov [mspmapg+2],ecx - - selfmod spmapa, mspmapg+6 - 
- mov [ds_curcolormap],ecx - ret - -GLOBAL R_SetSpanSize_ASM - -EXTERN SetTiltedSpanSize - -R_SetSpanSize_ASM: - mov edx,[esp+4] - mov ecx,[esp+8] - call SetTiltedSpanSize - - mov [dsy1+2],dl - mov [dsy2+2],dl - - mov [dsx1+2],cl - mov [dsx2+2],cl - mov [dsx3+2],cl - mov [dsx4+2],cl - mov [dsx5+2],cl - mov [dsx6+2],cl - mov [dsx7+2],cl - - mov [dmsy1+2],dl - mov [dmsy2+2],dl - - mov [dmsx1+2],cl - mov [dmsx2+2],cl - mov [dmsx3+2],cl - mov [dmsx4+2],cl - mov [dmsx5+2],cl - mov [dmsx6+2],cl - mov [dmsx7+2],cl - - push ecx - add ecx,edx - mov eax,1 - shl eax,cl - dec eax - mov [dsm1+2],eax - mov [dsm5+1],eax - mov [dsm6+1],eax - mov [dsm7+1],eax - - mov [dmsm1+2],eax - mov [dmsm5+1],eax - mov [dmsm6+1],eax - mov [dmsm7+1],eax - pop ecx - ror eax,cl - mov [dsm2+2],eax - mov [dsm3+2],eax - mov [dsm4+2],eax - - mov [dmsm2+2],eax - mov [dmsm3+2],eax - mov [dmsm4+2],eax - and eax,0xffff - not eax - mov [dsm8+2],eax - mov [dsm9+2],eax - - mov [dmsm8+2],eax - mov [dmsm9+2],eax - - neg dl - mov [dsy3+2],dl - mov [dsy4+2],dl - - mov [dmsy3+2],dl - mov [dmsy4+2],dl - - selfmod dsy1, dmsm7+6 - -aret: ret - -%ifdef M_TARGET_MACHO - SECTION .text align=64 -%else - SECTION .rtext progbits alloc exec write align=64 -%endif - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap_start -_rtext_tmap_start: -%endif - -rtext_start: - -GLOBAL @R_DrawSpanP_ASM@0 -GLOBAL _R_DrawSpanP_ASM -GLOBAL R_DrawSpanP_ASM - -; eax: scratch -; ebx: zero -; ecx: yfrac at top end, xfrac int part at low end -; edx: xfrac frac part at top end -; edi: dest -; ebp: scratch -; esi: count -; [esp]: xstep -; [esp+4]: ystep - - align 16 - -@R_DrawSpanP_ASM@0: -_R_DrawSpanP_ASM: -R_DrawSpanP_ASM: - mov eax,[ds_x2] - mov ecx,[ds_x1] - sub eax,ecx - jl near rdspret ; count < 0: nothing to do, so leave - - push ebx - push edi - push ebp - push esi - sub esp, 8 - - mov edi,ecx - add edi,[dc_destorg] - mov ecx,[ds_y] - add edi,[ylookup+ecx*4] - mov edx,[ds_xstep] -dsy1: shl edx,6 - mov ebp,[ds_xstep] -dsy3: shr ebp,26 - xor 
ebx,ebx - lea esi,[eax+1] - mov [esp],edx - mov edx,[ds_ystep] - mov ecx,[ds_xfrac] -dsy4: shr ecx,26 -dsm8: and edx,strict dword 0xffffffc0 - or ebp,edx - mov [esp+4],ebp - mov ebp,[ds_yfrac] - mov edx,[ds_xfrac] -dsy2: shl edx,6 -dsm9: and ebp,strict dword 0xffffffc0 - or ecx,ebp - shr esi,1 - jnc dseven1 - -; do odd pixel - - mov ebp,ecx -dsx1: rol ebp,6 -dsm1: and ebp,0xfff - add edx,[esp] - adc ecx,[esp+4] -spreada mov bl,[ebp+SPACEFILLER4] -spmapa mov bl,[ebx+SPACEFILLER4] - mov [edi],bl - inc edi - -dseven1 shr esi,1 - jnc dsrest - -; do two more pixels - mov ebp,ecx - add edx,[esp] - adc ecx,[esp+4] -dsm2: and ebp,0xfc00003f -dsx2: rol ebp,6 - mov eax,ecx - add edx,[esp] - adc ecx,[esp+4] -spreadb mov bl,[ebp+SPACEFILLER4] ;read texel1 -dsx3: rol eax,6 -dsm6: and eax,0xfff -spmapb mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 - add edi,2 -spreadc mov bl,[eax+SPACEFILLER4] ;read texel2 -spmapc mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi-1],bl ;store texel2 - -; do the rest - -dsrest test esi,esi - jz near dsdone - - align 16 - -dsloop mov ebp,ecx -spstep1d add edx,[esp] -spstep2d adc ecx,[esp+4] -dsm3: and ebp,0xfc00003f -dsx4: rol ebp,6 - mov eax,ecx -spstep1e add edx,[esp] -spstep2e adc ecx,[esp+4] -spreadd mov bl,[ebp+SPACEFILLER4] ;read texel1 -dsx5: rol eax,6 -dsm5: and eax,0xfff -spmapd mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 - mov ebp,ecx -spreade mov bl,[eax+SPACEFILLER4] ;read texel2 -spstep1f add edx,[esp] -spstep2f adc ecx,[esp+4] -dsm4: and ebp,0xfc00003f -dsx6: rol ebp,6 -spmape mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov eax,ecx - mov [edi+1],bl ;store texel2 -spreadf mov bl,[ebp+SPACEFILLER4] ;read texel3 -spmapf mov bl,[ebx+SPACEFILLER4] ;map texel3 - add edi,4 -dsx7: rol eax,6 -dsm7: and eax,0xfff - mov [edi-2],bl ;store texel3 -spreadg mov bl,[eax+SPACEFILLER4] ;read texel4 -spstep1g add edx,[esp] -spstep2g adc ecx,[esp+4] -spmapg mov bl,[ebx+SPACEFILLER4] ;map texel4 - dec esi - mov 
[edi-1],bl ;store texel4 - jnz near dsloop - -dsdone add esp,8 - pop esi - pop ebp - pop edi - pop ebx - -rdspret ret - -; This is the same as the previous routine, except it doesn't draw pixels -; where the texture's color value is 0. - -GLOBAL @R_DrawSpanMaskedP_ASM@0 -GLOBAL _R_DrawSpanMaskedP_ASM -GLOBAL R_DrawSpanMaskedP_ASM - -; eax: scratch -; ebx: zero -; ecx: yfrac at top end, xfrac int part at low end -; edx: xfrac frac part at top end -; edi: dest -; ebp: scratch -; esi: count -; [esp]: xstep -; [esp+4]: ystep - - align 16 - -@R_DrawSpanMaskedP_ASM@0: -_R_DrawSpanMaskedP_ASM: -R_DrawSpanMaskedP_ASM: - mov eax,[ds_x2] - mov ecx,[ds_x1] - sub eax,ecx - jl rdspret ; count < 0: nothing to do, so leave - - push ebx - push edi - push ebp - push esi - sub esp,8 - - mov edi,ecx - add edi,[dc_destorg] - mov ecx,[ds_y] - add edi,[ylookup+ecx*4] - mov edx,[ds_xstep] -dmsy1: shl edx,6 - mov ebp,[ds_xstep] -dmsy3: shr ebp,26 - xor ebx,ebx - lea esi,[eax+1] - mov [esp],edx - mov edx,[ds_ystep] - mov ecx,[ds_xfrac] -dmsy4: shr ecx,26 -dmsm8: and edx,strict dword 0xffffffc0 - or ebp,edx - mov [esp+4],ebp - mov ebp,[ds_yfrac] - mov edx,[ds_xfrac] -dmsy2: shl edx,6 -dmsm9: and ebp,strict dword 0xffffffc0 - or ecx,ebp - shr esi,1 - jnc dmseven1 - -; do odd pixel - - mov ebp,ecx -dmsx1: rol ebp,6 -dmsm1: and ebp,0xfff - add edx,[esp] - adc ecx,[esp+4] -mspreada mov bl,[ebp+SPACEFILLER4] - cmp bl,0 - je mspskipa -mspmapa mov bl,[ebx+SPACEFILLER4] - mov [edi],bl -mspskipa: inc edi - -dmseven1 shr esi,1 - jnc dmsrest - -; do two more pixels - mov ebp,ecx - add edx,[esp] - adc ecx,[esp+4] -dmsm2: and ebp,0xfc00003f -dmsx2: rol ebp,6 - mov eax,ecx - add edx,[esp] - adc ecx,[esp+4] -mspreadb mov bl,[ebp+SPACEFILLER4] ;read texel1 -dmsx3: rol eax,6 -dmsm6: and eax,0xfff - cmp bl,0 - je mspskipb -mspmapb mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 -mspskipb add edi,2 -mspreadc mov bl,[eax+SPACEFILLER4] ;read texel2 - cmp bl,0 - je dmsrest -mspmapc mov 
bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi-1],bl ;store texel2 - -; do the rest - -dmsrest test esi,esi - jz near dmsdone - - align 16 - -dmsloop mov ebp,ecx -mspstep1d add edx,[esp] -mspstep2d adc ecx,[esp+4] -dmsm3: and ebp,0xfc00003f -dmsx4: rol ebp,6 - mov eax,ecx -mspstep1e add edx,[esp] -mspstep2e adc ecx,[esp+4] -mspreadd mov bl,[ebp+SPACEFILLER4] ;read texel1 -dmsx5: rol eax,6 -dmsm5: and eax,0xfff - cmp bl,0 - mov ebp,ecx - je mspreade -mspmapd mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 -mspreade mov bl,[eax+SPACEFILLER4] ;read texel2 -mspstep1f add edx,[esp] -mspstep2f adc ecx,[esp+4] -dmsm4: and ebp,0xfc00003f -dmsx6: rol ebp,6 - cmp bl,0 - mov eax,ecx - je mspreadf -mspmape mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi+1],bl ;store texel2 -mspreadf mov bl,[ebp+SPACEFILLER4] ;read texel3 - add edi,4 -dmsx7: rol eax,6 -dmsm7: and eax,0xfff - cmp bl,0 - je mspreadg -mspmapf mov bl,[ebx+SPACEFILLER4] ;map texel3 - mov [edi-2],bl ;store texel3 -mspreadg mov bl,[eax+SPACEFILLER4] ;read texel4 -mspstep1g add edx,[esp] -mspstep2g adc ecx,[esp+4] - cmp bl,0 - je mspskipg -mspmapg mov bl,[ebx+SPACEFILLER4] ;map texel4 - mov [edi-1],bl ;store texel4 -mspskipg dec esi - jnz near dmsloop - -dmsdone add esp,8 - pop esi - pop ebp - pop edi - pop ebx - - ret - - - - -GLOBAL rt_shaded4cols_asm -GLOBAL _rt_shaded4cols_asm - -rt_shaded4cols_asm: -_rt_shaded4cols_asm: - mov ecx,[esp+8] - push ebp - mov ebp,[esp+16] - sub ebp,ecx - js near s4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] ; eax = destination - push ebx - push esi - mov esi,[dc_temp] - inc ebp ; ebp = count - add eax,[esp+16] - push edi - lea esi,[esi+ecx*4] ; esi = source - - align 16 - -s4loop: movzx edx,byte [esi] - movzx ecx,byte [esi+1] -s4cm1: movzx edx,byte [SPACEFILLER4+edx] ; colormap -s4cm2: movzx edi,byte [SPACEFILLER4+ecx] ; colormap - shl edx,8 - movzx ebx,byte [eax] - shl edi,8 - movzx ecx,byte [eax+1] - sub ebx,edx - sub ecx,edi - mov 
ebx,[Col2RGB8+0x10000+ebx*4] - mov ecx,[Col2RGB8+0x10000+ecx*4] -s4fg1: add ebx,[SPACEFILLER4+edx*4] -s4fg2: add ecx,[SPACEFILLER4+edi*4] - or ebx,0x1f07c1f - or ecx,0x1f07c1f - mov edx,ebx - shr ebx,15 - mov edi,ecx - shr ecx,15 - and edx,ebx - and ecx,edi - mov bl,[RGB32k+edx] - movzx edx,byte [esi+2] - mov bh,[RGB32k+ecx] - movzx ecx,byte [esi+3] - mov [eax],bl - mov [eax+1],bh - -s4cm3: movzx edx,byte [SPACEFILLER4+edx] ; colormap -s4cm4: movzx edi,byte [SPACEFILLER4+ecx] ; colormap - shl edx,8 - movzx ebx,byte [eax+2] - shl edi,8 - movzx ecx,byte [eax+3] - sub ebx,edx - sub ecx,edi - mov ebx,[Col2RGB8+0x10000+ebx*4] - mov ecx,[Col2RGB8+0x10000+ecx*4] -s4fg3: add ebx,[SPACEFILLER4+edx*4] -s4fg4: add ecx,[SPACEFILLER4+edi*4] - or ebx,0x1f07c1f - or ecx,0x1f07c1f - mov edx,ebx - shr ebx,15 - mov edi,ecx - shr ecx,15 - and edx,ebx - and ecx,edi -s4p: add eax,320 ; pitch - add esi,4 - mov bl,[RGB32k+edx] - mov bh,[RGB32k+ecx] -s4p2: mov [eax-320+2],bl -s4p3: mov [eax-320+3],bh - dec ebp - jne s4loop - - pop edi - pop esi - pop ebx -s4nil: pop ebp - ret - - align 16 - -GLOBAL rt_add4cols_asm -GLOBAL _rt_add4cols_asm - -rt_add4cols_asm: -_rt_add4cols_asm: - mov ecx,[esp+8] - push edi - mov edi,[esp+16] - sub edi,ecx - js near a4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] - push ebx - push esi - mov esi,[dc_temp] - push ebp - inc edi - add eax,[esp+20] - lea esi,[esi+ecx*4] - - align 16 -a4loop: - movzx ebx,byte [esi] - movzx edx,byte [esi+1] - movzx ecx,byte [eax] - movzx ebp,byte [eax+1] -a4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -a4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap -a4bg1: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb -a4bg2: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb -a4fg1: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb -a4fg2: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb - or ecx,0x01f07c1f - or ebp,0x01f07c1f - mov ebx,ecx - shr ecx,15 - mov edx,ebp - shr ebp,15 - and ecx,ebx - and ebp,edx - movzx ebx,byte [esi+2] - movzx edx,byte [esi+3] - mov 
cl,[RGB32k+ecx] - mov ch,[RGB32k+ebp] - mov [eax],cl - mov [eax+1],ch - - movzx ecx,byte [eax+2] - movzx ebp,byte [eax+3] -a4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -a4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap -a4bg3: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb -a4bg4: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb -a4fg3: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb -a4fg4: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb - or ecx,0x01f07c1f - or ebp,0x01f07c1f - mov ebx,ecx - shr ecx,15 - mov edx,ebp - shr ebp,15 - and ebx,ecx - and edx,ebp - mov cl,[RGB32k+ebx] - mov ch,[RGB32k+edx] - mov [eax+2],cl - mov [eax+3],ch - - add esi,4 -a4p: add eax,320 ; pitch - sub edi,1 - jne a4loop - pop ebp - pop esi - pop ebx -a4nil: pop edi - ret - - align 16 - -GLOBAL rt_addclamp4cols_asm -GLOBAL _rt_addclamp4cols_asm - -rt_addclamp4cols_asm: -_rt_addclamp4cols_asm: - mov ecx,[esp+8] - push edi - mov edi,[esp+16] - sub edi,ecx - js near ac4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] - push ebx - push esi - mov esi,[dc_temp] - push ebp - inc edi - add eax,[esp+20] - lea esi,[esi+ecx*4] - push edi - - align 16 -ac4loop: - movzx ebx,byte [esi] - movzx edx,byte [esi+1] - mov [esp],edi -ac4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -ac4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap - movzx ecx,byte [eax] - movzx ebp,byte [eax+1] -ac4fg1: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb -ac4fg2: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb -ac4bg1: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb -ac4bg2: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb - mov ecx,ebx - or ebx,0x01f07c1f - and ecx,0x40100400 - and ebx,0x3fffffff - mov edi,ecx - shr ecx,5 - mov ebp,edx - sub edi,ecx - or edx,0x01f07c1f - or ebx,edi - mov ecx,ebx - shr ebx,15 - and ebp,0x40100400 - and ebx,ecx - and edx,0x3fffffff - mov edi,ebp - shr ebp,5 - mov cl,[RGB32k+ebx] - sub edi,ebp - mov [eax],cl - or edx,edi - mov ebp,edx - shr edx,15 - movzx ebx,byte [esi+2] - and ebp,edx - movzx edx,byte [esi+3] -ac4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; 
colormap - mov cl,[RGB32k+ebp] -ac4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap - mov [eax+1],cl - movzx ecx,byte [eax+2] - movzx ebp,byte [eax+3] -ac4fg3: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb -ac4fg4: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb -ac4bg3: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb -ac4bg4: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb - mov ecx,ebx - or ebx,0x01f07c1f - and ecx,0x40100400 - and ebx,0x3fffffff - mov edi,ecx - shr ecx,5 - mov ebp,edx - sub edi,ecx - or edx,0x01f07c1f - or ebx,edi - mov ecx,ebx - shr ebx,15 - and ebp,0x40100400 - and ebx,ecx - and edx,0x3fffffff - mov edi,ebp - shr ebp,5 - mov cl,[RGB32k+ebx] - sub edi,ebp - mov [eax+2],cl - or edx,edi - mov edi,[esp] - mov ebp,edx - shr edx,15 - add esi,4 - and edx,ebp - mov cl,[RGB32k+edx] - mov [eax+3],cl - -ac4p: add eax,320 ; pitch - sub edi,1 - jne ac4loop - pop edi - - pop ebp - pop esi - pop ebx -ac4nil: pop edi - ret - -rtext_end: -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap_end -_rtext_tmap_end: -%endif - align 16 - -;************************ - - SECTION .text - -GLOBAL R_SetupShadedCol -GLOBAL @R_SetupShadedCol@0 - -# Patch the values of dc_colormap and dc_color into the shaded column drawer. - -R_SetupShadedCol: -@R_SetupShadedCol@0: - mov eax,[dc_colormap] - cmp [s4cm1+3],eax - je .cmdone - mov [s4cm1+3],eax - mov [s4cm2+3],eax - mov [s4cm3+3],eax - mov [s4cm4+3],eax -.cmdone mov eax,[dc_color] - lea eax,[Col2RGB8+eax*4] - cmp [s4fg1+3],eax - je .cdone - mov [s4fg1+3],eax - mov [s4fg2+3],eax - mov [s4fg3+3],eax - mov [s4fg4+3],eax - selfmod s4cm1, s4fg4+7 -.cdone ret - -GLOBAL R_SetupAddCol -GLOBAL @R_SetupAddCol@0 - -# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the -# unclamped adding column drawer. 
- -R_SetupAddCol: -@R_SetupAddCol@0: - mov eax,[dc_colormap] - cmp [a4cm1+3],eax - je .cmdone - mov [a4cm1+3],eax - mov [a4cm2+3],eax - mov [a4cm3+3],eax - mov [a4cm4+3],eax -.cmdone mov eax,[dc_srcblend] - cmp [a4fg1+3],eax - je .sbdone - mov [a4fg1+3],eax - mov [a4fg2+3],eax - mov [a4fg3+3],eax - mov [a4fg4+3],eax -.sbdone mov eax,[dc_destblend] - cmp [a4bg1+3],eax - je .dbdone - mov [a4bg1+3],eax - mov [a4bg2+3],eax - mov [a4bg3+3],eax - mov [a4bg4+3],eax - selfmod a4cm1, a4bg4+7 -.dbdone ret - -GLOBAL R_SetupAddClampCol -GLOBAL @R_SetupAddClampCol@0 - -# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the -# add with clamping column drawer. - -R_SetupAddClampCol: -@R_SetupAddClampCol@0: - mov eax,[dc_colormap] - cmp [ac4cm1+3],eax - je .cmdone - mov [ac4cm1+3],eax - mov [ac4cm2+3],eax - mov [ac4cm3+3],eax - mov [ac4cm4+3],eax -.cmdone mov eax,[dc_srcblend] - cmp [ac4fg1+3],eax - je .sbdone - mov [ac4fg1+3],eax - mov [ac4fg2+3],eax - mov [ac4fg3+3],eax - mov [ac4fg4+3],eax -.sbdone mov eax,[dc_destblend] - cmp [ac4bg1+3],eax - je .dbdone - mov [ac4bg1+3],eax - mov [ac4bg2+3],eax - mov [ac4bg3+3],eax - mov [ac4bg4+3],eax - selfmod ac4cm1, ac4bg4+7 -.dbdone ret - -EXTERN setvlinebpl_ -EXTERN setpitch3 - -GLOBAL @ASM_PatchPitch@0 -GLOBAL _ASM_PatchPitch -GLOBAL ASM_PatchPitch - -ASM_PatchPitch: -_ASM_PatchPitch: -@ASM_PatchPitch@0: - mov eax,[dc_pitch] - mov [s4p+1],eax - mov [a4p+1],eax - mov [ac4p+1],eax - mov ecx,eax - neg ecx - inc ecx - inc ecx - mov [s4p2+2],ecx - inc ecx - mov [s4p3+2],ecx - selfmod rtext_start, rtext_end - call setpitch3 - jmp setvlinebpl_ diff --git a/src/asm_ia32/tmap2.asm b/src/asm_ia32/tmap2.asm deleted file mode 100644 index ab1695d3cd..0000000000 --- a/src/asm_ia32/tmap2.asm +++ /dev/null @@ -1,643 +0,0 @@ -;* -;* tmap2.nas -;* The tilted plane inner loop. -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. 
-;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* -;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was -;* actually slightly slower than the more straight-forward approach -;* used here, probably because the trick requires too much setup time. 
-;* - -BITS 32 - -%include "valgrind.inc" - -%define SPACEFILLER4 (0x44444444) - -%ifndef M_TARGET_LINUX - -%define plane_sz _plane_sz -%define plane_su _plane_su -%define plane_sv _plane_sv -%define plane_shade _plane_shade -%define planelightfloat _planelightfloat -%define spanend _spanend -%define ylookup _ylookup -%define dc_destorg _dc_destorg -%define ds_colormap _ds_colormap -%define ds_source _ds_source -%define centery _centery -%define centerx _centerx -%define ds_curtiltedsource _ds_curtiltedsource -%define pviewx _pviewx -%define pviewy _pviewy -%define tiltlighting _tiltlighting - -%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM -%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM -%define R_CalcTiltedLighting _R_CalcTiltedLighting - -%endif - -EXTERN plane_sz -EXTERN plane_su -EXTERN plane_sv -EXTERN planelightfloat -EXTERN spanend -EXTERN ylookup -EXTERN dc_destorg -EXTERN ds_colormap -EXTERN centery -EXTERN centerx -EXTERN ds_source -EXTERN plane_shade -EXTERN pviewx -EXTERN pviewy -EXTERN tiltlighting -EXTERN R_CalcTiltedLighting - -GLOBAL ds_curtiltedsource - -%define sv_i plane_sv -%define sv_j plane_sv+4 -%define sv_k plane_sv+8 - -%define su_i plane_su -%define su_j plane_su+4 -%define su_k plane_su+8 - -%define sz_i plane_sz -%define sz_j plane_sz+4 -%define sz_k plane_sz+8 - -%define SPANBITS 3 - - section .bss - -start_u: resq 1 -start_v: resq 1 -step_u: resq 1 -step_v: resq 1 - -step_iz: resq 1 -step_uz: resq 1 -step_vz: resq 1 - -end_z: resd 1 - - section .data - -ds_curtiltedsource: dd SPACEFILLER4 - -fp_1: -spanrecips: dd 0x3f800000 ; 1/1 - dd 0x3f000000 ; 1/2 - dd 0x3eaaaaab ; 1/3 - dd 0x3e800000 ; 1/4 - dd 0x3e4ccccd ; 1/5 - dd 0x3e2aaaab ; 1/6 - dd 0x3e124925 ; 1/7 -fp_8recip: dd 0x3e000000 ; 1/8 - dd 0x3de38e39 ; 1/9 - dd 0x3dcccccd ; 1/10 - dd 0x3dba2e8c ; 1/11 - dd 0x3daaaaab ; 1/12 - dd 0x3d9d89d9 ; 1/13 - dd 0x3d924925 ; 1/14 - dd 0x3d888889 ; 1/15 - -fp_quickint: dd 0x3f800000 ; 1 - dd 0x40000000 ; 2 - dd 
0x40400000 ; 3 - dd 0x40800000 ; 4 - dd 0x40a00000 ; 5 - dd 0x40c00000 ; 6 - dd 0x40e00000 ; 7 -fp_8: dd 0x41000000 ; 8 - - section .text - -GLOBAL R_SetTiltedSpanSource_ASM -GLOBAL @R_SetTiltedSpanSource_ASM@4 - -R_SetTiltedSpanSource_ASM: - mov ecx,[esp+4] - -@R_SetTiltedSpanSource_ASM@4: - mov [fetch1+3],ecx - mov [fetch2+3],ecx - mov [fetch3+3],ecx - mov [fetch4+3],ecx - mov [fetch5+3],ecx - mov [fetch6+3],ecx - mov [fetch7+3],ecx - mov [fetch8+3],ecx - mov [fetch9+3],ecx - mov [fetch10+3],ecx - mov [ds_curtiltedsource],ecx - selfmod rtext_start, rtext_end - ret - -GLOBAL SetTiltedSpanSize - -SetTiltedSpanSize: - push ecx - mov cl,dl - neg cl - mov eax,1 - shl eax,cl - mov cl,[esp] - neg cl - mov [x1+2],cl - mov [x2+2],cl - mov [x3+2],cl - mov [x4+2],cl - mov [x5+2],cl - mov [x6+2],cl - mov [x7+2],cl - mov [x8+2],cl - mov [x9+2],cl - mov [x10+2],cl - - sub cl,dl - dec eax - mov [y1+2],cl - mov [y2+2],cl - mov [y3+2],cl - mov [y4+2],cl - mov [y5+2],cl - mov [y6+2],cl - mov [y7+2],cl - mov [y8+2],cl - mov [y9+2],cl - mov [y10+2],cl - cmp eax,0 ; if x bits is 0, mask must be 0 too. 
- jz .notted - not eax -.notted: - pop ecx - - mov [m1+2],eax - mov [m2+2],eax - mov [m3+2],eax - mov [m4+2],eax - mov [m5+2],eax - mov [m6+2],eax - mov [m7+2],eax - mov [m8+2],eax - mov [m9+2],eax - mov [m10+2],eax - - selfmod rtext_start, rtext_end - - ret - -%ifndef M_TARGET_MACHO - SECTION .rtext progbits alloc exec write align=64 -%else - SECTION .text align=64 -GLOBAL _rtext_tmap2_start -_rtext_tmap2_start: -%endif - -rtext_start: - -GLOBAL R_DrawTiltedPlane_ASM -GLOBAL @R_DrawTiltedPlane_ASM@8 - -R_DrawTiltedPlane_ASM: - mov ecx,[esp+4] - mov edx,[esp+8] - - ; ecx = y - ; edx = x - -@R_DrawTiltedPlane_ASM@8: - push ebx - push esi - push edi - push ebp - - mov eax,[centery] - movzx ebx,word [spanend+ecx*2] - sub eax,ecx ; eax = centery-y - sub ebx,edx ; ebx = span length - 1 - mov edi,[ylookup+ecx*4] - push eax - add edi,[dc_destorg] - add edi,edx ; edi = frame buffer pointer - sub edx,[centerx] ; edx = x-centerx - push edx - xor eax,eax - - fild dword [esp+4] ; ymul - fild dword [esp] ; xmul | ymul - fld dword [sv_j] ; sv.j | xmul | ymul - fmul st0,st2 ; sv.j*ymul | xmul | ymul - fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul - fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul - fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul - fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul - fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul - faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul - faddp st1,st0 ; 
su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul - fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul - faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul - fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul - fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul - fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul - fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z - fadd dword [su_k] ; u/z | v/z | 1/z - fxch st2 ; 1/z | v/z | u/z - fxch st1 ; v/z | 1/z | u/z - -; if lighting is on, fill out the light table - mov al,[plane_shade] - test al,al - jz .litup - - push ebx - fild dword [esp] ; width | v/z | 1/z | u/z - fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z - fadd st0,st2 ; 1/endz | v/z | 1/z | u/z - fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z - fmul dword [planelightfloat] - fxch st1 - fmul dword [planelightfloat] - sub esp,16 - fstp qword [esp] - fstp qword [esp+8] - call R_CalcTiltedLighting - add esp, 20 - xor eax, eax - -.litup add esp, 8 - -; calculate initial z, u, and v values - fld st1 ; 1/z | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | v/z | 1/z | u/z - - fld st3 ; u/z | z | v/z | 1/z | u/z - fmul st0,st1 ; u | z | v/z | 1/z | u/z - fld st2 ; v/z | u | z | v/z | 1/z | u/z - fmulp st2,st0 ; u | v | v/z | 1/z | u/z - fld st0 - fistp qword [start_u] - fld st1 - fistp qword [start_v] - - cmp ebx,7 ; Do we have at least 8 pixels to plot? 
- jl near ShortStrip - -; yes, we do, so figure out tex coords at end of this span - -; multiply i values by span length (8) - fld dword [su_i] ; su.i - fmul dword [fp_8] ; su.i*8 - fld dword [sv_i] ; sv.i | su.i*8 - fmul dword [fp_8] ; sv.i*8 | su.i*8 - fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8 - fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8 - fxch st2 ; su.i*8 | sv.i*8 | sz.i*8 - fstp qword [step_uz] ; sv.i*8 | sz.i*8 - fstp qword [step_vz] ; sz.i*8 - fst qword [step_iz] ; sz.i*8 - -; find tex coords at start of next span - faddp st4 - fld qword [step_vz] - faddp st3 - fld qword [step_uz] - faddp st5 - - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z - -; now subtract to get stepping values for this span - fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z - fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z - fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z - fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z - fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z - fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z - fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_u] ; u | v | v/z | 1/z | u/z - -FullSpan: - xor eax,eax - cmp ebx,15 ; is there another complete span after this one? 
- jl NextIsShort - -; there is a complete span after this one - fld qword [step_iz] - faddp st4,st0 - fld qword [step_vz] - faddp st3,st0 - fld qword [step_uz] - faddp st5,st0 - jmp StartDiv - -NextIsShort: - cmp ebx,8 ; if next span is no more than 1 pixel, then we already - jle DrawFullSpan ; know everything we need to draw it - - fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z - faddp st5,st0 ; u | v | v/z | 1/z | u/z - -StartDiv: - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - -DrawFullSpan: - mov ecx,[start_v] - mov edx,[start_u] - - add ecx,[pviewy] - add edx,[pviewx] - - mov esi,edx - mov ebp,ecx -x1 shr ebp,26 -m1 and esi,0xfc000000 -y1 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch1 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+0],al - -x2 shr ebp,26 -m2 and esi,0xfc000000 -y2 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch2 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-4] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+1],al - -x3 shr ebp,26 -m3 and esi,0xfc000000 -y3 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch3 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-8] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+2],al - -x4 shr ebp,26 -m4 and esi,0xfc000000 -y4 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch4 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-12] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+3],al - -x5 shr ebp,26 -m5 and 
esi,0xfc000000 -y5 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch5 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-16] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+4],al - -x6 shr ebp,26 -m6 and esi,0xfc000000 -y6 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch6 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-20] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+5],al - -x7 shr ebp,26 -m7 and esi,0xfc000000 -y7 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch7 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-24] -x8 shr ecx,26 - mov al,[ebp+eax] -m8 and edx,0xfc000000 - mov [edi+6],al - -y8 shr edx,20 - mov ebp,[tiltlighting+ebx*4-28] -fetch8 mov al,[edx+ecx+SPACEFILLER4] - mov al,[ebp+eax] - mov [edi+7],al - add edi,8 - - sub ebx,8 - jl near Done - - fld st1 - fistp qword [start_u] - fld st2 - fistp qword [start_v] - - cmp ebx,7 - jl near EndIsShort - - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z - fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z - fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z - fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z - fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z - fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z - fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z - fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_u] ; u | v | v/z | 1/z | u/z - jmp FullSpan - -OnlyOnePixelAtEnd: - fld st0 - fistp qword [start_u] - fld st1 - fistp qword [start_v] - -OnlyOnePixel: - mov edx,[start_v] - mov ecx,[start_u] - add 
edx,[pviewy] - add ecx,[pviewx] -x9 shr edx,26 -m9 and ecx,0xfc000000 -y9 shr ecx,20 - mov ebp,[tiltlighting] -fetch9 mov al,[ecx+edx+SPACEFILLER4] - mov al,[ebp+eax] - mov [edi],al - -Done: - fcompp - fcompp - fstp st0 - - pop ebp - pop edi - pop esi - pop ebx - ret - -ShortStrip: - cmp ebx,0 - jle near OnlyOnePixel - -MoreThanOnePixel: - fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z - faddp st5,st0 ; u | v | v/z | 1/z | u/z - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - jmp CalcPartialSteps - -EndIsShort: - cmp ebx,0 - je near OnlyOnePixelAtEnd - -CalcPartialSteps: - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z - fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z - fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z - fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z - fxch st1 ; v'-v | ustep | v/z | 1/z | u/z - fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z - fxch st1 ; ustep | vstep | v/z | 1/z | u/z - fistp qword [step_u] ; vstep | v/z | 1/z | u/z - fistp qword [step_v] ; v/z | 1/z | u/z - - mov ecx,[start_v] - mov edx,[start_u] - - add ecx,[pviewy] - add edx,[pviewx] - - mov esi,edx - mov ebp,ecx -endloop: -x10 shr ebp,26 -m10 and esi,0xfc000000 - -y10 shr esi,20 - inc edi - - add ecx,[step_v] - add edx,[step_u] - -fetch10 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4] - - mov esi,edx 
- dec ebx - - mov al,[ebp+eax] - mov ebp,ecx - - mov [edi-1],al - jge endloop - - fcompp - fstp st0 - - pop ebp - pop edi - pop esi - pop ebx - ret - -rtext_end: -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap2_end -_rtext_tmap2_end: -%endif diff --git a/src/asm_ia32/tmap3.asm b/src/asm_ia32/tmap3.asm deleted file mode 100644 index bafc33627f..0000000000 --- a/src/asm_ia32/tmap3.asm +++ /dev/null @@ -1,344 +0,0 @@ -%include "valgrind.inc" - -%ifdef M_TARGET_WATCOM - SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32 - SEGMENT DATA -%else - SECTION .data -%endif - -%ifndef M_TARGET_LINUX -%define ylookup _ylookup -%define vplce _vplce -%define vince _vince -%define palookupoffse _palookupoffse -%define bufplce _bufplce -%define dc_iscale _dc_iscale -%define dc_colormap _dc_colormap -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_source _dc_source -%define dc_texturefrac _dc_texturefrac -%define dc_pitch _dc_pitch - -%define setupvlinetallasm _setupvlinetallasm -%define vlinetallasm4 _vlinetallasm4 -%define vlinetallasmathlon4 _vlinetallasmathlon4 -%define vlinetallasm1 _vlinetallasm1 -%define prevlinetallasm1 _prevlinetallasm1 -%endif - -EXTERN vplce -EXTERN vince -EXTERN palookupoffse -EXTERN bufplce - -EXTERN ylookup -EXTERN dc_iscale -EXTERN dc_colormap -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_source -EXTERN dc_texturefrac -EXTERN dc_pitch - -GLOBAL vlt4pitch -GLOBAL vlt1pitch - -%ifdef M_TARGET_WATCOM - SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32 - SEGMENT CODE -%else - SECTION .text -%endif - -ALIGN 16 -GLOBAL setpitch3 -setpitch3: - mov [vltpitch+2], eax - mov [vltpitcha+2],eax - mov [vlt1pitch1+2], eax - mov [vlt1pitch2+2], eax - selfmod vltpitch, vlt1pitch2+6 - ret - -ALIGN 16 -GLOBAL setupvlinetallasm -setupvlinetallasm: - mov ecx, [esp+4] - mov [shifter1+2], cl - mov [shifter2+2], cl - mov [shifter3+2], cl - mov [shifter4+2], cl - mov [shifter1a+2], cl - mov [shifter2a+2], cl - mov [shifter3a+2], cl - mov [shifter4a+2], cl - mov [preshift+2], 
cl - mov [shift11+2], cl - mov [shift12+2], cl - selfmod shifter1, shift12+6 - ret - -%ifdef M_TARGET_MACHO - SECTION .text align=64 -GLOBAL _rtext_tmap3_start -_rtext_tmap3_start: -%else - SECTION .rtext progbits alloc exec write align=64 -%endif - -ALIGN 16 - -GLOBAL vlinetallasm4 -vlinetallasm4: - push ebx - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov ecx, [bufplce+8] - mov edx, [bufplce+12] - mov [source1+3], eax - mov [source2+3], ebx - mov [source3+3], ecx - mov [source4+3], edx - mov eax, [palookupoffse+0] - mov ebx, [palookupoffse+4] - mov ecx, [palookupoffse+8] - mov edx, [palookupoffse+12] - mov [lookup1+2], eax - mov [lookup2+2], ebx - mov [lookup3+2], ecx - mov [lookup4+2], edx - mov eax, [vince+0] - mov ebx, [vince+4] - mov ecx, [vince+8] - mov edx, [vince+12] - mov [step1+2], eax - mov [step2+2], ebx - mov [step3+2], ecx - mov [step4+1], edx - push ebp - push esi - push edi - mov ecx, [dc_count] - mov edi, [dc_dest] - mov eax, dword [ylookup+ecx*4-4] - add eax, edi - sub edi, eax - mov [write1+2],eax - inc eax - mov [write2+2],eax - inc eax - mov [write3+2],eax - inc eax - mov [write4+2],eax - mov ebx, [vplce] - mov ecx, [vplce+4] - mov esi, [vplce+8] - mov eax, [vplce+12] - selfmod loopit, vltpitch - jmp loopit - -ALIGN 16 -loopit: - mov edx, ebx -shifter1: shr edx, 24 -source1: movzx edx, BYTE [edx+0x88888888] -lookup1: mov dl, [edx+0x88888888] -write1: mov [edi+0x88888880], dl -step1: add ebx, 0x88888888 - mov edx, ecx -shifter2: shr edx, 24 -source2: movzx edx, BYTE [edx+0x88888888] -lookup2: mov dl, [edx+0x88888888] -write2: mov [edi+0x88888881], dl -step2: add ecx, 0x88888888 - mov edx, esi -shifter3: shr edx, 24 -source3: movzx edx, BYTE [edx+0x88888888] -lookup3: mov dl, BYTE [edx+0x88888888] -write3: mov [edi+0x88888882], dl -step3: add esi, 0x88888888 - mov edx, eax -shifter4: shr edx, 24 -source4: movzx edx, BYTE [edx+0x88888888] -lookup4: mov dl, [edx+0x88888888] -write4: mov [edi+0x88888883], dl -step4: add eax, 0x88888888 
-vltpitch: add edi, 320 - jle near loopit - - mov [vplce], ebx - mov [vplce+4], ecx - mov [vplce+8], esi - mov [vplce+12], eax - - pop edi - pop esi - pop ebp - pop ebx - - ret - - ALIGN 16 - -GLOBAL vlinetallasmathlon4 -vlinetallasmathlon4: - push ebx - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov ecx, [bufplce+8] - mov edx, [bufplce+12] - mov [source1a+3], eax - mov [source2a+3], ebx - mov [source3a+3], ecx - mov [source4a+3], edx - mov eax, [palookupoffse+0] - mov ebx, [palookupoffse+4] - mov ecx, [palookupoffse+8] - mov edx, [palookupoffse+12] - mov [lookup1a+2], eax - mov [lookup2a+2], ebx - mov [lookup3a+2], ecx - mov [lookup4a+2], edx - mov eax, [vince+0] - mov ebx, [vince+4] - mov ecx, [vince+8] - mov edx, [vince+12] - mov [step1a+2], eax - mov [step2a+2], ebx - mov [step3a+2], ecx - mov [step4a+1], edx - push ebp - push esi - push edi - mov ecx, [dc_count] - mov edi, [dc_dest] - mov eax, dword [ylookup+ecx*4-4] - add eax, edi - sub edi, eax - mov [write1a+2],eax - inc eax - mov [write2a+2],eax - inc eax - mov [write3a+2],eax - inc eax - mov [write4a+2],eax - mov ebp, [vplce] - mov ecx, [vplce+4] - mov esi, [vplce+8] - mov eax, [vplce+12] - selfmod loopita, vltpitcha - jmp loopita - -; Unfortunately, this code has not been carefully analyzed to determine -; how well it utilizes the processor's instruction units. Instead, I just -; kept rearranging code, seeing what sped it up and what slowed it down -; until I arrived at this. The is the fastest version I was able to -; manage, but that does not mean it cannot be made faster with careful -; instructing shuffling. 
- - ALIGN 64 - -loopita: mov edx, ebp - mov ebx, ecx -shifter1a: shr edx, 24 -shifter2a: shr ebx, 24 -source1a: movzx edx, BYTE [edx+0x88888888] -source2a: movzx ebx, BYTE [ebx+0x88888888] -step1a: add ebp, 0x88888888 -step2a: add ecx, 0x88888888 -lookup1a: mov dl, [edx+0x88888888] -lookup2a: mov dh, [ebx+0x88888888] - mov ebx, esi -write1a: mov [edi+0x88888880], dl -write2a: mov [edi+0x88888881], dh -shifter3a: shr ebx, 24 - mov edx, eax -source3a: movzx ebx, BYTE [ebx+0x88888888] -shifter4a: shr edx, 24 -step3a: add esi, 0x88888888 -source4a: movzx edx, BYTE [edx+0x88888888] -step4a: add eax, 0x88888888 -lookup3a: mov bl, [ebx+0x88888888] -lookup4a: mov dl, [edx+0x88888888] -write3a: mov [edi+0x88888882], bl -write4a: mov [edi+0x88888883], dl -vltpitcha: add edi, 320 - jle near loopita - - mov [vplce], ebp - mov [vplce+4], ecx - mov [vplce+8], esi - mov [vplce+12], eax - - pop edi - pop esi - pop ebp - pop ebx - - ret - -ALIGN 16 -GLOBAL prevlinetallasm1 -prevlinetallasm1: - mov ecx, [dc_count] - cmp ecx, 1 - ja vlinetallasm1 - - mov eax, [dc_iscale] - mov edx, [dc_texturefrac] - add eax, edx - mov ecx, [dc_source] -preshift: shr edx, 16 - push ebx - push edi - mov edi, [dc_colormap] - movzx ebx, byte [ecx+edx] - mov ecx, [dc_dest] - mov bl, byte [edi+ebx] - pop edi - mov byte [ecx], bl - pop ebx - ret - -ALIGN 16 -GLOBAL vlinetallasm1 -vlinetallasm1: - push ebp - push ebx - push edi - push esi - - mov ebp, [dc_count] - mov ebx, [dc_texturefrac] ; ebx = frac - mov edi, [dc_dest] - mov ecx, ebx -shift11: shr ecx, 16 - mov esi, [dc_source] - mov edx, [dc_iscale] -vlt1pitch1: sub edi, 0x88888888 - mov eax, [dc_colormap] - -loop2: - movzx ecx, BYTE [esi+ecx] - add ebx, edx -vlt1pitch2: add edi, 0x88888888 - mov cl,[eax+ecx] - mov [edi],cl - mov ecx,ebx -shift12: shr ecx,16 - dec ebp - jnz loop2 - - mov eax,ebx - pop esi - pop edi - pop ebx - pop ebp - ret - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap3_end -_rtext_tmap3_end: -%endif diff --git 
a/src/asm_x86_64/tmap3.asm b/src/asm_x86_64/tmap3.asm deleted file mode 100644 index e0f568fea1..0000000000 --- a/src/asm_x86_64/tmap3.asm +++ /dev/null @@ -1,150 +0,0 @@ -%ifnidn __OUTPUT_FORMAT__,win64 -%error tmap3.asm is for Win64 output. You should use tmap.s for other systems. -%endif - -BITS 64 -DEFAULT REL - -EXTERN vplce -EXTERN vince -EXTERN palookupoffse -EXTERN bufplce - -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_pitch - -SECTION .text - -GLOBAL ASM_PatchPitch -ASM_PatchPitch: - mov ecx, [dc_pitch] - mov [pm+3], ecx - mov [vltpitch+3], ecx - ret - align 16 - -GLOBAL setupvlinetallasm -setupvlinetallasm: - mov [shifter1+2], cl - mov [shifter2+2], cl - mov [shifter3+2], cl - mov [shifter4+2], cl - ret - align 16 - -; Yasm can't do progbits alloc exec for win64? -; Hmm, looks like it's automatic. No worries, then. -SECTION .rtext write ;progbits alloc exec - -GLOBAL vlinetallasm4 -PROC_FRAME vlinetallasm4 - rex_push_reg rbx - push_reg rdi - push_reg r15 - push_reg r14 - push_reg r13 - push_reg r12 - push_reg rbp - push_reg rsi - alloc_stack 8 ; Stack must be 16-byte aligned -END_PROLOGUE -; rax = bufplce base address -; rbx = -; rcx = offset from rdi/count (negative) -; edx/rdx = scratch -; rdi = bottom of columns to write to -; r8d-r11d = column offsets -; r12-r15 = palookupoffse[0] - palookupoffse[4] - - mov ecx, [dc_count] - mov rdi, [dc_dest] - test ecx, ecx - jle vltepilog ; count must be positive - - mov rax, [bufplce] - mov r8, [bufplce+8] - sub r8, rax - mov r9, [bufplce+16] - sub r9, rax - mov r10, [bufplce+24] - sub r10, rax - mov [source2+4], r8d - mov [source3+4], r9d - mov [source4+4], r10d - -pm: imul rcx, 320 - - mov r12, [palookupoffse] - mov r13, [palookupoffse+8] - mov r14, [palookupoffse+16] - mov r15, [palookupoffse+24] - - mov r8d, [vince] - mov r9d, [vince+4] - mov r10d, [vince+8] - mov r11d, [vince+12] - mov [step1+3], r8d - mov [step2+3], r9d - mov [step3+3], r10d - mov [step4+3], r11d - - add rdi, rcx - neg rcx - - mov r8d, 
[vplce] - mov r9d, [vplce+4] - mov r10d, [vplce+8] - mov r11d, [vplce+12] - jmp loopit - -ALIGN 16 -loopit: - mov edx, r8d -shifter1: shr edx, 24 -step1: add r8d, 0x88888888 - movzx edx, BYTE [rax+rdx] - mov ebx, r9d - mov dl, [r12+rdx] -shifter2: shr ebx, 24 -step2: add r9d, 0x88888888 -source2: movzx ebx, BYTE [rax+rbx+0x88888888] - mov ebp, r10d - mov bl, [r13+rbx] -shifter3: shr ebp, 24 -step3: add r10d, 0x88888888 -source3: movzx ebp, BYTE [rax+rbp+0x88888888] - mov esi, r11d - mov bpl, BYTE [r14+rbp] -shifter4: shr esi, 24 -step4: add r11d, 0x88888888 -source4: movzx esi, BYTE [rax+rsi+0x88888888] - mov [rdi+rcx], dl - mov [rdi+rcx+1], bl - mov sil, BYTE [r15+rsi] - mov [rdi+rcx+2], bpl - mov [rdi+rcx+3], sil - -vltpitch: add rcx, 320 - jl loopit - - mov [vplce], r8d - mov [vplce+4], r9d - mov [vplce+8], r10d - mov [vplce+12], r11d - -vltepilog: - add rsp, 8 - pop rsi - pop rbp - pop r12 - pop r13 - pop r14 - pop r15 - pop rdi - pop rbx - ret -vlinetallasm4_end: -ENDPROC_FRAME - ALIGN 16 - diff --git a/src/asm_x86_64/tmap3.s b/src/asm_x86_64/tmap3.s deleted file mode 100644 index 867d11c759..0000000000 --- a/src/asm_x86_64/tmap3.s +++ /dev/null @@ -1,141 +0,0 @@ -#%include "valgrind.inc" - - .section .text - -.globl ASM_PatchPitch -ASM_PatchPitch: - movl dc_pitch(%rip), %ecx - movl %ecx, pm+3(%rip) - movl %ecx, vltpitch+3(%rip) -# selfmod pm, vltpitch+6 - ret - .align 16 - -.globl setupvlinetallasm -setupvlinetallasm: - movb %dil, shifter1+2(%rip) - movb %dil, shifter2+2(%rip) - movb %dil, shifter3+2(%rip) - movb %dil, shifter4+2(%rip) -# selfmod shifter1, shifter4+3 - ret - .align 16 - - .section .rtext,"awx" - -.globl vlinetallasm4 - .type vlinetallasm4,@function -vlinetallasm4: - .cfi_startproc - push %rbx - push %rdi - push %r15 - push %r14 - push %r13 - push %r12 - push %rbp - push %rsi - subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux? 
- .cfi_adjust_cfa_offset 8 - -# rax = bufplce base address -# rbx = -# rcx = offset from rdi/count (negative) -# edx/rdx = scratch -# rdi = bottom of columns to write to -# r8d-r11d = column offsets -# r12-r15 = palookupoffse[0] - palookupoffse[4] - - movl dc_count(%rip), %ecx - movq dc_dest(%rip), %rdi - testl %ecx, %ecx - jle vltepilog # count must be positive - - movq bufplce(%rip), %rax - movq bufplce+8(%rip), %r8 - subq %rax, %r8 - movq bufplce+16(%rip), %r9 - subq %rax, %r9 - movq bufplce+24(%rip), %r10 - subq %rax, %r10 - movl %r8d, source2+4(%rip) - movl %r9d, source3+4(%rip) - movl %r10d, source4+4(%rip) - -pm: imulq $320, %rcx - - movq palookupoffse(%rip), %r12 - movq palookupoffse+8(%rip), %r13 - movq palookupoffse+16(%rip), %r14 - movq palookupoffse+24(%rip), %r15 - - movl vince(%rip), %r8d - movl vince+4(%rip), %r9d - movl vince+8(%rip), %r10d - movl vince+12(%rip), %r11d - movl %r8d, step1+3(%rip) - movl %r9d, step2+3(%rip) - movl %r10d, step3+3(%rip) - movl %r11d, step4+3(%rip) - - addq %rcx, %rdi - negq %rcx - - movl vplce(%rip), %r8d - movl vplce+4(%rip), %r9d - movl vplce+8(%rip), %r10d - movl vplce+12(%rip), %r11d -# selfmod loopit, vltepilog - jmp loopit - - .align 16 -loopit: - movl %r8d, %edx -shifter1: shrl $24, %edx -step1: addl $0x44444444, %r8d - movzbl (%rax,%rdx), %edx - movl %r9d, %ebx - movb (%r12,%rdx), %dl -shifter2: shrl $24, %ebx -step2: addl $0x44444444, %r9d -source2: movzbl 0x44444444(%rax,%rbx), %ebx - movl %r10d, %ebp - movb (%r13,%rbx), %bl -shifter3: shr $24, %ebp -step3: addl $0x44444444, %r10d -source3: movzbl 0x44444444(%rax,%rbp), %ebp - movl %r11d, %esi - movb (%r14,%rbp), %bpl -shifter4: shr $24, %esi -step4: add $0x44444444, %r11d -source4: movzbl 0x44444444(%rax,%rsi), %esi - movb %dl, (%rdi,%rcx) - movb %bl, 1(%rdi,%rcx) - movb (%r15,%rsi), %sil - movb %bpl, 2(%rdi,%rcx) - movb %sil, 3(%rdi,%rcx) - -vltpitch: addq $320, %rcx - jl loopit - - movl %r8d, vplce(%rip) - movl %r9d, vplce+4(%rip) - movl %r10d, 
vplce+8(%rip) - movl %r11d, vplce+12(%rip) - -vltepilog: - addq $8, %rsp - .cfi_adjust_cfa_offset -8 - pop %rsi - pop %rbp - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rdi - pop %rbx - ret - .cfi_endproc - .align 16 - - From 2677976cae8c628c49c2f4df23ddfcb42f63222c Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 7 Dec 2016 15:44:25 +0100 Subject: [PATCH 7/9] - r_drawt.cpp is no longer needed. --- src/r_drawt.cpp | 1118 ----------------------------------------------- 1 file changed, 1118 deletions(-) delete mode 100644 src/r_drawt.cpp diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp deleted file mode 100644 index a4f581d12a..0000000000 --- a/src/r_drawt.cpp +++ /dev/null @@ -1,1118 +0,0 @@ -/* -** r_drawt.cpp -** Faster column drawers for modern processors -** -**--------------------------------------------------------------------------- -** Copyright 1998-2006 Randy Heit -** All rights reserved. -** -** Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions -** are met: -** -** 1. Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** 2. Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in the -** documentation and/or other materials provided with the distribution. -** 3. The name of the author may not be used to endorse or promote products -** derived from this software without specific prior written permission. -** -** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**--------------------------------------------------------------------------- -** -** These functions stretch columns into a temporary buffer and then -** map them to the screen. On modern machines, this is faster than drawing -** them directly to the screen. -** -** Will I be able to even understand any of this if I come back to it later? -** Let's hope so. :-) -*/ - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "r_main.h" -#include "r_things.h" -#include "v_video.h" - -// I should have commented this stuff better. -// -// dc_temp is the buffer R_DrawColumnHoriz writes into. -// dc_tspans points into it. -// dc_ctspan points into dc_tspans. -// horizspan also points into dc_tspans. - -// dc_ctspan is advanced while drawing into dc_temp. -// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. - -BYTE dc_tempbuff[MAXHEIGHT*4]; -BYTE *dc_temp; -unsigned int dc_tspans[4][MAXHEIGHT]; -unsigned int *dc_ctspan[4]; -unsigned int *horizspan[4]; - -#ifdef X86_ASM -extern "C" void R_SetupShadedCol(); -extern "C" void R_SetupAddCol(); -extern "C" void R_SetupAddClampCol(); -#endif - -// Copies one span at hx to the screen at sx. 
-void rt_copy1col (int hx, int sx, int yl, int yh) -{ - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); -} - -// Copies all four spans to the screen starting at sx. -void rt_copy4cols (int sx, int yl, int yh) -{ - int *source; - int *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = (int *)(ylookup[yl] + sx + dc_destorg); - source = (int *)(&dc_temp[yl*4]); - pitch = dc_pitch/sizeof(int); - - if (count & 1) { - *dest = *source; - source += 4/sizeof(int); - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4/sizeof(int)]; - source += 8/sizeof(int); - dest += pitch*2; - } while (--count); -} - -// Maps one span at hx to the screen at sx. -void rt_map1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = colormap[*source]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[pitch] = colormap[source[4]]; - source += 8; - dest += pitch*2; - } while (--count); -} - -// Maps all four spans to the screen starting at sx. 
-void rt_map4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - - if (count & 1) { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - dest[pitch] = colormap[source[4]]; - dest[pitch+1] = colormap[source[5]]; - dest[pitch+2] = colormap[source[6]]; - dest[pitch+3] = colormap[source[7]]; - source += 8; - dest += pitch*2; - } while (--count); -} - -void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) -{ - int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. 
- for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } -} - -void rt_Translate4cols(const BYTE *translation, int yl, int yh) -{ - int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } -} - -// Translates one span at hx to the screen at sx. -void rt_tlate1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_map1col(hx, sx, yl, yh); -} - -// Translates all four spans to the screen starting at sx. 
-void rt_tlate4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_map4cols(sx, yl, yh); -} - -// Adds one span at hx to the screen at sx without clamping. -void rt_add1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD fg = colormap[*source]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - source += 4; - dest += pitch; - } while (--count); -} - -// Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_c (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD fg = colormap[source[0]]; - DWORD bg = dest[0]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[0] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[1]]; - bg = dest[1]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[1] = RGB32k.All[fg & (fg>>15)]; - - - fg = colormap[source[2]]; - bg = dest[2]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[2] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[3]]; - bg = dest[3]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[3] = RGB32k.All[fg & (fg>>15)]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and adds one span at hx to the screen at sx without clamping. 
-void rt_tlateadd1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_add1col(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_add4cols(sx, yl, yh); -} - -// Shades one span at hx to the screen at sx. -void rt_shaded1col (int hx, int sx, int yl, int yh) -{ - DWORD *fgstart; - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - fgstart = &Col2RGB8[0][dc_color]; - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - do { - DWORD val = colormap[*source]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - source += 4; - dest += pitch; - } while (--count); -} - -// Shades all four spans to the screen starting at sx. 
-void rt_shaded4cols_c (int sx, int yl, int yh) -{ - DWORD *fgstart; - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - fgstart = &Col2RGB8[0][dc_color]; - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - - do { - DWORD val; - - val = colormap[source[0]]; - val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; - dest[0] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[1]]; - val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; - dest[1] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[2]]; - val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; - dest[2] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[3]]; - val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; - dest[3] = RGB32k.All[val & (val>>15)]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); -} - -// Adds all four spans to the screen starting at sx with clamping. 
-void rt_addclamp4cols_c (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_addclamp1col(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_addclamp4cols(sx, yl, yh); -} - -// Subtracts one span at hx to the screen at sx with clamping. 
-void rt_subclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); -} - -// Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and subtracts one span at hx to the screen at sx with clamping. 
-void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_subclamp1col(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_subclamp4cols(sx, yl, yh); -} - -// Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); -} - -// Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); -} - -// Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_revsubclamp1col(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_revsubclamp4cols(sx, yl, yh); -} - -// Reorder the posts so that they get drawn top-to-bottom instead of bottom-to-top. 
-void rt_flip_posts() -{ - unsigned int *front = horizspan[dc_x & 3]; - unsigned int *back = dc_ctspan[dc_x & 3] - 2; - - while (front < back) - { - swapvalues(front[0], back[0]); - swapvalues(front[1], back[1]); - front += 2; - back -= 2; - } -} - -// Copies all spans in all four columns to the screen starting at sx. -// sx should be dword-aligned. -void rt_draw4cols (int sx) -{ - int x, bad; - unsigned int maxtop, minbot, minnexttop; - - // Place a dummy "span" in each column. These don't get - // drawn. They're just here to avoid special cases in the - // max/min calculations below. - for (x = 0; x < 4; ++x) - { - dc_ctspan[x][0] = screen->GetHeight()+1; - dc_ctspan[x][1] = screen->GetHeight(); - } - -#ifdef X86_ASM - // Setup assembly routines for changed colormaps or other parameters. - if (hcolfunc_post4 == rt_shaded4cols) - { - R_SetupShadedCol(); - } - else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) - { - R_SetupAddClampCol(); - } - else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) - { - R_SetupAddCol(); - } -#endif - - for (;;) - { - // If a column is out of spans, mark it as such - bad = 0; - minnexttop = 0xffffffff; - for (x = 0; x < 4; ++x) - { - if (horizspan[x] >= dc_ctspan[x]) - { - bad |= 1 << x; - } - else if ((horizspan[x]+2)[0] < minnexttop) - { - minnexttop = (horizspan[x]+2)[0]; - } - } - // Once all columns are out of spans, we're done - if (bad == 15) - { - return; - } - - // Find the largest shared area for the spans in each column - maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), - MAX (horizspan[2][0], horizspan[3][0])); - minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), - MIN (horizspan[2][1], horizspan[3][1])); - - // If there is no shared area with these spans, draw each span - // individually and advance to the next spans until we reach a shared area. - // However, only draw spans down to the highest span in the next set of - // spans. 
If we allow the entire height of a span to be drawn, it could - // prevent any more shared areas from being drawn in these four columns. - // - // Example: Suppose we have the following arrangement: - // A CD - // A CD - // B D - // B D - // aB D - // aBcD - // aBcD - // aBc - // - // If we draw the entire height of the spans, we end up drawing this first: - // A CD - // A CD - // B D - // B D - // B D - // B D - // B D - // B D - // B - // - // This leaves only the "a" and "c" columns to be drawn, and they are not - // part of a shared area, but if we can include B and D with them, we can - // get a shared area. So we cut off everything in the first set just - // above the "a" column and end up drawing this first: - // A CD - // A CD - // B D - // B D - // - // Then the next time through, we have the following arrangement with an - // easily shared area to draw: - // aB D - // aBcD - // aBcD - // aBc - if (bad != 0 || maxtop > minbot) - { - int drawcount = 0; - for (x = 0; x < 4; ++x) - { - if (!(bad & 1)) - { - if (horizspan[x][1] < minnexttop) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); - horizspan[x] += 2; - drawcount++; - } - else if (minnexttop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); - horizspan[x][0] = minnexttop; - drawcount++; - } - } - bad >>= 1; - } - // Drawcount *should* always be non-zero. The reality is that some situations - // can make this not true. Unfortunately, I'm not sure what those situations are. - if (drawcount == 0) - { - return; - } - continue; - } - - // Draw any span fragments above the shared area. - for (x = 0; x < 4; ++x) - { - if (maxtop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); - } - } - - // Draw the shared area. - hcolfunc_post4 (sx, maxtop, minbot); - - // For each column, if part of the span is past the shared area, - // set its top to just below the shared area. Otherwise, advance - // to the next span in that column. 
- for (x = 0; x < 4; ++x) - { - if (minbot < horizspan[x][1]) - { - horizspan[x][0] = minbot+1; - } - else - { - horizspan[x] += 2; - } - } - } -} - -// Before each pass through a rendering loop that uses these routines, -// call this function to set up the span pointers. -void rt_initcols (BYTE *buff) -{ - int y; - - dc_temp = buff == NULL ? dc_tempbuff : buff; - for (y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; -} - -// Stretches a column into a temporary buffer which is later -// drawn to the screen along with up to three other columns. -void R_DrawColumnHorizP_C (void) -{ - int count = dc_count; - BYTE *dest; - fixed_t fracstep; - fixed_t frac; - - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span; - - span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp[x + 4*dc_yl]; - } - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - - if (count & 1) { - *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest[16]= source[frac>>FRACBITS]; frac += fracstep; - dest[20]= source[frac>>FRACBITS]; frac += fracstep; - dest[24]= source[frac>>FRACBITS]; frac += fracstep; - dest[28]= source[frac>>FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); - } -} - -// [RH] Just fills 
a column with a given color -void R_FillColumnHorizP (void) -{ - int count = dc_count; - BYTE color = dc_color; - BYTE *dest; - - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp[x + 4*dc_yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); -} From e4c208602dab77e91fc24d1ff17ba24d3cddb497 Mon Sep 17 00:00:00 2001 From: Edoardo Prezioso Date: Wed, 7 Dec 2016 11:28:40 +0100 Subject: [PATCH 8/9] - Fixed GCC/Clang warning/error. --- src/d_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/d_main.cpp b/src/d_main.cpp index 0f2d5af92b..04ba34cbb0 100644 --- a/src/d_main.cpp +++ b/src/d_main.cpp @@ -1030,7 +1030,7 @@ void D_DoomLoop () catch (CVMAbortException &error) { error.MaybePrintMessage(); - Printf("%s", error.stacktrace); + Printf("%s", error.stacktrace.GetChars()); D_ErrorCleanup(); } } From 6adcba3c4b46a1762ee753467551cad64727bd4d Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 7 Dec 2016 18:39:09 +0100 Subject: [PATCH 9/9] - fixed linkage for fuzzviewheight - fixed naming conflict with GCC's virtual table. 
--- src/r_draw_pal.cpp | 4 ++-- src/r_draw_pal.h | 2 +- src/r_main.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index b508dd221c..cfb55a6706 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -2564,7 +2564,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// DrawSlabPalCommand::DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap) - : _dx(dx), _v(v), _dy(dy), _vi(vi), _vptr(vptr), _p(p), _colormap(colormap) + : _dx(dx), _v(v), _dy(dy), _vi(vi), _vvptr(vptr), _p(p), _colormap(colormap) { using namespace drawerargs; _pitch = dc_pitch; @@ -2578,7 +2578,7 @@ namespace swrenderer int pitch = _pitch; int width = _dx; const uint8_t *colormap = _colormap; - const uint8_t *source = _vptr; + const uint8_t *source = _vvptr; fixed_t fracpos = _v; fixed_t iscale = _vi; diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index a2400d2759..f2b1f05712 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -230,7 +230,7 @@ namespace swrenderer fixed_t _v; int _dy; fixed_t _vi; - const uint8_t *_vptr; + const uint8_t *_vvptr; uint8_t *_p; const uint8_t *_colormap; int _pitch; diff --git a/src/r_main.cpp b/src/r_main.cpp index 0ee075140d..4bf15d4b99 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -118,7 +118,7 @@ static void R_ShutdownRenderer(); extern short *openings; extern bool r_fakingunderwater; -extern "C" int fuzzviewheight; +extern int fuzzviewheight; extern subsector_t *InSubsector;