// Emacs style mode select -*- C++ -*- //----------------------------------------------------------------------------- // // $Id:$ // // Copyright (C) 1993-1996 by id Software, Inc. // // This source is available for distribution and/or modification // only under the terms of the DOOM Source Code License as // published by id Software. All rights reserved. // // The source is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License // for more details. // // $Log:$ // // DESCRIPTION: // True color span/column drawing functions. // //----------------------------------------------------------------------------- #include #include "templates.h" #include "doomdef.h" #include "i_system.h" #include "w_wad.h" #include "r_local.h" #include "v_video.h" #include "doomstat.h" #include "st_stuff.h" #include "g_game.h" #include "g_level.h" #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" #include "r_draw_rgba.h" #include "r_compiler/llvmdrawers.h" #include "gl/data/gl_matrix.h" #include "gi.h" #include "stats.h" #include "x86.h" #include extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; extern int wallshade; // Use linear filtering when scaling up CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Use linear filtering when scaling down CVAR(Bool, r_minfilter, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Use mipmapped textures CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// class DrawSpanLLVMCommand : public DrawerCommand { public: DrawSpanLLVMCommand() { args.xfrac = ds_xfrac; args.yfrac = ds_yfrac; args.xstep = ds_xstep; args.ystep = ds_ystep; args.x1 = ds_x1; args.x2 = ds_x2; args.y = ds_y; args.xbits = ds_xbits; args.ybits = ds_ybits; args.destorg = 
(uint32_t*)dc_destorg; args.destpitch = dc_pitch; args.source = (const uint32_t*)ds_source; args.light = LightBgra::calc_light_multiplier(ds_light); args.light_red = ds_shade_constants.light_red; args.light_green = ds_shade_constants.light_green; args.light_blue = ds_shade_constants.light_blue; args.light_alpha = ds_shade_constants.light_alpha; args.fade_red = ds_shade_constants.fade_red; args.fade_green = ds_shade_constants.fade_green; args.fade_blue = ds_shade_constants.fade_blue; args.fade_alpha = ds_shade_constants.fade_alpha; args.desaturate = ds_shade_constants.desaturate; args.srcalpha = dc_srcalpha >> (FRACBITS - 8); args.destalpha = dc_destalpha >> (FRACBITS - 8); args.flags = 0; if (ds_shade_constants.simple_shade) args.flags |= DrawSpanArgs::simple_shade; if (!sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) args.flags |= DrawSpanArgs::nearest_filter; } void Execute(DrawerThread *thread) override { if (thread->skipped_by_thread(args.y)) return; LLVMDrawers::Instance()->DrawSpan(&args); } FString DebugInfo() override { return "DrawSpan\n" + args.ToString(); } protected: DrawSpanArgs args; private: inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) { // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); bool magnifying = (magnitude >> FRACBITS == 0); if (r_mipmap && mipmapped) { int level = magnitude >> (FRACBITS + 1); while (level != 0) { if (xbits <= 2 || ybits <= 2) break; source += (1 << (xbits)) * (1 << (ybits)); xbits -= 1; ybits -= 1; level >>= 1; } } return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } }; class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand { public: void Execute(DrawerThread *thread) override { if (thread->skipped_by_thread(args.y)) return; LLVMDrawers::Instance()->DrawSpanMasked(&args); } }; class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand { public: void Execute(DrawerThread *thread) override { if (thread->skipped_by_thread(args.y)) return; LLVMDrawers::Instance()->DrawSpanTranslucent(&args); } }; class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand { public: void Execute(DrawerThread *thread) override { if (thread->skipped_by_thread(args.y)) return; LLVMDrawers::Instance()->DrawSpanMaskedTranslucent(&args); } }; class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand { public: void Execute(DrawerThread *thread) override { if (thread->skipped_by_thread(args.y)) return; LLVMDrawers::Instance()->DrawSpanAddClamp(&args); } }; class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand { public: void Execute(DrawerThread *thread) override { if (thread->skipped_by_thread(args.y)) return; LLVMDrawers::Instance()->DrawSpanMaskedAddClamp(&args); } }; ///////////////////////////////////////////////////////////////////////////// class DrawWall4LLVMCommand : public DrawerCommand { protected: DrawWallArgs args; WorkerThreadData ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; d.num_cores = thread->num_cores; d.pass_start_y = thread->pass_start_y; 
d.pass_end_y = thread->pass_end_y; return d; } public: DrawWall4LLVMCommand() { args.dest = (uint32_t*)dc_dest; args.dest_y = _dest_y; args.count = dc_count; args.pitch = dc_pitch; args.light_red = dc_shade_constants.light_red; args.light_green = dc_shade_constants.light_green; args.light_blue = dc_shade_constants.light_blue; args.light_alpha = dc_shade_constants.light_alpha; args.fade_red = dc_shade_constants.fade_red; args.fade_green = dc_shade_constants.fade_green; args.fade_blue = dc_shade_constants.fade_blue; args.fade_alpha = dc_shade_constants.fade_alpha; args.desaturate = dc_shade_constants.desaturate; for (int i = 0; i < 4; i++) { args.texturefrac[i] = vplce[i]; args.iscale[i] = vince[i]; args.texturefracx[i] = buftexturefracx[i]; args.textureheight[i] = bufheight[i]; args.source[i] = (const uint32_t *)bufplce[i]; args.source2[i] = (const uint32_t *)bufplce2[i]; args.light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); } args.srcalpha = dc_srcalpha >> (FRACBITS - 8); args.destalpha = dc_destalpha >> (FRACBITS - 8); args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawWallArgs::simple_shade; if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; DetectRangeError(args.dest, args.dest_y, args.count); } void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); LLVMDrawers::Instance()->vlinec4(&args, &d); } FString DebugInfo() override { return "DrawWall4\n" + args.ToString(); } }; class DrawWall1LLVMCommand : public DrawerCommand { protected: DrawWallArgs args; WorkerThreadData ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; d.num_cores = thread->num_cores; d.pass_start_y = thread->pass_start_y; d.pass_end_y = thread->pass_end_y; return d; } public: DrawWall1LLVMCommand() { args.dest = (uint32_t*)dc_dest; args.dest_y = _dest_y; args.pitch = dc_pitch; args.count = dc_count; args.texturefrac[0] = dc_texturefrac; args.texturefracx[0] = dc_texturefracx; 
args.iscale[0] = dc_iscale; args.textureheight[0] = dc_textureheight; args.source[0] = (const uint32 *)dc_source; args.source2[0] = (const uint32 *)dc_source2; args.light[0] = LightBgra::calc_light_multiplier(dc_light); args.light_red = dc_shade_constants.light_red; args.light_green = dc_shade_constants.light_green; args.light_blue = dc_shade_constants.light_blue; args.light_alpha = dc_shade_constants.light_alpha; args.fade_red = dc_shade_constants.fade_red; args.fade_green = dc_shade_constants.fade_green; args.fade_blue = dc_shade_constants.fade_blue; args.fade_alpha = dc_shade_constants.fade_alpha; args.desaturate = dc_shade_constants.desaturate; args.srcalpha = dc_srcalpha >> (FRACBITS - 8); args.destalpha = dc_destalpha >> (FRACBITS - 8); args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawWallArgs::simple_shade; if (args.source2[0] == nullptr) args.flags |= DrawWallArgs::nearest_filter; DetectRangeError(args.dest, args.dest_y, args.count); } void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); LLVMDrawers::Instance()->vlinec1(&args, &d); } FString DebugInfo() override { return "DrawWall1\n" + args.ToString(); } }; class DrawColumnLLVMCommand : public DrawerCommand { protected: DrawColumnArgs args; WorkerThreadData ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; d.num_cores = thread->num_cores; d.pass_start_y = thread->pass_start_y; d.pass_end_y = thread->pass_end_y; return d; } FString DebugInfo() override { return "DrawColumn\n" + args.ToString(); } public: DrawColumnLLVMCommand() { args.dest = (uint32_t*)dc_dest; args.source = dc_source; args.colormap = dc_colormap; args.translation = dc_translation; args.basecolors = (const uint32_t *)GPalette.BaseColors; args.pitch = dc_pitch; args.count = dc_count; args.dest_y = _dest_y; args.iscale = dc_iscale; args.texturefrac = dc_texturefrac; args.light = LightBgra::calc_light_multiplier(dc_light); args.color = 
LightBgra::shade_pal_index_simple(dc_color, args.light); args.srccolor = dc_srccolor_bgra; args.srcalpha = dc_srcalpha >> (FRACBITS - 8); args.destalpha = dc_destalpha >> (FRACBITS - 8); args.light_red = dc_shade_constants.light_red; args.light_green = dc_shade_constants.light_green; args.light_blue = dc_shade_constants.light_blue; args.light_alpha = dc_shade_constants.light_alpha; args.fade_red = dc_shade_constants.fade_red; args.fade_green = dc_shade_constants.fade_green; args.fade_blue = dc_shade_constants.fade_blue; args.fade_alpha = dc_shade_constants.fade_alpha; args.desaturate = dc_shade_constants.desaturate; args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; DetectRangeError(args.dest, args.dest_y, args.count); } void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); LLVMDrawers::Instance()->DrawColumn(&args, &d); } }; class DrawSkyLLVMCommand : public DrawerCommand { protected: DrawSkyArgs args; WorkerThreadData ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; d.num_cores = thread->num_cores; d.pass_start_y = thread->pass_start_y; d.pass_end_y = thread->pass_end_y; return d; } public: DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom) { args.dest = (uint32_t*)dc_dest; args.dest_y = _dest_y; args.count = dc_count; args.pitch = dc_pitch; for (int i = 0; i < 4; i++) { args.texturefrac[i] = vplce[i]; args.iscale[i] = vince[i]; args.source0[i] = (const uint32_t *)bufplce[i]; args.source1[i] = (const uint32_t *)bufplce2[i]; } args.textureheight0 = bufheight[0]; args.textureheight1 = bufheight[1]; args.top_color = solid_top; args.bottom_color = solid_bottom; DetectRangeError(args.dest, args.dest_y, args.count); } FString DebugInfo() override { return "DrawSky\n" + args.ToString(); } }; #define DECLARE_DRAW_COMMAND(name, func, base) \ class name##LLVMCommand : public base \ { \ public: \ using base::base; \ void Execute(DrawerThread *thread) 
override \ { \ WorkerThreadData d = ThreadData(thread); \ LLVMDrawers::Instance()->func(&args, &d); \ } \ }; //DECLARE_DRAW_COMMAND(name, func, DrawSpanLLVMCommand); DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, 
DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); ///////////////////////////////////////////////////////////////////////////// class DrawFuzzColumnRGBACommand : public DrawerCommand { int _x; int _yl; int _yh; BYTE * RESTRICT _destorg; int _pitch; int _fuzzpos; int _fuzzviewheight; public: DrawFuzzColumnRGBACommand() { _x = dc_x; _yl = dc_yl; _yh = dc_yh; _destorg = dc_destorg; _pitch = dc_pitch; _fuzzpos = fuzzpos; _fuzzviewheight = fuzzviewheight; } void Execute(DrawerThread *thread) override { int yl = MAX(_yl, 1); int yh = MIN(_yh, _fuzzviewheight); int count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. 
if (count <= 0) return; uint32_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; yl += thread->skipped_by_thread(yl); // Handle the case where we would go out of bounds at the top: if (yl < fuzzstep) { uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; fuzz += fuzzstep; fuzz %= FUZZTABLE; count--; if (count == 0) return; } bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight); if (lowerbounds) count--; // Fuzz where fuzzoffset stays within bounds while (count > 0) { int available = (FUZZTABLE - fuzz); int next_wrap = available / fuzzstep; if (available % fuzzstep != 0) next_wrap++; int cnt = MIN(count, next_wrap); count -= cnt; do { uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep; //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; fuzz += fuzzstep; } while (--cnt); fuzz %= FUZZTABLE; } // Handle the case where we would go out of bounds at the bottom if (lowerbounds) { uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } } FString DebugInfo() override { return 
"DrawFuzzColumn"; } }; class FillSpanRGBACommand : public DrawerCommand { int _x1; int _x2; int _y; BYTE * RESTRICT _destorg; fixed_t _light; int _color; public: FillSpanRGBACommand() { _x1 = ds_x1; _x2 = ds_x2; _y = ds_y; _destorg = dc_destorg; _light = ds_light; _color = ds_color; } void Execute(DrawerThread *thread) override { if (thread->line_skipped_by_thread(_y)) return; uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; int count = (_x2 - _x1 + 1); uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } FString DebugInfo() override { return "FillSpan"; } }; ///////////////////////////////////////////////////////////////////////////// class DrawSlabRGBACommand : public DrawerCommand { int _dx; fixed_t _v; int _dy; fixed_t _vi; const BYTE *_voxelptr; uint32_t *_p; ShadeConstants _shade_constants; const BYTE *_colormap; fixed_t _light; int _pitch; int _start_y; public: DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p, ShadeConstants shade_constants, const BYTE *colormap, fixed_t light) { _dx = dx; _v = v; _dy = dy; _vi = vi; _voxelptr = vptr; _p = (uint32_t *)p; _shade_constants = shade_constants; _colormap = colormap; _light = light; _pitch = dc_pitch; _start_y = static_cast((p - dc_destorg) / (dc_pitch * 4)); assert(dx > 0); } void Execute(DrawerThread *thread) override { int dx = _dx; fixed_t v = _v; int dy = _dy; fixed_t vi = _vi; const BYTE *vptr = _voxelptr; uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; const BYTE *colormap = _colormap; uint32_t light = LightBgra::calc_light_multiplier(_light); int pitch = _pitch; int x; dy = thread->count_for_thread(_start_y, dy); p = thread->dest_for_thread(_start_y, pitch, p); v += vi * thread->skipped_by_thread(_start_y); vi *= thread->num_cores; pitch *= thread->num_cores; if (dx == 1) { while (dy > 0) { *p = 
LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p += pitch; v += vi; dy--; } } else if (dx == 2) { while (dy > 0) { uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p += pitch; v += vi; dy--; } } else if (dx == 3) { while (dy > 0) { uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p[2] = color; p += pitch; v += vi; dy--; } } else if (dx == 4) { while (dy > 0) { uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p[2] = color; p[3] = color; p += pitch; v += vi; dy--; } } else while (dy > 0) { uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); // The optimizer will probably turn this into a memset call. // Since dx is not likely to be large, I'm not sure that's a good thing, // hence the alternatives above. 
for (x = 0; x < dx; x++) { p[x] = color; } p += pitch; v += vi; dy--; } } FString DebugInfo() override { return "DrawSlab"; } }; ///////////////////////////////////////////////////////////////////////////// class DrawFogBoundaryLineRGBACommand : public DrawerCommand { int _y; int _x; int _x2; BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; public: DrawFogBoundaryLineRGBACommand(int y, int x, int x2) { _y = y; _x = x; _x2 = x2; _destorg = dc_destorg; _light = dc_light; _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { if (thread->line_skipped_by_thread(_y)) return; int y = _y; int x = _x; int x2 = _x2; uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants constants = _shade_constants; do { uint32_t red = (dest[x] >> 16) & 0xff; uint32_t green = (dest[x] >> 8) & 0xff; uint32_t blue = dest[x] & 0xff; if (constants.simple_shade) { red = red * light / 256; green = green * light / 256; blue = blue * light / 256; } else { uint32_t inv_light = 256 - light; uint32_t inv_desaturate = 256 - constants.desaturate; uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; red = (red * inv_desaturate + intensity) / 256; green = (green * inv_desaturate + intensity) / 256; blue = (blue * inv_desaturate + intensity) / 256; red = (constants.fade_red * inv_light + red * light) / 256; green = (constants.fade_green * inv_light + green * light) / 256; blue = (constants.fade_blue * inv_light + blue * light) / 256; red = (red * constants.light_red) / 256; green = (green * constants.light_green) / 256; blue = (blue * constants.light_blue) / 256; } dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); } FString DebugInfo() override { return "DrawFogBoundaryLine"; } }; class DrawTiltedSpanRGBACommand : public DrawerCommand { int _x1; int _x2; int _y; BYTE * RESTRICT _destorg; fixed_t _light; 
ShadeConstants _shade_constants; FVector3 _plane_sz; FVector3 _plane_su; FVector3 _plane_sv; bool _plane_shade; int _planeshade; float _planelightfloat; fixed_t _pviewx; fixed_t _pviewy; int _xbits; int _ybits; const uint32_t * RESTRICT _source; public: DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { _x1 = x1; _x2 = x2; _y = y; _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; _plane_sz = plane_sz; _plane_su = plane_su; _plane_sv = plane_sv; _plane_shade = plane_shade; _planeshade = planeshade; _planelightfloat = planelightfloat; _pviewx = pviewx; _pviewy = pviewy; _source = (const uint32_t*)ds_source; _xbits = ds_xbits; _ybits = ds_ybits; } void Execute(DrawerThread *thread) override { if (thread->line_skipped_by_thread(_y)) return; //#define SPANSIZE 32 //#define INVSPAN 0.03125f //#define SPANSIZE 8 //#define INVSPAN 0.125f #define SPANSIZE 16 #define INVSPAN 0.0625f int source_width = 1 << _xbits; int source_height = 1 << _ybits; uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; int count = _x2 - _x1 + 1; // Depth (Z) change across the span double iz = _plane_sz[2] + _plane_sz[1] * (centery - _y) + _plane_sz[0] * (_x1 - centerx); // Light change across the span fixed_t lightstart = _light; fixed_t lightend = lightstart; if (_plane_shade) { double vis_start = iz * _planelightfloat; double vis_end = (iz + _plane_sz[0] * count) * _planelightfloat; lightstart = LIGHTSCALE(vis_start, _planeshade); lightend = LIGHTSCALE(vis_end, _planeshade); } fixed_t light = lightstart; fixed_t steplight = (lightend - lightstart) / count; // Texture coordinates double uz = _plane_su[2] + _plane_su[1] * (centery - _y) + _plane_su[0] * (_x1 - centerx); double vz = _plane_sv[2] + _plane_sv[1] * (centery - _y) + _plane_sv[0] * (_x1 - centerx); double startz = 1.f / iz; double 
startu = uz*startz; double startv = vz*startz; double izstep = _plane_sz[0] * SPANSIZE; double uzstep = _plane_su[0] * SPANSIZE; double vzstep = _plane_sv[0] * SPANSIZE; // Linear interpolate in sizes of SPANSIZE to increase speed while (count >= SPANSIZE) { iz += izstep; uz += uzstep; vz += vzstep; double endz = 1.f / iz; double endu = uz*endz; double endv = vz*endz; uint32_t stepu = (uint32_t)(SQWORD((endu - startu) * INVSPAN)); uint32_t stepv = (uint32_t)(SQWORD((endv - startv) * INVSPAN)); uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); for (int i = 0; i < SPANSIZE; i++) { uint32_t sx = ((u >> 16) * source_width) >> 16; uint32_t sy = ((v >> 16) * source_height) >> 16; uint32_t fg = _source[sy + sx * source_height]; if (_shade_constants.simple_shade) *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); else *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); u += stepu; v += stepv; light += steplight; } startu = endu; startv = endv; count -= SPANSIZE; } // The last few pixels at the end while (count > 0) { double endz = 1.f / iz; startu = uz*endz; startv = vz*endz; uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); uint32_t sx = ((u >> 16) * source_width) >> 16; uint32_t sy = ((v >> 16) * source_height) >> 16; uint32_t fg = _source[sy + sx * source_height]; if (_shade_constants.simple_shade) *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); else *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); iz += _plane_sz[0]; uz += _plane_su[0]; vz += _plane_sv[0]; light += steplight; count--; } } FString DebugInfo() override { return "DrawTiltedSpan"; } }; class DrawColoredSpanRGBACommand : public DrawerCommand { int _y; int _x1; int _x2; BYTE * RESTRICT _destorg; fixed_t _light; int _color; public: 
DrawColoredSpanRGBACommand(int y, int x1, int x2) { _y = y; _x1 = x1; _x2 = x2; _destorg = dc_destorg; _light = ds_light; _color = ds_color; } void Execute(DrawerThread *thread) override { if (thread->line_skipped_by_thread(_y)) return; int y = _y; int x1 = _x1; int x2 = _x2; uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = (x2 - x1 + 1); uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } FString DebugInfo() override { return "DrawColoredSpan"; } }; class FillTransColumnRGBACommand : public DrawerCommand { int _x; int _y1; int _y2; int _color; int _a; BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; public: FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) { _x = x; _y1 = y1; _y2 = y2; _color = color; _a = a; _destorg = dc_destorg; _pitch = dc_pitch; } void Execute(DrawerThread *thread) override { int x = _x; int y1 = _y1; int y2 = _y2; int color = _color; int a = _a; int ycount = thread->count_for_thread(y1, y2 - y1 + 1); if (ycount <= 0) return; uint32_t fg = GPalette.BaseColors[color].d; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; uint32_t alpha = a + 1; uint32_t inv_alpha = 256 - alpha; fg_red *= alpha; fg_green *= alpha; fg_blue *= alpha; int spacing = _pitch * thread->num_cores; uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); for (int y = 0; y < ycount; y++) { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; uint32_t red = (fg_red + bg_red * inv_alpha) / 256; uint32_t green = (fg_green + bg_green * inv_alpha) / 256; uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += spacing; } } FString DebugInfo() override { return "FillTransColumn"; } }; 
ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) { buffer = screen->GetBuffer(); pitch = screen->GetPitch(); width = screen->GetWidth(); height = screen->GetHeight(); start_red = (int)(colormap->ColorizeStart[0] * 255); start_green = (int)(colormap->ColorizeStart[1] * 255); start_blue = (int)(colormap->ColorizeStart[2] * 255); end_red = (int)(colormap->ColorizeEnd[0] * 255); end_green = (int)(colormap->ColorizeEnd[1] * 255); end_blue = (int)(colormap->ColorizeEnd[2] * 255); } #ifdef NO_SSE void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) { int y = thread->skipped_by_thread(0); int count = thread->count_for_thread(0, height); while (count > 0) { BYTE *pixels = buffer + y * pitch * 4; for (int x = 0; x < width; x++) { int fg_red = pixels[2]; int fg_green = pixels[1]; int fg_blue = pixels[0]; int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; gray += (gray >> 7); // gray*=256/255 int inv_gray = 256 - gray; int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); pixels[0] = (BYTE)blue; pixels[1] = (BYTE)green; pixels[2] = (BYTE)red; pixels[3] = 0xff; pixels += 4; } y += thread->num_cores; count--; } } #else void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) { int y = thread->skipped_by_thread(0); int count = thread->count_for_thread(0, height); __m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); while (count > 0) { BYTE *pixels = buffer + y * pitch * 4; int sse_length = width / 4; for (int x = 0; x < sse_length; x++) { // Unpack to integers: __m128i p = _mm_loadu_si128((const __m128i*)pixels); __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); __m128i p16_1 
= _mm_unpackhi_epi8(p, _mm_setzero_si128()); // Add gray weighting to colors __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); __m128i p32_1 = _mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); // Transpose to get color components in individual vectors: __m128 tmpx = _mm_castsi128_ps(p32_0); __m128 tmpy = _mm_castsi128_ps(p32_1); __m128 tmpz = _mm_castsi128_ps(p32_2); __m128 tmpw = _mm_castsi128_ps(p32_3); _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); __m128i blue = _mm_castps_si128(tmpx); __m128i green = _mm_castps_si128(tmpy); __m128i red = _mm_castps_si128(tmpz); __m128i alpha = _mm_castps_si128(tmpw); // Calculate gray and 256-gray values: __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); // p32 = start * inv_gray + end * gray: __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); __m128i gray16_1_mullo = _mm_mullo_epi16(gray16_1, start_end); __m128i 
gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end);
			__m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end);
			__m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end);
			__m128i gray16_1_mulhi = _mm_mulhi_epi16(gray16_1, start_end);
			__m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end);
			__m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end);

			// Interleave the low/high 16-bit product halves back into 32-bit products,
			// add the lower four products to the upper four and drop the low 8 bits.
			p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8);
			p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8);
			p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8);
			p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8);

			// Pack the four 32-bit vectors back down to 16 bytes (signed saturation to
			// 16 bits, then unsigned saturation to 8 bits) and store them.
			p16_0 = _mm_packs_epi32(p32_0, p32_1);
			p16_1 = _mm_packs_epi32(p32_2, p32_3);
			p = _mm_packus_epi16(p16_0, p16_1);

			_mm_storeu_si128((__m128i*)pixels, p);
			pixels += 16;
		}

		// Scalar path for the pixels left over after the SSE loop
		// (x resumes at sse_length * 4). Byte order within a pixel is
		// [0]=blue, [1]=green, [2]=red, [3]=alpha as indexed below.
		for (int x = sse_length * 4; x < width; x++)
		{
			int fg_red = pixels[2];
			int fg_green = pixels[1];
			int fg_blue = pixels[0];

			// Weighted sum of the channels (weights 77/143/37 add up to 257), scaled down by 256.
			int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8;
			gray += (gray >> 7); // gray*=256/255
			int inv_gray = 256 - gray;

			// Blend between the start and end colors by gray, clamped to a byte.
			int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255);
			int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255);
			int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255);

			pixels[0] = (BYTE)blue;
			pixels[1] = (BYTE)green;
			pixels[2] = (BYTE)red;
			pixels[3] = 0xff; // Alpha is forced to opaque.
			pixels += 4;
		}

		// Step to the next row for this thread (rows are strided by num_cores).
		y += thread->num_cores;
		count--;
	}
}
#endif

/////////////////////////////////////////////////////////////////////////////

// Drawer command that rasterizes a list of textured triangles.
//
// Execute() copies the packed per-column clip range into the executing
// thread's triangle_clip_top/triangle_clip_bottom arrays and then runs
// draw_triangles(), which transforms, clips and rasterizes every group of
// three vertices.
//
// The command keeps the vertices, clipdata and texturePixels pointers as
// given (nothing is copied here), so whatever they point at has to remain
// valid until Execute() has run.
class DrawTrianglesCommand : public DrawerCommand
{
public:
	// transform:       matrix applied to each input vertex before the view transform.
	// vertices/count:  input vertex array and the number of vertices in it
	//                  (count / 3 triangles are drawn).
	// clipleft/right:  inclusive column range covered by clipdata.
	// clipdata:        (clipright - clipleft + 1) top values followed by the same
	//                  number of bottom values (see Execute).
	// texturePixels:   32-bit texels, indexed as u * textureHeight + v (see triangle).
	DrawTrianglesCommand(const VSMatrix &transform, const TriVertex *vertices, int count, int clipleft, int clipright, const short *clipdata, const uint32_t *texturePixels, int textureWidth, int textureHeight)
		: transform(transform), vertices(vertices), count(count), clipleft(clipleft), clipright(clipright), clipdata(clipdata), texturePixels(texturePixels), textureWidth(textureWidth), textureHeight(textureHeight)
	{
	}

	// Unpacks clipdata into the thread's clip arrays for columns
	// clipleft..clipright, then draws all triangles.
	void Execute(DrawerThread *thread) override
	{
		int cliplength = clipright - clipleft + 1;
		for (int i = 0; i < cliplength; i++)
		{
			// First half of clipdata holds the top values, second half the bottom values.
			thread->triangle_clip_top[clipleft + i] = clipdata[i];
			thread->triangle_clip_bottom[clipleft + i] = clipdata[cliplength + i];
		}

		draw_triangles(transform, vertices, count, clipleft, clipright, thread->triangle_clip_top, thread->triangle_clip_bottom, thread);
	}

	// Returns the fixed name of this command; no arguments are included.
	FString DebugInfo() override
	{
		return "DrawTriangles";
	}

private:
	// Change per unit x of a value that is c0, c1, c2 at the points
	// (x0,y0), (x1,y1), (x2,y2), i.e. the x-slope of the plane through them.
	// There is no guard for a zero denominator (collinear points).
	float gradx(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2)
	{
		float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2);
		float bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2);
		return top / bottom;
	}

	// Change per unit y of the same plane; uses the negated denominator of gradx.
	// There is no guard for a zero denominator (collinear points).
	float grady(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2)
	{
		float top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2);
		float bottom = -((x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2));
		return top / bottom;
	}

	// Rasterizes one screen-space triangle into dest, working in 8x8 blocks and
	// testing coverage with three integer half-space (edge) functions.
	//
	// dest/pitch:        destination pixels; pitch is added to a uint32_t pointer,
	//                    so it is measured in pixels, not bytes.
	// v1, v2, v3:        vertices with x/y in screen pixels. draw_triangles stores
	//                    the reciprocal of clip-space w in v.w before calling this,
	//                    so varying * w is interpolated linearly and divided by the
	//                    interpolated w again per block corner.
	// clipleft/right:    inclusive column range that may be written.
	// cliptop/bottom:    per-column vertical range; a pixel (ix, y) is written only
	//                    if cliptop[ix] <= y < clipbottom[ix].
	// thread:            rows for which thread->skipped_by_thread(row) is true are
	//                    not written.
	void triangle(uint32_t *dest, int pitch, const TriVertex &v1, const TriVertex &v2, const TriVertex &v3, int clipleft, int clipright, const short *cliptop, const short *clipbottom, DrawerThread *thread)
	{
		// 28.4 fixed-point coordinates
		const int Y1 = (int)round(16.0f * v1.y);
		const int Y2 = (int)round(16.0f * v2.y);
		const int Y3 = (int)round(16.0f * v3.y);

		const int X1 = (int)round(16.0f * v1.x);
		const int X2 = (int)round(16.0f * v2.x);
		const int X3 = (int)round(16.0f * v3.x);

		// Deltas
		const int DX12 = X1 - X2;
		const int DX23 = X2 - X3;
		const int DX31 = X3 - X1;

		const int DY12 = Y1 - Y2;
		const int DY23 = Y2 - Y3;
		const int DY31 = Y3 - Y1;

		// Fixed-point deltas
		const int FDX12 = DX12 << 4;
		const int FDX23 = DX23 << 4;
		const int FDX31 = DX31 << 4;

		const int FDY12 = DY12 << 4;
		const int FDY23 = DY23 << 4;
		const int FDY31 = DY31 << 4;

		// Bounding rectangle
		// The vertical clip limits are the extremes over all clip columns.
		int clipymin = cliptop[clipleft];
		int clipymax = clipbottom[clipleft];
		for (int i = clipleft + 1; i <= clipright; i++)
		{
			clipymin = MIN(clipymin, (int)cliptop[i]);
			clipymax = MAX(clipymax, (int)clipbottom[i]);
		}

		// Triangle bounds rounded up from 28.4 to whole pixels, limited to the clip area.
		int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, clipleft);
		int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright);
		int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, clipymin);
		int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipymax - 1);
		if (minx >= maxx || miny >= maxy)
			return;

		// Block size, standard 8x8 (must be power of two)
		const int q = 8;

		// Start in corner of 8x8 block
		minx &= ~(q - 1);
		miny &= ~(q - 1);

		dest += miny * pitch;

		// Half-edge constants
		int C1 = DY12 * X1 - DX12 * Y1;
		int C2 = DY23 * X2 - DX23 * Y2;
		int C3 = DY31 * X3 - DX31 * Y3;

		// Correct for fill convention
		if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
		if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
		if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;

		// Gradients
		// W and every varying (pre-multiplied by w) get an x/y gradient and a start
		// value extrapolated from v1 to the block-aligned corner (minx, miny).
		float gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
		float gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
		float startW = v1.w + gradWX * (minx - v1.x) + gradWY * (miny - v1.y);
		float gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying];
		for (int i = 0; i < TriVertex::NumVarying; i++)
		{
			gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
			gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
			startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (minx - v1.x) + gradVaryingY[i] * (miny - v1.y);
		}

		// Loop through blocks
		for (int y = miny; y < maxy; y += q)
		{
			for (int x = minx; x < maxx; x += q)
			{
				// Corners of block
				int x0 = x << 4;
				int x1 = (x + q - 1) << 4;
				int y0 = y << 4;
				int y1 = (y + q - 1) << 4;

				// Evaluate half-space functions
				// One bit per block corner for each edge: bit set means the corner is inside.
				bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
				bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
				bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
				bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
				int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);

				bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
				bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
				bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
				bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
				int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);

				bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
				bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
				bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
				bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
				int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);

				// Skip block when outside an edge
				if (a == 0x0 || b == 0x0 || c == 0x0) continue;

				// Check if block needs clipping
				// Counts the columns of this block that are outside the horizontal clip
				// range or whose vertical clip range does not span the whole block.
				// The || chain short-circuits, so cliptop/clipbottom are only read for
				// columns inside clipleft..clipright.
				int clipcount = 0;
				for (int ix = x; ix < x + q; ix++)
				{
					clipcount += (clipleft > ix) || (clipright < ix) || (cliptop[ix] > y) || (clipbottom[ix] <= y + q - 1);
				}

				// Calculate varying variables for affine block
				// Perspective-correct values are computed at four sample points (offsets
				// +0.5 and +0.5+q); inside the block they are interpolated linearly.
				float offx0 = (x - minx) + 0.5f;
				float offy0 = (y - miny) + 0.5f;
				float offx1 = offx0 + q;
				float offy1 = offy0 + q;
				// NOTE(review): no guard against the interpolated W being zero here.
				float rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY);
				float rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY);
				float rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY);
				float rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY);
				float varyingTL[TriVertex::NumVarying];
				float varyingTR[TriVertex::NumVarying];
				float varyingBL[TriVertex::NumVarying];
				float varyingBR[TriVertex::NumVarying];
				for (int i = 0; i < TriVertex::NumVarying; i++)
				{
					// TL/TR hold the top-row values; BL/BR hold the per-row step down the
					// left and right side (bottom value minus top value, divided by q).
					varyingTL[i] = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL;
					varyingTR[i] = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR;
					varyingBL[i] = ((startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL - varyingTL[i]) * (1.0f / q);
					varyingBR[i] = ((startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR - varyingTR[i]) * (1.0f / q);
				}

				uint32_t *buffer = dest;

				// Accept whole block when totally covered
				if (a == 0xF && b == 0xF && c == 0xF && clipcount == 0)
				{
					for (int iy = 0; iy < q; iy++)
					{
						// Left-side value for this row and the per-pixel step towards the right side.
						float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying];
						for (int i = 0; i < TriVertex::NumVarying; i++)
						{
							varying[i] = varyingTL[i] + varyingBL[i] * iy;
							varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q);
						}

						if (!thread->skipped_by_thread(y + iy))
						{
							for (int ix = x; ix < x + q; ix++)
							{
								// Fractional part of u/v scaled to 32 bits, so the texture wraps.
								uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL);
								uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL);
								//uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f);

								// Top 16 bits of the fraction times the texture size gives the texel index.
								uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16;
								uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16;
								// Texels are addressed as u * textureHeight + v.
								uint32_t uvoffset = upos * textureHeight + vpos;

								buffer[ix] = texturePixels[uvoffset];

								for (int i = 0; i < TriVertex::NumVarying; i++)
									varying[i] += varyingStep[i];
							}
						}

						buffer += pitch;
					}
				}
				else // Partially covered block
				{
					// Edge function values at the block's top-left corner; stepped per
					// pixel (CX*) and per row (CY*) using the fixed-point deltas.
					int CY1 = C1 + DX12 * y0 - DY12 * x0;
					int CY2 = C2 + DX23 * y0 - DY23 * x0;
					int CY3 = C3 + DX31 * y0 - DY31 * x0;

					for (int iy = 0; iy < q; iy++)
					{
						int CX1 = CY1;
						int CX2 = CY2;
						int CX3 = CY3;

						float varying[TriVertex::NumVarying], varyingStep[TriVertex::NumVarying];
						for (int i = 0; i < TriVertex::NumVarying; i++)
						{
							varying[i] = varyingTL[i] + varyingBL[i] * iy;
							varyingStep[i] = (varyingTR[i] + varyingBR[i] * iy - varying[i]) * (1.0f / q);
						}

						if (!thread->skipped_by_thread(y + iy))
						{
							for (int ix = x; ix < x + q; ix++)
							{
								// The && chain short-circuits, so the clip arrays are only read
								// for columns inside clipleft..clipright.
								bool visible = ix >= clipleft && ix <= clipright && (cliptop[ix] <= y + iy) && (clipbottom[ix] > y + iy);

								// Write only pixels inside all three edges and inside the clip range.
								if (CX1 > 0 && CX2 > 0 && CX3 > 0 && visible)
								{
									uint32_t ufrac = (uint32_t)((varying[0] - floor(varying[0])) * 0x100000000LL);
									uint32_t vfrac = (uint32_t)((varying[1] - floor(varying[1])) * 0x100000000LL);
									//uint32_t light = (uint32_t)clamp(varying[2] * 255.0f + 0.5f, 0.0f, 255.0f);

									uint32_t upos = ((ufrac >> 16) * textureWidth) >> 16;
									uint32_t vpos = ((vfrac >> 16) * textureHeight) >> 16;
									uint32_t uvoffset = upos * textureHeight + vpos;

									buffer[ix] = texturePixels[uvoffset];
								}

								// The varyings and edge values advance for every pixel, written or not.
								for (int i = 0; i < TriVertex::NumVarying; i++)
									varying[i] += varyingStep[i];

								CX1 -= FDY12;
								CX2 -= FDY23;
								CX3 -= FDY31;
							}
						}

						// Row stepping happens even for rows this thread skips.
						CY1 += FDX12;
						CY2 += FDX23;
						CY3 += FDX31;

						buffer += pitch;
					}
				}
			}

			dest += q * pitch;
		}
	}

	// Tests the segment parameter range [t1, t2] against one clip half-space.
	// clipdistance1/clipdistance2 are the signed distances of the segment's two
	// end points (negative = outside). Returns true when both ends of the current
	// range are outside; otherwise narrows t1 and/or t2 to the crossing point and
	// returns false.
	bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2)
	{
		// Distances at the current (possibly already narrowed) range ends.
		float d1 = clipdistance1 * (1.0f - t1) + clipdistance2 * t1;
		float d2 = clipdistance1 * (1.0f - t2) + clipdistance2 * t2;
		if (d1 < 0.0f && d2 < 0.0f)
			return true;

		if (d1 < 0.0f)
			t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1);

		if (d2 < 0.0f)
			t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2);

		return false;
	}

	// Clips the edge v1 -> v2 against the six clip planes and appends the
	// resulting vertices to clippedvert, advancing numclipvert:
	//  - nothing if the edge is culled,
	//  - otherwise the start vertex (v1 itself, or the interpolated entry point),
	//    plus the interpolated exit point when the end of the edge was clipped.
	// The unclipped end vertex v2 is never appended here.
	void clipedge(const TriVertex &v1, const TriVertex &v2, TriVertex *clippedvert, int &numclipvert)
	{
		// Clip and cull so that the following is true for all vertices:
		// -v.w <= v.x <= v.w
		// -v.w <= v.y <= v.w
		// -v.w <= v.z <= v.w

		float t1 = 0.0f, t2 = 1.0f;
		// The || chain stops at the first plane that culls the whole edge.
		bool culled =
			cullhalfspace(v1.x + v1.w, v2.x + v2.w, t1, t2) ||
			cullhalfspace(v1.w - v1.x, v2.w - v2.x, t1, t2) ||
			cullhalfspace(v1.y + v1.w, v2.y + v2.w, t1, t2) ||
			cullhalfspace(v1.w - v1.y, v2.w - v2.y, t1, t2) ||
			cullhalfspace(v1.z + v1.w, v2.z + v2.w, t1, t2) ||
			cullhalfspace(v1.w - v1.z, v2.w - v2.z, t1, t2);
		if (culled)
			return;

		if (t1 == 0.0f)
		{
			// Start of the edge was not clipped: keep v1 unchanged.
			clippedvert[numclipvert++] = v1;
		}
		else
		{
			// Start was clipped: emit the point at parameter t1.
			auto &v = clippedvert[numclipvert++];
			v.x = v1.x * (1.0f - t1) + v2.x * t1;
			v.y = v1.y * (1.0f - t1) + v2.y * t1;
			v.z = v1.z * (1.0f - t1) + v2.z * t1;
			v.w = v1.w * (1.0f - t1) + v2.w * t1;
			for (int i = 0; i < TriVertex::NumVarying; i++)
				v.varying[i] = v1.varying[i] * (1.0f - t1) + v2.varying[i] * t1;
		}

		if (t2 != 1.0f)
		{
			// End was clipped: emit the point at parameter t2 as well.
			auto &v = clippedvert[numclipvert++];
			v.x = v1.x * (1.0f - t2) + v2.x * t2;
			v.y = v1.y * (1.0f - t2) + v2.y * t2;
			v.z = v1.z * (1.0f - t2) + v2.z * t2;
			v.w = v1.w * (1.0f - t2) + v2.w * t2;
			for (int i = 0; i < TriVertex::NumVarying; i++)
				v.varying[i] = v1.varying[i] * (1.0f - t2) + v2.varying[i] * t2;
		}
	}

	// Processes vcount / 3 triangles from vinput: transforms each vertex, clips
	// the triangle edge by edge, maps the result to screen coordinates and
	// rasterizes it as a triangle fan through triangle().
	void draw_triangles(const VSMatrix &transform, const TriVertex *vinput, int vcount, int clipleft, int clipright, const short *cliptop, const short *clipbottom, DrawerThread *thread)
	{
		for (int i = 0; i < vcount / 3; i++)
		{
			TriVertex vert[3];

			// Vertex shader stuff:
			for (int j = 0; j < 3; j++)
			{
				auto &v = vert[j];
				v = *(vinput++);

				// Apply transform to get world coordinates:
				// matrix[col * 4 + row] indexing, i.e. the 16 floats are read column by column.
				const float *matrix = transform.get();
				float vx = matrix[0 * 4 + 0] * v.x + matrix[1 * 4 + 0] * v.y + matrix[2 * 4 + 0] * v.z + matrix[3 * 4 + 0] * v.w;
				float vy = matrix[0 * 4 + 1] * v.x + matrix[1 * 4 + 1] * v.y + matrix[2 * 4 + 1] * v.z + matrix[3 * 4 + 1] * v.w;
				float vz = matrix[0 * 4 + 2] * v.x + matrix[1 * 4 + 2] * v.y + matrix[2 * 4 + 2] * v.z + matrix[3 * 4 + 2] * v.w;
				float vw = matrix[0 * 4 + 3] * v.x + matrix[1 * 4 + 3] * v.y + matrix[2 * 4 + 3] * v.z + matrix[3 * 4 + 3] * v.w;
				v.x = vx;
				v.y = vy;
				v.z = vz;
				v.w = vw;

				// The software renderer world to clip transform:
				double nearp = 5.0f;
				double farp = 65536.f;
				double tr_x = v.x - ViewPos.X;
				double tr_y = v.y - ViewPos.Y;
				double tr_z = v.z - ViewPos.Z;
				double tx = tr_x * ViewSin - tr_y * ViewCos;
				double tz = tr_x * ViewTanCos + tr_y * ViewTanSin;
				v.x = (float)tx * 0.5f;
				v.y = (float)tr_z * 0.5f;
				v.z = (float)((-tz * (farp + nearp) / (nearp - farp) + (2.0f * farp * nearp) / (nearp - farp)));
				v.w = (float)tz;
			}

			// Cull, clip and generate additional vertices as needed
			// Each clipedge call appends at most two vertices, so three edges fit in six slots.
			TriVertex clippedvert[6];
			int numclipvert = 0;
			clipedge(vert[0], vert[1], clippedvert, numclipvert);
			clipedge(vert[1], vert[2], clippedvert, numclipvert);
			clipedge(vert[2], vert[0], clippedvert, numclipvert);

			// Map to 2D viewport:
			for (int j = 0; j < numclipvert; j++)
			{
				auto &v = clippedvert[j];

				// Calculate normalized device coordinates:
				// v.w is replaced by its reciprocal, which triangle() relies on.
				// NOTE(review): no guard against v.w == 0 before the division.
				v.w = 1.0f / v.w;
				v.x *= v.w;
				v.y *= v.w;
				v.z *= v.w;

				// Apply viewport scale to get screen coordinates:
				v.x = (float)(CenterX + v.x * 2.0f * CenterX);
				v.y = (float)(CenterY - v.y * 2.0f * InvZtoScale);
			}

			// Draw screen triangles
			// ccw is hard-coded to false, so only the fan in the else branch runs.
			// Both inner loops declare their own i, shadowing the triangle index above.
			bool ccw = false;
			if (ccw)
			{
				for (int i = numclipvert; i > 1; i--)
				{
					triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[numclipvert - 1], clippedvert[i - 1], clippedvert[i - 2], clipleft, clipright, cliptop, clipbottom, thread);
				}
			}
			else
			{
				// Fan around clippedvert[0]; fewer than three vertices draws nothing.
				for (int i = 2; i < numclipvert; i++)
				{
					triangle((uint32_t*)dc_destorg, dc_pitch, clippedvert[0], clippedvert[i - 1], clippedvert[i], clipleft, clipright, cliptop, clipbottom, thread);
				}
			}
		}
	}

	VSMatrix transform;            // Copied by value from the constructor argument.
	const TriVertex *vertices;     // Not owned; count entries are read in Execute.
	int count;
	int clipleft;                  // Inclusive column range described by clipdata.
	int clipright;
	const short *clipdata;         // Not owned; tops first, then bottoms.
	const uint32_t *texturePixels; // Not owned.
	int textureWidth;
	int textureHeight;
};

// Packs the clip range for columns clipleft..clipright into memory obtained
// from DrawerCommandQueue::AllocMemory and queues a triangle drawing command
// using the texture's Bgra pixels, width and height.
//
// Returns without drawing when the column range is empty or outside
// 0..MAXWIDTH, or when the clip memory cannot be allocated even after
// DrawerCommandQueue::WaitForWorkers() has been called once.
//
// The vertices pointer is forwarded as-is, not copied.
void R_DrawTriangles(const VSMatrix &transform, const TriVertex *vertices, int count, int clipleft, int clipright, const short *cliptop, const short *clipbottom, FTexture *texture)
{
	// NOTE(review): clipright == MAXWIDTH passes this check; if the per-thread
	// clip arrays hold MAXWIDTH entries that column would be one past the end —
	// confirm the array size.
	if (clipright < clipleft || clipleft < 0 || clipright > MAXWIDTH)
		return;

	// Room for one top and one bottom value per column.
	int cliplength = clipright - clipleft + 1;
	short *clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short));
	if (!clipdata)
	{
		// Allocation failed: wait for the workers and retry once.
		DrawerCommandQueue::WaitForWorkers();
		clipdata = (short*)DrawerCommandQueue::AllocMemory(cliplength * 2 * sizeof(short));
		if (!clipdata)
			return;
	}

	// Layout expected by DrawTrianglesCommand::Execute: tops first, then bottoms.
	for (int i = 0; i < cliplength; i++)
		clipdata[i] = cliptop[clipleft + i];
	for (int i = 0; i < cliplength; i++)
		clipdata[cliplength + i] = clipbottom[clipleft + i];

	DrawerCommandQueue::QueueCommand(transform, vertices, count, clipleft, clipright, clipdata, texture->GetPixelsBgra(), texture->GetWidth(), texture->GetHeight());
}

/////////////////////////////////////////////////////////////////////////////

// Entry points for the rgba drawers. Each one queues a command through
// DrawerCommandQueue::QueueCommand; a few also update drawer globals afterwards.
//
// NOTE(review): none of the QueueCommand calls below names a command type
// here; how the queue selects the command to construct is not visible in this
// part of the file — confirm against DrawerCommandQueue's declaration.

// Sky column drawers: forward the two solid colors to the queued command.
void R_DrawSingleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom)
{
	DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
}

void R_DrawSingleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom)
{
	DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
}

void R_DrawDoubleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom)
{
	DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
}

void R_DrawDoubleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom)
{
	DrawerCommandQueue::QueueCommand(solid_top, solid_bottom);
}

// Column drawers: queue a command with no explicit arguments.
void R_DrawColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_FillColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_FillAddColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_FillAddClampColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_FillSubClampColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_FillRevSubClampColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

// Queues the fuzz column, then clamps dc_yl/dc_yh to 1..fuzzviewheight and,
// if the clamped range is non-empty, advances fuzzpos by its height
// (wrapping at FUZZTABLE).
void R_DrawFuzzColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();

	dc_yl = MAX(dc_yl, 1);
	dc_yh = MIN(dc_yh, fuzzviewheight);
	if (dc_yl <= dc_yh)
		fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE;
}

void R_DrawAddColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawTranslatedColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawTlatedAddColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawShadedColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawAddClampColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawAddClampTranslatedColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawSubClampColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawSubClampTranslatedColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawRevSubClampColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawRevSubClampTranslatedColumn_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

// Span drawers: queue a command with no explicit arguments.
void R_DrawSpan_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawSpanMasked_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawSpanTranslucent_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawSpanMaskedTranslucent_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawSpanAddClamp_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_DrawSpanMaskedAddClamp_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

void R_FillSpan_rgba()
{
	DrawerCommandQueue::QueueCommand();
}

// Queues a tilted span command; all arguments are forwarded unchanged.
void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy)
{
	DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy);
}

// Queues a colored span command for row y, columns x1..x2 as given.
void R_DrawColoredSpan_rgba(int y, int x1, int x2)
{
	DrawerCommandQueue::QueueCommand(y, x1, x2);
}

// Slab state written by R_SetupDrawSlab_rgba and read by R_DrawSlab_rgba.
static ShadeConstants slab_rgba_shade_constants;
static const BYTE *slab_rgba_colormap;
static fixed_t slab_rgba_light;

// Captures the shading setup for subsequent R_DrawSlab_rgba calls from the
// given colormap, light and shade values.
void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade)
{
	// Light color channels are rescaled by 256 / 255 (255 becomes 256).
	slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255;
	slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255;
	slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255;
	slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255;
	// Fade channels are copied unscaled.
	slab_rgba_shade_constants.fade_red = base_colormap->Fade.r;
	slab_rgba_shade_constants.fade_green = base_colormap->Fade.g;
	slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b;
	slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a;
	// Absolute desaturation, capped at 255 and scaled by 255 / 256.
	slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256;
	// Simple shading applies when Color.d is 0x00ffffff, Fade.d is zero
	// and there is no desaturation.
	slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0);
	slab_rgba_colormap = base_colormap->Maps;
	slab_rgba_light = LIGHTSCALE(light, shade);
}

// Queues a slab command with the given arguments plus the state captured by
// the last R_SetupDrawSlab_rgba call.
void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p)
{
	DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light);
}
DWORD vlinec1_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } void vlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } void mvlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_add_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } void R_DrawFogBoundarySection_rgba(int y, int y2, int x1) { for (; y < y2; ++y) { int x2 = spanend[y]; DrawerCommandQueue::QueueCommand(y, x1, x2); } } void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) { // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that // we need to use a new colormap. 
double lightstep = rw_lightstep; double light = rw_light + rw_lightstep*(x2 - x1 - 1); int x = x2 - 1; int t2 = uclip[x]; int b2 = dclip[x]; int rcolormap = GETPALOOKUP(light, wallshade); int lcolormap; BYTE *basecolormapdata = basecolormap->Maps; if (b2 > t2) { clearbufshort(spanend + t2, b2 - t2, x); } R_SetColorMapLight(basecolormap, (float)light, wallshade); BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); for (--x; x >= x1; --x) { int t1 = uclip[x]; int b1 = dclip[x]; const int xr = x + 1; int stop; light -= rw_lightstep; lcolormap = GETPALOOKUP(light, wallshade); if (lcolormap != rcolormap) { if (t2 < b2 && rcolormap != 0) { // Colormap 0 is always the identity map, so rendering it is // just a waste of time. R_DrawFogBoundarySection_rgba(t2, b2, xr); } if (t1 < t2) t2 = t1; if (b1 > b2) b2 = b1; if (t2 < b2) { clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; R_SetColorMapLight(basecolormap, (float)light, wallshade); fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else { if (fake_dc_colormap != basecolormapdata) { stop = MIN(t1, b2); while (t2 < stop) { int y = t2++; DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); } stop = MAX(b1, t2); while (b2 > stop) { int y = --b2; DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); } } else { t2 = MAX(t2, MIN(t1, b2)); b2 = MIN(b2, MAX(b1, t2)); } stop = MIN(t2, b1); while (t1 < stop) { spanend[t1++] = x; } stop = MAX(b2, t2); while (b1 > stop) { spanend[--b1] = x; } } t2 = uclip[x]; b2 = dclip[x]; } if (t2 < b2 && rcolormap != 0) { R_DrawFogBoundarySection_rgba(t2, b2, x1); } }