diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 578ca9646..682ed4668 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -154,6 +154,7 @@ int dc_yl; int dc_yh; fixed_t dc_iscale; fixed_t dc_texturefrac; +uint32_t dc_textureheight; int dc_color; // [RH] Color for column filler DWORD dc_srccolor; uint32_t dc_srccolor_bgra; @@ -177,6 +178,7 @@ fixed_t palookuplight[4]; const BYTE* bufplce[4]; const BYTE* bufplce2[4]; uint32_t buftexturefracx[4]; +uint32_t bufheight[4]; // just for profiling int dccount; @@ -1044,6 +1046,7 @@ int ds_ybits; // start of a floor/ceiling tile image const BYTE* ds_source; +bool ds_source_mipmapped; // just for profiling int dscount; @@ -1067,6 +1070,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { ds_source = r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped(); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { @@ -1644,8 +1648,6 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v int vlinebits; int mvlinebits; -uint32_t vlinemax; -uint32_t mvlinemax; #ifndef X86_ASM static DWORD vlinec1 (); @@ -1695,12 +1697,11 @@ DWORD (*domvline1)() = mvlineasm1; void (*domvline4)() = mvlineasm4; #endif -void setupvline (int fracbits, int fracmax) +void setupvline (int fracbits) { if (r_swtruecolor) { vlinebits = fracbits; - vlinemax = fracmax; return; } @@ -1780,7 +1781,7 @@ void vlinec4 () } #endif -void setupmvline (int fracbits, int fracmax) +void setupmvline (int fracbits) { if (!r_swtruecolor) { @@ -1795,7 +1796,6 @@ void setupmvline (int fracbits, int fracmax) else { mvlinebits = fracbits; - mvlinemax = fracmax; } } @@ -1968,12 +1968,10 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } int tmvlinebits; -uint32_t tmvlinemax; -void setuptmvline (int bits, int fracmax) +void setuptmvline (int bits) { tmvlinebits = bits; - tmvlinemax = fracmax; } fixed_t 
tmvline1_add_C () diff --git a/src/r_draw.h b/src/r_draw.h index 6a078b08f..591ae0b5f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -61,6 +61,7 @@ extern "C" int dc_yh; extern "C" fixed_t dc_iscale; extern double dc_texturemid; extern "C" fixed_t dc_texturefrac; +extern "C" uint32_t dc_textureheight; extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; extern "C" uint32_t dc_srccolor_bgra; @@ -84,6 +85,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; extern "C" const BYTE* bufplce2[4]; extern "C" uint32_t buftexturefracx[4]; +extern "C" uint32_t bufheight[4]; // [RH] Temporary buffer for column drawing extern "C" BYTE *dc_temp; @@ -100,13 +102,13 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); extern void (*dovline4) (); -extern void setupvline (int,int); +extern void setupvline (int); extern DWORD (*domvline1) (); extern void (*domvline4) (); -extern void setupmvline (int,int); +extern void setupmvline (int); -extern void setuptmvline (int,int); +extern void setuptmvline (int); // The Spectre/Invisibility effect. 
extern void (*R_DrawFuzzColumn)(void); @@ -316,6 +318,7 @@ extern "C" fixed_t ds_alpha; // start of a 64*64 tile image extern "C" const BYTE* ds_source; +extern "C" bool ds_source_mipmapped; extern "C" int ds_color; // [RH] For flat color (no texturing) @@ -381,8 +384,8 @@ void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_multithreaded); -EXTERN_CVAR(Bool, r_magfilter_linear); -EXTERN_CVAR(Bool, r_minfilter_linear); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); EXTERN_CVAR(Bool, r_mipmap); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 5a6e88e3b..2576cfeda 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -48,22 +48,22 @@ #endif #include -extern int vlinebits; -extern int mvlinebits; -extern int tmvlinebits; -extern uint32_t vlinemax; -extern uint32_t mvlinemax; -extern uint32_t tmvlinemax; - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; extern int wallshade; +// Use multiple threads when drawing CVAR(Bool, r_multithreaded, true, 0); -CVAR(Bool, r_magfilter_linear, false, 0); -CVAR(Bool, r_minfilter_linear, false, 0); -CVAR(Bool, r_mipmap, true, 0); + +// Use linear filtering when scaling up +CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use linear filtering when scaling down +CVAR(Bool, r_minfilter, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use mipmapped textures +CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); #ifndef NO_SSE @@ -926,7 +926,7 @@ public: _source = (const uint32_t*)ds_source; _light = LightBgra::calc_light_multiplier(ds_light); _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); _srcalpha = dc_srcalpha >> (FRACBITS - 8); _destalpha = dc_destalpha >> (FRACBITS - 8); @@ 
-1354,8 +1354,7 @@ public: DWORD _texturefrac; uint32_t _texturefracx; DWORD _iscale; - int _vlinebits; - uint32_t _vlinemax; + uint32_t _textureheight; const uint32 * RESTRICT _source; const uint32 * RESTRICT _source2; @@ -1365,7 +1364,7 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall1Command(int vlinebits, uint32_t vlinemax) + DrawerWall1Command() { _dest = dc_dest; _pitch = dc_pitch; @@ -1373,8 +1372,7 @@ public: _texturefrac = dc_texturefrac; _texturefracx = dc_texturefracx; _iscale = dc_iscale; - _vlinebits = vlinebits; - _vlinemax = vlinemax; + _textureheight = dc_textureheight; _source = (const uint32 *)dc_source; _source2 = (const uint32 *)dc_source2; @@ -1394,7 +1392,8 @@ public: uint32_t fracstep; uint32_t frac; uint32_t texturefracx; - int bits; + uint32_t height; + uint32_t half; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1406,8 +1405,10 @@ public: frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); texturefracx = command->_texturefracx; dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - bits = command->_vlinebits; pitch = command->_pitch * thread->num_cores; + + height = command->_textureheight; + half = (0x80000000 + height - 1) / height; } explicit operator bool() @@ -1417,7 +1418,7 @@ public: int sample_index() { - return frac >> bits; + return ((frac >> FRACBITS) * height) >> FRACBITS; } bool next() @@ -1435,12 +1436,11 @@ public: BYTE * RESTRICT _dest; int _count; int _pitch; - int _vlinebits; - uint32_t _vlinemax; ShadeConstants _shade_constants; uint32_t _vplce[4]; uint32_t _vince[4]; uint32_t _buftexturefracx[4]; + uint32_t _bufheight[4]; const uint32_t * RESTRICT _bufplce[4]; const uint32_t * RESTRICT _bufplce2[4]; uint32_t _light[4]; @@ -1448,19 +1448,18 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall4Command(int vlinebits, uint32_t vlinemax) + DrawerWall4Command() { _dest = dc_dest; _count = 
dc_count; _pitch = dc_pitch; - _vlinebits = vlinebits; - _vlinemax = vlinemax; _shade_constants = dc_shade_constants; for (int i = 0; i < 4; i++) { _vplce[i] = vplce[i]; _vince[i] = vince[i]; _buftexturefracx[i] = buftexturefracx[i]; + _bufheight[i] = bufheight[i]; _bufplce[i] = (const uint32_t *)bufplce[i]; _bufplce2[i] = (const uint32_t *)bufplce2[i]; _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); @@ -1475,9 +1474,10 @@ public: uint32_t *dest; int pitch; int count; - int bits; uint32_t vplce[4]; uint32_t vince[4]; + uint32_t height[4]; + uint32_t half[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1487,13 +1487,14 @@ public: dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); pitch = command->_pitch * thread->num_cores; - bits = command->_vlinebits; int skipped = thread->skipped_by_thread(command->_dest_y); for (int i = 0; i < 4; i++) { vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; + height[i] = command->_bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; } } @@ -1504,7 +1505,7 @@ public: int sample_index(int col) { - return vplce[col] >> bits; + return ((vplce[col] >> FRACBITS) * height[col]) >> FRACBITS; } bool next() @@ -1522,10 +1523,6 @@ public: class Vlinec1RGBACommand : public DrawerWall1Command { public: - Vlinec1RGBACommand() : DrawerWall1Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1543,7 +1540,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -1553,10 
+1550,6 @@ public: class Vlinec4RGBACommand : public DrawerWall4Command { public: - Vlinec4RGBACommand() : DrawerWall4Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1579,7 +1572,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.vplce[i], loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::copy(fg); } } while (loop.next()); @@ -1590,10 +1583,6 @@ public: class Mvlinec1RGBACommand : public DrawerWall1Command { public: - Mvlinec1RGBACommand() : DrawerWall1Command(mvlinebits, mvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1611,7 +1600,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } @@ -1621,10 +1610,6 @@ public: class Mvlinec4RGBACommand : public DrawerWall4Command { public: - Mvlinec4RGBACommand(): DrawerWall4Command(mvlinebits, mvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1647,7 +1632,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.vplce[i], loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); } } while (loop.next()); @@ -1658,10 +1643,6 @@ public: class Tmvline1AddRGBACommand : public DrawerWall1Command { public: - Tmvline1AddRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1677,10 +1658,6 @@ public: class Tmvline4AddRGBACommand : public DrawerWall4Command { public: - Tmvline4AddRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1699,10 +1676,6 @@ public: class Tmvline1AddClampRGBACommand : public DrawerWall1Command { public: - Tmvline1AddClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1718,10 +1691,6 @@ public: class Tmvline4AddClampRGBACommand : public DrawerWall4Command { public: - Tmvline4AddClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1740,10 +1709,6 @@ public: class Tmvline1SubClampRGBACommand : public DrawerWall1Command { public: - Tmvline1SubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1759,10 +1724,6 @@ public: class Tmvline4SubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4SubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1781,10 +1742,6 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { public: - 
Tmvline1RevSubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1800,10 +1757,6 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4RevSubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -2362,17 +2315,8 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); } -//extern FTexture *rw_pic; // For the asserts below - DWORD vlinec1_rgba() { - /*DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - DWORD height = rw_pic->GetHeight(); - assert((frac >> vlinebits) < height); - frac += (dc_count-1) * fracstep; - assert((frac >> vlinebits) <= height);*/ - DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 56f1faa24..a60fd65c7 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -459,7 +459,7 @@ public: class SampleBgra { public: - inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) + inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) { // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); @@ -467,7 +467,7 @@ public: fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); bool magnifying = (magnitude >> FRACBITS == 0); - if (r_mipmap) + if (r_mipmap && mipmapped) { int level = magnitude >> (FRACBITS + 1); while (level != 0) @@ -482,18 +482,15 @@ public: } } - return (magnifying && r_magfilter_linear) || (!magnifying && r_minfilter_linear); + return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) { - uint32_t half = 1 << (ybits - 1); - uint32_t y0 = (texturefracy - half) >> ybits; - if (y0 > ymax) - y0 = 0; - uint32_t y1 = y0 + 1; - if (y1 > ymax) - y1 = 0; + uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t y0 = frac_y0 >> FRACBITS; + uint32_t y1 = frac_y1 >> FRACBITS; uint32_t p00 = col0[y0]; uint32_t p01 = col0[y1]; @@ -501,7 +498,7 @@ public: uint32_t p11 = col1[y1]; uint32_t inv_b = texturefracx; - uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -546,20 +543,18 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits, ymax) { \ - uint32_t half = 1 << (ybits - 1); \ - \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t y0 
= (texturefracy[i] - half) >> ybits; \ - if (y0 > ymax) y0 = 0; \ - uint32_t y1 = y0 + 1; \ - if (y1 > ymax) y1 = 0; \ + uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t y0 = frac_y0 >> FRACBITS; \ + uint32_t y1 = frac_y1 >> FRACBITS; \ \ uint32_t inv_b = texturefracx[i]; \ - uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ uint32_t a = 16 - inv_a; \ uint32_t b = 16 - inv_b; \ \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index bca30185c..ae8d3bf42 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); } void Execute(DrawerThread *thread) override @@ -364,14 +364,13 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _vlinebits; - uint32_t _vlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; const uint32_t * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Vlinec4RGBA)() @@ -380,8 +379,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _vlinebits = vlinebits; - _vlinemax = vlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -390,6 +387,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -400,9 +398,16 @@ public: return; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, 
(uint32_t*)_dest); - int bits = _vlinebits; int pitch = _pitch * thread->num_cores; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); @@ -431,10 +436,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -457,10 +462,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -482,7 +487,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, 
half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -500,7 +505,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -522,7 +527,6 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _mvlinebits; uint32_t _mvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -530,6 +534,7 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand const uint32 * RESTRICT bufplce[4]; const uint32 * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Mvlinec4RGBA)() @@ -538,8 +543,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _mvlinebits = mvlinebits; - _mvlinemax = mvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -548,6 +551,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -559,7 +563,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _mvlinebits; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); @@ -589,10 +599,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - 
uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -617,10 +627,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -644,7 +654,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -664,7 +674,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -690,12 +700,11 @@ class VecCommand(Tmvline4AddRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; 
- int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddRGBA)() @@ -706,14 +715,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -725,7 +733,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -754,10 +769,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -791,10 +806,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) 
* height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -831,12 +846,11 @@ class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddClampRGBA)() @@ -847,14 +861,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -866,7 +879,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -895,10 +915,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = 
bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -931,10 +951,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -971,12 +991,11 @@ class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4SubClampRGBA)() @@ -987,14 +1006,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -1006,7 +1024,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + 
height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1035,10 +1060,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1071,10 +1096,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1111,12 +1136,11 @@ class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4RevSubClampRGBA)() @@ -1127,14 +1151,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; 
- _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; /* copy column i's texture height; index 4 is past the end of the 4-entry global */ } } @@ -1146,7 +1169,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1175,10 +1205,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1211,10 +1241,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] 
+ local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5c9037375..630d64da0 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1075,86 +1075,112 @@ struct WallscanSampler uint32_t uv_pos; uint32_t uv_step; - int32_t uv_fracbits; uint32_t uv_max; const BYTE *source; const BYTE *source2; uint32_t texturefracx; + uint32_t height; }; WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { - int base_width = texture->GetWidth(); - int base_height = texture->GetHeight(); - uv_fracbits = 32 - texture->HeightBits; - uv_max = base_height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / base_height; - v = v - floor(v); - v *= base_height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - - bool magnifying = uv_step >> (uv_fracbits - 1) == 0; - - if (!r_swtruecolor || getcol != R_GetColumn) + if (!r_swtruecolor) { + height = texture->GetHeight(); + int uv_fracbits = 32 - texture->HeightBits; + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; texturefracx = 0; } else { - int mipmap_offset = 0; - int mip_width = base_width; - int mip_height = base_height; - if (r_mipmap) - { - fixed_t magnitude = abs((int32_t)uv_step) >> (uv_fracbits - FRACBITS); - int level = magnitude >> FRACBITS; - while (level != 0) - { - if (uv_fracbits > 30) - break; + // Normalize to 0-1 range: + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + v = v - floor(v); + double v_step = uv_stepd / texture->GetHeight(); - mipmap_offset += mip_width * mip_height; - uv_fracbits += 1; - uv_pos >>= 1; - uv_step >>= 1; - xoffset >>= 1; - level >>= 1; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } + if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. 
+ { + uv_stepd = 0.0; + v = 0.0; + v_step = 0.0; } - const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + // Convert to uint32: + uv_pos = (uint32_t)(v * 0x100000000LL); + uv_step = (uint32_t)(v_step * 0x100000000LL); + uv_max = 0; - bool filter_nearest = (magnifying && !r_magfilter_linear) || (!magnifying && !r_minfilter_linear); - if (filter_nearest) + // Texture mipmap and filter selection: + if (getcol != R_GetColumn) { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; + height = texture->GetHeight(); texturefracx = 0; } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + double magnitude = fabs(uv_stepd * 2); /* fabs, not abs: unqualified abs() may bind to the int overload and truncate the step */ + bool magnifying = magnitude < 1.0; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped()) + { + int level = (int)MAX(magnitude - 1.0, 0.0); + while (level != 0) + { + mipmap_offset += mip_width * mip_height; + xoffset >>= 1; + level >>= 1; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + height = mip_height; + texturefracx = 0; + } + else + { + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = 
(BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } } } } @@ -1162,18 +1188,18 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof // Draw a column with support for non-power-of-two ranges void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler.uv_max == 0) // power of two + if (r_swtruecolor) { int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; + dc_textureheight = sampler.height; draw1column(); uint64_t step64 = sampler.uv_step; @@ -1182,41 +1208,60 @@ void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*d } else { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) + if (sampler.uv_max == 0) // power of two { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); + int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; + dc_texturefrac = sampler.uv_pos; draw1column(); - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } + else + { + uint32_t 
uv_pos = sampler.uv_pos; - sampler.uv_pos = uv_pos; + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } } } // Draw four columns with support for non-power-of-two ranges void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler[0].uv_max == 0) // power of two, no wrap handling needed + if (r_swtruecolor) { int count = y2 - y1; for (int i = 0; i < 4; i++) @@ -1224,6 +1269,7 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr bufplce[i] = sampler[i].source; bufplce2[i] = sampler[i].source2; buftexturefracx[i] = sampler[i].texturefracx; + bufheight[i] = sampler[i].height; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; @@ -1231,52 +1277,74 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr uint64_t pos64 = sampler[i].uv_pos; sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; draw4columns(); } else { - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - for (int i = 0; i < 4; i++) + if (sampler[0].uv_max == 0) // power of two, no wrap handling needed { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - } - - 
uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps + int count = y2 - y1; for (int i = 0; i < 4; i++) { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; draw4columns(); - - // Wrap the uv position + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; for (int i = 0; i < 4; i++) { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; } - left -= count; + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left 
-= count; + } } } } @@ -1287,7 +1355,7 @@ typedef void(*Draw4ColumnsFuncPtr)(); void wallscan_any( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, int fracmax, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -1297,7 +1365,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(32 - rw_pic->HeightBits, (rw_pic->GetHeight() - 1) << (32 - rw_pic->HeightBits), draw1column, draw4columns); + setupwallscan(r_swtruecolor ? FRACBITS : 32 - rw_pic->HeightBits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1450,9 +1518,9 @@ void wallscan_any( void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupvline(bits, fracmax); + setupvline(bits); line1 = dovline1; line4 = dovline4; }); @@ -1466,9 +1534,9 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupmvline(bits, fracmax); + setupmvline(bits); line1 = domvline1; line4 = domvline4; }); @@ -1486,9 +1554,9 @@ void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fi } 
else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setuptmvline(bits, fracmax); + setuptmvline(bits); line1 = reinterpret_cast(tmvline1); line4 = tmvline4; }); diff --git a/src/textures/textures.h b/src/textures/textures.h index ab9dc3719..bb83f79e7 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -185,6 +185,9 @@ public: // Returns the whole texture, stored in column-major order, in BGRA8 format virtual const uint32_t *GetPixelsBgra(); + // Returns true if GetPixelsBgra includes mipmaps + virtual bool Mipmapped() { return true; } + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -530,6 +533,7 @@ public: void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; } DSimpleCanvas *GetCanvas() { return Canvas; } DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; } + bool Mipmapped() override { return false; } void MakeTexture (); void MakeTextureBgra (); diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index f9050a27b..0bb3a84fd 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1781,6 +1781,9 @@ DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; DSPLYMNU_TRUECOLOR = "True color output"; +DSPLYMNU_MINFILTER = "Linear filter when downscaling"; +DSPLYMNU_MAGFILTER = "Linear filter when upscaling"; +DSPLYMNU_MIPMAP = "Use mipmapped textures"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not 
used diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 3c712de96..679db909b 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -662,6 +662,9 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" + Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff" + Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff" + Option "$DSPLYMNU_MIPMAP", "r_mipmap", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes"