From 200d357b0d1f609ce67fdb23c03c77836285f0e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 11:43:24 +0200 Subject: [PATCH] Linear filtering bug fix --- src/r_draw_rgba.cpp | 16 ++++++++-------- src/r_draw_rgba.h | 40 ++++++++++++++++++---------------------- src/r_segs.cpp | 4 ++-- 3 files changed, 28 insertions(+), 32 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index aa88e43026..7a071e1d49 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1436,7 +1436,7 @@ public: uint32_t frac; uint32_t texturefracx; uint32_t height; - uint32_t half; + uint32_t one; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1451,7 +1451,7 @@ public: pitch = command->_pitch * thread->num_cores; height = command->_textureheight; - half = (0x80000000 + height - 1) / height; + one = ((0x80000000 + height - 1) / height) * 2 + 1; } explicit operator bool() @@ -1520,7 +1520,7 @@ public: uint32_t vplce[4]; uint32_t vince[4]; uint32_t height[4]; - uint32_t half[4]; + uint32_t one[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1537,7 +1537,7 @@ public: vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; height[i] = command->_bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; + one[i] = ((0x80000000 + height[i] - 1) / height[i]) * 2 + 1; } } @@ -1574,7 +1574,7 @@ public: { FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) { - return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); + return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.one[index], loop.height[index]); } }; #else @@ -1591,7 +1591,7 @@ public: FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.one, loop.height); return fg; } }; @@ -2021,7 +2021,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -2048,7 +2048,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 53572c88b4..27d7bd035f 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -502,10 +502,10 @@ public: return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height) { - uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; - uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t frac_y0 = (texturefracy >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height; uint32_t y0 = frac_y0 >> FRACBITS; uint32_t y1 = frac_y1 >> FRACBITS; @@ -533,18 +533,16 @@ public: int yshift = (32 - ybits); int xmask = (1 << xshift) - 1; int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; + uint32_t x = xfrac >> xbits; + uint32_t y = yfrac >> ybits; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -564,11 +562,11 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ +#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \ const uint32_t *baseptr = col0[0]; \ __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ - __m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ + __m128i mone = _mm_loadu_si128((const __m128i*)one); \ __m128i m127 = _mm_set1_epi16(127); \ __m128i m16 = _mm_set1_epi32(16); \ __m128i m15 = _mm_set1_epi32(15); \ @@ -577,8 +575,8 @@ public: #define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ - __m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ - __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \ + __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \ __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \ __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ @@ -624,13 +622,13 @@ public: fg = _mm_packus_epi16(fg_lo, fg_hi); \ } -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ - uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \ uint32_t y0 = (frac_y0 >> FRACBITS); \ uint32_t y1 = (frac_y1 >> FRACBITS); \ \ @@ -676,23 +674,21 @@ public: int yshift = (32 - ybits); \ int xmask = (1 << xshift) - 1; \ int ymask = (1 << yshift) - 1; \ - uint32_t xhalf = 1 << (xbits - 1); \ - uint32_t yhalf = 1 << (ybits - 1); \ \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t x = (xfrac - xhalf) >> xbits; \ - uint32_t y = (yfrac - yhalf) >> ybits; \ + uint32_t x = xfrac >> xbits; \ + uint32_t y = yfrac >> ybits; \ \ uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ \ - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \ + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \ \ __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 870d748947..96bb1f948c 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1173,14 +1173,14 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + int tx0 = (xoffset >> FRACBITS) % mip_width; if (tx0 < 0) tx0 += mip_width; int tx1 = (tx0 + 1) % mip_width; source = (BYTE*)(pixels + tx0 * mip_height); source2 = (BYTE*)(pixels + tx1 * mip_height); height = mip_height; - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; } } }