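Linear filtering bug fix: drop the half-texel offset from the bilinear sample coordinates and step to the second sample by one texel instead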

This commit is contained in:
Magnus Norddahl 2016-06-27 11:43:24 +02:00
parent 8f38d3af99
commit 200d357b0d
3 changed files with 28 additions and 32 deletions

View file

@ -1436,7 +1436,7 @@ public:
uint32_t frac; uint32_t frac;
uint32_t texturefracx; uint32_t texturefracx;
uint32_t height; uint32_t height;
uint32_t half; uint32_t one;
LoopIterator(DrawerWall1Command *command, DrawerThread *thread) LoopIterator(DrawerWall1Command *command, DrawerThread *thread)
{ {
@ -1451,7 +1451,7 @@ public:
pitch = command->_pitch * thread->num_cores; pitch = command->_pitch * thread->num_cores;
height = command->_textureheight; height = command->_textureheight;
half = (0x80000000 + height - 1) / height; one = ((0x80000000 + height - 1) / height) * 2 + 1;
} }
explicit operator bool() explicit operator bool()
@ -1520,7 +1520,7 @@ public:
uint32_t vplce[4]; uint32_t vplce[4];
uint32_t vince[4]; uint32_t vince[4];
uint32_t height[4]; uint32_t height[4];
uint32_t half[4]; uint32_t one[4];
LoopIterator(DrawerWall4Command *command, DrawerThread *thread) LoopIterator(DrawerWall4Command *command, DrawerThread *thread)
{ {
@ -1537,7 +1537,7 @@ public:
vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vplce[i] = command->_vplce[i] + command->_vince[i] * skipped;
vince[i] = command->_vince[i] * thread->num_cores; vince[i] = command->_vince[i] * thread->num_cores;
height[i] = command->_bufheight[i]; height[i] = command->_bufheight[i];
half[i] = (0x80000000 + height[i] - 1) / height[i]; one[i] = ((0x80000000 + height[i] - 1) / height[i]) * 2 + 1;
} }
} }
@ -1574,7 +1574,7 @@ public:
{ {
FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index)
{ {
return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.one[index], loop.height[index]);
} }
}; };
#else #else
@ -1591,7 +1591,7 @@ public:
FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop)
{ {
__m128i fg; __m128i fg;
VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.one, loop.height);
return fg; return fg;
} }
}; };
@ -2021,7 +2021,7 @@ public:
{ {
do do
{ {
uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants);
*loop.dest = BlendBgra::copy(fg); *loop.dest = BlendBgra::copy(fg);
} while (loop.next()); } while (loop.next());
} }
@ -2048,7 +2048,7 @@ public:
{ {
do do
{ {
uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants);
*loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest);
} while (loop.next()); } while (loop.next());
} }

View file

@ -502,10 +502,10 @@ public:
return (magnifying && r_magfilter) || (!magnifying && r_minfilter); return (magnifying && r_magfilter) || (!magnifying && r_minfilter);
} }
FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height)
{ {
uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; uint32_t frac_y0 = (texturefracy >> FRACBITS) * height;
uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
uint32_t y0 = frac_y0 >> FRACBITS; uint32_t y0 = frac_y0 >> FRACBITS;
uint32_t y1 = frac_y1 >> FRACBITS; uint32_t y1 = frac_y1 >> FRACBITS;
@ -533,18 +533,16 @@ public:
int yshift = (32 - ybits); int yshift = (32 - ybits);
int xmask = (1 << xshift) - 1; int xmask = (1 << xshift) - 1;
int ymask = (1 << yshift) - 1; int ymask = (1 << yshift) - 1;
uint32_t xhalf = 1 << (xbits - 1); uint32_t x = xfrac >> xbits;
uint32_t yhalf = 1 << (ybits - 1); uint32_t y = yfrac >> ybits;
uint32_t x = (xfrac - xhalf) >> xbits;
uint32_t y = (yfrac - yhalf) >> ybits;
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)];
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)];
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; uint32_t inv_b = (xfrac >> (xbits - 4)) & 15;
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; uint32_t inv_a = (yfrac >> (ybits - 4)) & 15;
uint32_t a = 16 - inv_a; uint32_t a = 16 - inv_a;
uint32_t b = 16 - inv_b; uint32_t b = 16 - inv_b;
@ -564,11 +562,11 @@ public:
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// SSE/AVX shading macros: // SSE/AVX shading macros:
#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ #define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \
const uint32_t *baseptr = col0[0]; \ const uint32_t *baseptr = col0[0]; \
__m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \
__m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \
__m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ __m128i mone = _mm_loadu_si128((const __m128i*)one); \
__m128i m127 = _mm_set1_epi16(127); \ __m128i m127 = _mm_set1_epi16(127); \
__m128i m16 = _mm_set1_epi32(16); \ __m128i m16 = _mm_set1_epi32(16); \
__m128i m15 = _mm_set1_epi32(15); \ __m128i m15 = _mm_set1_epi32(15); \
@ -577,8 +575,8 @@ public:
#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ #define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \
__m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \
__m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ __m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \
__m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \
__m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \ __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \
__m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \
__m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \
@ -624,13 +622,13 @@ public:
fg = _mm_packus_epi16(fg_lo, fg_hi); \ fg = _mm_packus_epi16(fg_lo, fg_hi); \
} }
#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ #define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \
__m128i m127 = _mm_set1_epi16(127); \ __m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \ fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \ for (int i = 0; i < 4; i++) \
{ \ { \
uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \
uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \
uint32_t y0 = (frac_y0 >> FRACBITS); \ uint32_t y0 = (frac_y0 >> FRACBITS); \
uint32_t y1 = (frac_y1 >> FRACBITS); \ uint32_t y1 = (frac_y1 >> FRACBITS); \
\ \
@ -676,23 +674,21 @@ public:
int yshift = (32 - ybits); \ int yshift = (32 - ybits); \
int xmask = (1 << xshift) - 1; \ int xmask = (1 << xshift) - 1; \
int ymask = (1 << yshift) - 1; \ int ymask = (1 << yshift) - 1; \
uint32_t xhalf = 1 << (xbits - 1); \
uint32_t yhalf = 1 << (ybits - 1); \
\ \
__m128i m127 = _mm_set1_epi16(127); \ __m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \ fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \ for (int i = 0; i < 4; i++) \
{ \ { \
uint32_t x = (xfrac - xhalf) >> xbits; \ uint32_t x = xfrac >> xbits; \
uint32_t y = (yfrac - yhalf) >> ybits; \ uint32_t y = yfrac >> ybits; \
\ \
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \
\ \
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \
\ \
__m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \
__m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \

View file

@ -1173,14 +1173,14 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof
} }
else else
{ {
int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; int tx0 = (xoffset >> FRACBITS) % mip_width;
if (tx0 < 0) if (tx0 < 0)
tx0 += mip_width; tx0 += mip_width;
int tx1 = (tx0 + 1) % mip_width; int tx1 = (tx0 + 1) % mip_width;
source = (BYTE*)(pixels + tx0 * mip_height); source = (BYTE*)(pixels + tx0 * mip_height);
source2 = (BYTE*)(pixels + tx1 * mip_height); source2 = (BYTE*)(pixels + tx1 * mip_height);
height = mip_height; height = mip_height;
texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; texturefracx = (xoffset >> (FRACBITS - 4)) & 15;
} }
} }
} }