mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2025-01-19 07:00:52 +00:00
Linear filtering bug fix
This commit is contained in:
parent
8f38d3af99
commit
200d357b0d
3 changed files with 28 additions and 32 deletions
|
@ -1436,7 +1436,7 @@ public:
|
||||||
uint32_t frac;
|
uint32_t frac;
|
||||||
uint32_t texturefracx;
|
uint32_t texturefracx;
|
||||||
uint32_t height;
|
uint32_t height;
|
||||||
uint32_t half;
|
uint32_t one;
|
||||||
|
|
||||||
LoopIterator(DrawerWall1Command *command, DrawerThread *thread)
|
LoopIterator(DrawerWall1Command *command, DrawerThread *thread)
|
||||||
{
|
{
|
||||||
|
@ -1451,7 +1451,7 @@ public:
|
||||||
pitch = command->_pitch * thread->num_cores;
|
pitch = command->_pitch * thread->num_cores;
|
||||||
|
|
||||||
height = command->_textureheight;
|
height = command->_textureheight;
|
||||||
half = (0x80000000 + height - 1) / height;
|
one = ((0x80000000 + height - 1) / height) * 2 + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
explicit operator bool()
|
explicit operator bool()
|
||||||
|
@ -1520,7 +1520,7 @@ public:
|
||||||
uint32_t vplce[4];
|
uint32_t vplce[4];
|
||||||
uint32_t vince[4];
|
uint32_t vince[4];
|
||||||
uint32_t height[4];
|
uint32_t height[4];
|
||||||
uint32_t half[4];
|
uint32_t one[4];
|
||||||
|
|
||||||
LoopIterator(DrawerWall4Command *command, DrawerThread *thread)
|
LoopIterator(DrawerWall4Command *command, DrawerThread *thread)
|
||||||
{
|
{
|
||||||
|
@ -1537,7 +1537,7 @@ public:
|
||||||
vplce[i] = command->_vplce[i] + command->_vince[i] * skipped;
|
vplce[i] = command->_vplce[i] + command->_vince[i] * skipped;
|
||||||
vince[i] = command->_vince[i] * thread->num_cores;
|
vince[i] = command->_vince[i] * thread->num_cores;
|
||||||
height[i] = command->_bufheight[i];
|
height[i] = command->_bufheight[i];
|
||||||
half[i] = (0x80000000 + height[i] - 1) / height[i];
|
one[i] = ((0x80000000 + height[i] - 1) / height[i]) * 2 + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1574,7 +1574,7 @@ public:
|
||||||
{
|
{
|
||||||
FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index)
|
FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index)
|
||||||
{
|
{
|
||||||
return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]);
|
return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.one[index], loop.height[index]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#else
|
#else
|
||||||
|
@ -1591,7 +1591,7 @@ public:
|
||||||
FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop)
|
FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop)
|
||||||
{
|
{
|
||||||
__m128i fg;
|
__m128i fg;
|
||||||
VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height);
|
VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.one, loop.height);
|
||||||
return fg;
|
return fg;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -2021,7 +2021,7 @@ public:
|
||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants);
|
uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants);
|
||||||
*loop.dest = BlendBgra::copy(fg);
|
*loop.dest = BlendBgra::copy(fg);
|
||||||
} while (loop.next());
|
} while (loop.next());
|
||||||
}
|
}
|
||||||
|
@ -2048,7 +2048,7 @@ public:
|
||||||
{
|
{
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants);
|
uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants);
|
||||||
*loop.dest = BlendBgra::alpha_blend(fg, *loop.dest);
|
*loop.dest = BlendBgra::alpha_blend(fg, *loop.dest);
|
||||||
} while (loop.next());
|
} while (loop.next());
|
||||||
}
|
}
|
||||||
|
|
|
@ -502,10 +502,10 @@ public:
|
||||||
return (magnifying && r_magfilter) || (!magnifying && r_minfilter);
|
return (magnifying && r_magfilter) || (!magnifying && r_minfilter);
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height)
|
FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height)
|
||||||
{
|
{
|
||||||
uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height;
|
uint32_t frac_y0 = (texturefracy >> FRACBITS) * height;
|
||||||
uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height;
|
uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
|
||||||
uint32_t y0 = frac_y0 >> FRACBITS;
|
uint32_t y0 = frac_y0 >> FRACBITS;
|
||||||
uint32_t y1 = frac_y1 >> FRACBITS;
|
uint32_t y1 = frac_y1 >> FRACBITS;
|
||||||
|
|
||||||
|
@ -533,18 +533,16 @@ public:
|
||||||
int yshift = (32 - ybits);
|
int yshift = (32 - ybits);
|
||||||
int xmask = (1 << xshift) - 1;
|
int xmask = (1 << xshift) - 1;
|
||||||
int ymask = (1 << yshift) - 1;
|
int ymask = (1 << yshift) - 1;
|
||||||
uint32_t xhalf = 1 << (xbits - 1);
|
uint32_t x = xfrac >> xbits;
|
||||||
uint32_t yhalf = 1 << (ybits - 1);
|
uint32_t y = yfrac >> ybits;
|
||||||
uint32_t x = (xfrac - xhalf) >> xbits;
|
|
||||||
uint32_t y = (yfrac - yhalf) >> ybits;
|
|
||||||
|
|
||||||
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)];
|
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)];
|
||||||
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)];
|
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)];
|
||||||
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)];
|
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)];
|
||||||
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)];
|
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)];
|
||||||
|
|
||||||
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15;
|
uint32_t inv_b = (xfrac >> (xbits - 4)) & 15;
|
||||||
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15;
|
uint32_t inv_a = (yfrac >> (ybits - 4)) & 15;
|
||||||
uint32_t a = 16 - inv_a;
|
uint32_t a = 16 - inv_a;
|
||||||
uint32_t b = 16 - inv_b;
|
uint32_t b = 16 - inv_b;
|
||||||
|
|
||||||
|
@ -564,11 +562,11 @@ public:
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
// SSE/AVX shading macros:
|
// SSE/AVX shading macros:
|
||||||
|
|
||||||
#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \
|
#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \
|
||||||
const uint32_t *baseptr = col0[0]; \
|
const uint32_t *baseptr = col0[0]; \
|
||||||
__m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \
|
__m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \
|
||||||
__m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \
|
__m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \
|
||||||
__m128i mhalf = _mm_loadu_si128((const __m128i*)half); \
|
__m128i mone = _mm_loadu_si128((const __m128i*)one); \
|
||||||
__m128i m127 = _mm_set1_epi16(127); \
|
__m128i m127 = _mm_set1_epi16(127); \
|
||||||
__m128i m16 = _mm_set1_epi32(16); \
|
__m128i m16 = _mm_set1_epi32(16); \
|
||||||
__m128i m15 = _mm_set1_epi32(15); \
|
__m128i m15 = _mm_set1_epi32(15); \
|
||||||
|
@ -577,8 +575,8 @@ public:
|
||||||
|
|
||||||
#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \
|
#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \
|
||||||
__m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \
|
__m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \
|
||||||
__m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \
|
__m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \
|
||||||
__m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \
|
__m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \
|
||||||
__m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \
|
__m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \
|
||||||
__m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \
|
__m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \
|
||||||
__m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \
|
__m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \
|
||||||
|
@ -624,13 +622,13 @@ public:
|
||||||
fg = _mm_packus_epi16(fg_lo, fg_hi); \
|
fg = _mm_packus_epi16(fg_lo, fg_hi); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \
|
#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \
|
||||||
__m128i m127 = _mm_set1_epi16(127); \
|
__m128i m127 = _mm_set1_epi16(127); \
|
||||||
fg = _mm_setzero_si128(); \
|
fg = _mm_setzero_si128(); \
|
||||||
for (int i = 0; i < 4; i++) \
|
for (int i = 0; i < 4; i++) \
|
||||||
{ \
|
{ \
|
||||||
uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \
|
uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \
|
||||||
uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \
|
uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \
|
||||||
uint32_t y0 = (frac_y0 >> FRACBITS); \
|
uint32_t y0 = (frac_y0 >> FRACBITS); \
|
||||||
uint32_t y1 = (frac_y1 >> FRACBITS); \
|
uint32_t y1 = (frac_y1 >> FRACBITS); \
|
||||||
\
|
\
|
||||||
|
@ -676,23 +674,21 @@ public:
|
||||||
int yshift = (32 - ybits); \
|
int yshift = (32 - ybits); \
|
||||||
int xmask = (1 << xshift) - 1; \
|
int xmask = (1 << xshift) - 1; \
|
||||||
int ymask = (1 << yshift) - 1; \
|
int ymask = (1 << yshift) - 1; \
|
||||||
uint32_t xhalf = 1 << (xbits - 1); \
|
|
||||||
uint32_t yhalf = 1 << (ybits - 1); \
|
|
||||||
\
|
\
|
||||||
__m128i m127 = _mm_set1_epi16(127); \
|
__m128i m127 = _mm_set1_epi16(127); \
|
||||||
fg = _mm_setzero_si128(); \
|
fg = _mm_setzero_si128(); \
|
||||||
for (int i = 0; i < 4; i++) \
|
for (int i = 0; i < 4; i++) \
|
||||||
{ \
|
{ \
|
||||||
uint32_t x = (xfrac - xhalf) >> xbits; \
|
uint32_t x = xfrac >> xbits; \
|
||||||
uint32_t y = (yfrac - yhalf) >> ybits; \
|
uint32_t y = yfrac >> ybits; \
|
||||||
\
|
\
|
||||||
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \
|
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \
|
||||||
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \
|
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \
|
||||||
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \
|
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \
|
||||||
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \
|
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \
|
||||||
\
|
\
|
||||||
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \
|
uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \
|
||||||
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \
|
uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \
|
||||||
\
|
\
|
||||||
__m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \
|
__m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \
|
||||||
__m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \
|
__m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \
|
||||||
|
|
|
@ -1173,14 +1173,14 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width;
|
int tx0 = (xoffset >> FRACBITS) % mip_width;
|
||||||
if (tx0 < 0)
|
if (tx0 < 0)
|
||||||
tx0 += mip_width;
|
tx0 += mip_width;
|
||||||
int tx1 = (tx0 + 1) % mip_width;
|
int tx1 = (tx0 + 1) % mip_width;
|
||||||
source = (BYTE*)(pixels + tx0 * mip_height);
|
source = (BYTE*)(pixels + tx0 * mip_height);
|
||||||
source2 = (BYTE*)(pixels + tx1 * mip_height);
|
source2 = (BYTE*)(pixels + tx1 * mip_height);
|
||||||
height = mip_height;
|
height = mip_height;
|
||||||
texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15;
|
texturefracx = (xoffset >> (FRACBITS - 4)) & 15;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue