mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-15 09:01:24 +00:00
Added bilinear filtering
This commit is contained in:
parent
6daeb5a158
commit
c70aa1fe99
5 changed files with 242 additions and 62 deletions
|
@ -162,6 +162,8 @@ fixed_t dc_destalpha; // Alpha value used by dc_destblend
|
||||||
|
|
||||||
// first pixel in a column (possibly virtual)
|
// first pixel in a column (possibly virtual)
|
||||||
const BYTE* dc_source;
|
const BYTE* dc_source;
|
||||||
|
const BYTE* dc_source2;
|
||||||
|
uint32_t dc_texturefracx;
|
||||||
|
|
||||||
BYTE* dc_dest;
|
BYTE* dc_dest;
|
||||||
int dc_count;
|
int dc_count;
|
||||||
|
@ -171,6 +173,8 @@ DWORD vince[4];
|
||||||
BYTE* palookupoffse[4];
|
BYTE* palookupoffse[4];
|
||||||
fixed_t palookuplight[4];
|
fixed_t palookuplight[4];
|
||||||
const BYTE* bufplce[4];
|
const BYTE* bufplce[4];
|
||||||
|
const BYTE* bufplce2[4];
|
||||||
|
uint32_t buftexturefracx[4];
|
||||||
|
|
||||||
// just for profiling
|
// just for profiling
|
||||||
int dccount;
|
int dccount;
|
||||||
|
|
36
src/r_draw.h
36
src/r_draw.h
|
@ -71,6 +71,8 @@ extern "C" fixed_t dc_destalpha;
|
||||||
|
|
||||||
// first pixel in a column
|
// first pixel in a column
|
||||||
extern "C" const BYTE* dc_source;
|
extern "C" const BYTE* dc_source;
|
||||||
|
extern "C" const BYTE* dc_source2;
|
||||||
|
extern "C" uint32_t dc_texturefracx;
|
||||||
|
|
||||||
extern "C" BYTE *dc_dest, *dc_destorg;
|
extern "C" BYTE *dc_dest, *dc_destorg;
|
||||||
extern "C" int dc_count;
|
extern "C" int dc_count;
|
||||||
|
@ -80,6 +82,8 @@ extern "C" DWORD vince[4];
|
||||||
extern "C" BYTE* palookupoffse[4];
|
extern "C" BYTE* palookupoffse[4];
|
||||||
extern "C" fixed_t palookuplight[4];
|
extern "C" fixed_t palookuplight[4];
|
||||||
extern "C" const BYTE* bufplce[4];
|
extern "C" const BYTE* bufplce[4];
|
||||||
|
extern "C" const BYTE* bufplce2[4];
|
||||||
|
extern "C" uint32_t buftexturefracx[4];
|
||||||
|
|
||||||
// [RH] Temporary buffer for column drawing
|
// [RH] Temporary buffer for column drawing
|
||||||
extern "C" BYTE *dc_temp;
|
extern "C" BYTE *dc_temp;
|
||||||
|
@ -374,4 +378,36 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade);
|
||||||
|
|
||||||
void R_SetTranslationMap(lighttable_t *translation);
|
void R_SetTranslationMap(lighttable_t *translation);
|
||||||
|
|
||||||
|
extern bool r_swtruecolor;
|
||||||
|
EXTERN_CVAR(Bool, r_bilinear);
|
||||||
|
|
||||||
|
// Texture sampler state needed for bilinear filtering.
// Holds the per-column values that the wall drawers copy into
// dc_source / dc_source2 / dc_texturefracx (single column) or
// bufplce / bufplce2 / buftexturefracx (four columns).
|
||||||
|
struct SamplerSetup
|
||||||
|
{
|
||||||
|
// Empty constructor: leaves source, source2 and texturefracx uninitialized.
// It exists so an array can be declared first and each element assigned afterwards.
SamplerSetup() { }
|
||||||
|
// Fetches the column(s) for texture coordinate xoffset through the getcol callback.
// When magnifying is true only one column is fetched and no filtering state is set up.
SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x));
|
||||||
|
|
||||||
|
const BYTE *source; // Column returned by getcol for the (left) texel column
|
||||||
|
const BYTE *source2; // Column to the right of source, or nullptr when filtering is not used
|
||||||
|
uint32_t texturefracx; // Horizontal blend weight in 1/16 steps (0..15); 0 when filtering is not used
|
||||||
|
};
|
||||||
|
|
||||||
|
// Selects the texture column(s) used to draw one screen column.
//   xoffset    - horizontal texture coordinate; shifting right by FRACBITS yields the column index
//   magnifying - when true, filtering is skipped and a single column is fetched
//   texture    - passed unchanged to getcol
//   getcol     - callback that returns the column data for a given x
// With filtering skipped, source2 is nullptr and texturefracx is 0.
// Otherwise source/source2 are two adjacent columns and texturefracx is the 4-bit blend weight between them.
inline SamplerSetup::SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x))
|
||||||
|
{
|
||||||
|
// Filtering is used only when r_swtruecolor and r_bilinear are both set
// and the caller did not flag this column as magnifying.
|
||||||
|
if (!r_swtruecolor || !r_bilinear || magnifying)
|
||||||
|
{
|
||||||
|
source = getcol(texture, xoffset >> FRACBITS); // single column at the integer part of xoffset
|
||||||
|
source2 = nullptr; // marks "no second column"
|
||||||
|
texturefracx = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Shift the coordinate left by half a texel before truncating to a column index.
// NOTE(review): for xoffset < FRACUNIT / 2 this looks like it yields tx == -1 (if fixed_t is signed),
// and tx + 1 may be one past the last column; confirm getcol wraps or clamps x.
int tx = (xoffset - FRACUNIT / 2) >> FRACBITS;
|
||||||
|
source = getcol(texture, tx);
|
||||||
|
source2 = getcol(texture, tx + 1);
|
||||||
|
// Top 4 fractional bits of (xoffset + half a texel), giving a weight in 0..15.
texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -58,6 +58,7 @@ extern float rw_lightstep;
|
||||||
extern int wallshade;
|
extern int wallshade;
|
||||||
|
|
||||||
CVAR(Bool, r_multithreaded, true, 0)
|
CVAR(Bool, r_multithreaded, true, 0)
|
||||||
|
CVAR(Bool, r_bilinear, false, 0)
|
||||||
|
|
||||||
#ifndef NO_SSE
|
#ifndef NO_SSE
|
||||||
|
|
||||||
|
@ -1547,41 +1548,72 @@ public:
|
||||||
uint32_t light = calc_light_multiplier(_light);
|
uint32_t light = calc_light_multiplier(_light);
|
||||||
ShadeConstants shade_constants = _shade_constants;
|
ShadeConstants shade_constants = _shade_constants;
|
||||||
|
|
||||||
if (_xbits == 6 && _ybits == 6)
|
fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS);
|
||||||
|
fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS);
|
||||||
|
fixed_t magnitude = xmagnitude + ymagnitude;
|
||||||
|
|
||||||
|
bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0;
|
||||||
|
if (magnifying)
|
||||||
{
|
{
|
||||||
// 64x64 is the most common case by far, so special case it.
|
if (_xbits == 6 && _ybits == 6)
|
||||||
|
|
||||||
do
|
|
||||||
{
|
{
|
||||||
// Current texture index in u,v.
|
// 64x64 is the most common case by far, so special case it.
|
||||||
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
|
|
||||||
|
|
||||||
// Lookup pixel from flat texture tile
|
do
|
||||||
*dest++ = shade_bgra(source[spot], light, shade_constants);
|
{
|
||||||
|
// Current texture index in u,v.
|
||||||
|
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
|
||||||
|
|
||||||
// Next step in u,v.
|
// Lookup pixel from flat texture tile
|
||||||
xfrac += xstep;
|
*dest++ = shade_bgra(source[spot], light, shade_constants);
|
||||||
yfrac += ystep;
|
|
||||||
} while (--count);
|
// Next step in u,v.
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
} while (--count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
BYTE yshift = 32 - _ybits;
|
||||||
|
BYTE xshift = yshift - _xbits;
|
||||||
|
int xmask = ((1 << _xbits) - 1) << _ybits;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
// Current texture index in u,v.
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
|
||||||
|
// Lookup pixel from flat texture tile
|
||||||
|
*dest++ = shade_bgra(source[spot], light, shade_constants);
|
||||||
|
|
||||||
|
// Next step in u,v.
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
} while (--count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
BYTE yshift = 32 - _ybits;
|
if (_xbits == 6 && _ybits == 6)
|
||||||
BYTE xshift = yshift - _xbits;
|
|
||||||
int xmask = ((1 << _xbits) - 1) << _ybits;
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
{
|
||||||
// Current texture index in u,v.
|
// 64x64 is the most common case by far, so special case it.
|
||||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
|
||||||
|
|
||||||
// Lookup pixel from flat texture tile
|
do
|
||||||
*dest++ = shade_bgra(source[spot], light, shade_constants);
|
{
|
||||||
|
*dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants);
|
||||||
// Next step in u,v.
|
xfrac += xstep;
|
||||||
xfrac += xstep;
|
yfrac += ystep;
|
||||||
yfrac += ystep;
|
} while (--count);
|
||||||
} while (--count);
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants);
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
} while (--count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -2253,6 +2285,8 @@ class Vlinec1RGBACommand : public DrawerCommand
|
||||||
DWORD _texturefrac;
|
DWORD _texturefrac;
|
||||||
int _count;
|
int _count;
|
||||||
const BYTE * RESTRICT _source;
|
const BYTE * RESTRICT _source;
|
||||||
|
const BYTE * RESTRICT _source2;
|
||||||
|
uint32_t _texturefracx;
|
||||||
BYTE * RESTRICT _dest;
|
BYTE * RESTRICT _dest;
|
||||||
int vlinebits;
|
int vlinebits;
|
||||||
int _pitch;
|
int _pitch;
|
||||||
|
@ -2266,6 +2300,8 @@ public:
|
||||||
_texturefrac = dc_texturefrac;
|
_texturefrac = dc_texturefrac;
|
||||||
_count = dc_count;
|
_count = dc_count;
|
||||||
_source = dc_source;
|
_source = dc_source;
|
||||||
|
_source2 = dc_source2;
|
||||||
|
_texturefracx = dc_texturefracx;
|
||||||
_dest = dc_dest;
|
_dest = dc_dest;
|
||||||
vlinebits = ::vlinebits;
|
vlinebits = ::vlinebits;
|
||||||
_pitch = dc_pitch;
|
_pitch = dc_pitch;
|
||||||
|
@ -2282,6 +2318,8 @@ public:
|
||||||
DWORD fracstep = _iscale * thread->num_cores;
|
DWORD fracstep = _iscale * thread->num_cores;
|
||||||
DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y);
|
DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y);
|
||||||
const uint32 *source = (const uint32 *)_source;
|
const uint32 *source = (const uint32 *)_source;
|
||||||
|
const uint32 *source2 = (const uint32 *)_source2;
|
||||||
|
uint32_t texturefracx = _texturefracx;
|
||||||
uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest);
|
uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest);
|
||||||
int bits = vlinebits;
|
int bits = vlinebits;
|
||||||
int pitch = _pitch * thread->num_cores;
|
int pitch = _pitch * thread->num_cores;
|
||||||
|
@ -2289,12 +2327,24 @@ public:
|
||||||
uint32_t light = calc_light_multiplier(_light);
|
uint32_t light = calc_light_multiplier(_light);
|
||||||
ShadeConstants shade_constants = _shade_constants;
|
ShadeConstants shade_constants = _shade_constants;
|
||||||
|
|
||||||
do
|
if (_source2 == nullptr)
|
||||||
{
|
{
|
||||||
*dest = shade_bgra(source[frac >> bits], light, shade_constants);
|
do
|
||||||
frac += fracstep;
|
{
|
||||||
dest += pitch;
|
*dest = shade_bgra(source[frac >> bits], light, shade_constants);
|
||||||
} while (--count);
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
} while (--count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*dest = shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants);
|
||||||
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
} while (--count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2308,7 +2358,9 @@ class Vlinec4RGBACommand : public DrawerCommand
|
||||||
fixed_t palookuplight[4];
|
fixed_t palookuplight[4];
|
||||||
DWORD vplce[4];
|
DWORD vplce[4];
|
||||||
DWORD vince[4];
|
DWORD vince[4];
|
||||||
const uint32 * RESTRICT bufplce[4];
|
const uint32_t * RESTRICT bufplce[4];
|
||||||
|
const uint32_t * RESTRICT bufplce2[4];
|
||||||
|
uint32_t buftexturefracx[4];
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Vlinec4RGBACommand()
|
Vlinec4RGBACommand()
|
||||||
|
@ -2323,7 +2375,9 @@ public:
|
||||||
palookuplight[i] = ::palookuplight[i];
|
palookuplight[i] = ::palookuplight[i];
|
||||||
vplce[i] = ::vplce[i];
|
vplce[i] = ::vplce[i];
|
||||||
vince[i] = ::vince[i];
|
vince[i] = ::vince[i];
|
||||||
bufplce[i] = (const uint32 *)::bufplce[i];
|
bufplce[i] = (const uint32_t *)::bufplce[i];
|
||||||
|
bufplce2[i] = (const uint32_t *)::bufplce2[i];
|
||||||
|
buftexturefracx[i] = ::buftexturefracx[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2354,14 +2408,28 @@ public:
|
||||||
local_vince[i] *= thread->num_cores;
|
local_vince[i] *= thread->num_cores;
|
||||||
}
|
}
|
||||||
|
|
||||||
do
|
if (bufplce2[0] == nullptr)
|
||||||
{
|
{
|
||||||
dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0];
|
do
|
||||||
dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1];
|
{
|
||||||
dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2];
|
dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0];
|
||||||
dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3];
|
dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1];
|
||||||
dest += pitch;
|
dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2];
|
||||||
} while (--count);
|
dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3];
|
||||||
|
dest += pitch;
|
||||||
|
} while (--count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
dest[0] = shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants); local_vplce[0] = place + local_vince[0];
|
||||||
|
dest[1] = shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants); local_vplce[1] = place + local_vince[1];
|
||||||
|
dest[2] = shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants); local_vplce[2] = place + local_vince[2];
|
||||||
|
dest[3] = shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants); local_vplce[3] = place + local_vince[3];
|
||||||
|
dest += pitch;
|
||||||
|
} while (--count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -3651,7 +3719,10 @@ void R_DrawSpan_rgba()
|
||||||
#ifdef NO_SSE
|
#ifdef NO_SSE
|
||||||
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
|
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
|
||||||
#else
|
#else
|
||||||
DrawerCommandQueue::QueueCommand<DrawSpanRGBA_SSE_Command>();
|
if (!r_bilinear)
|
||||||
|
DrawerCommandQueue::QueueCommand<DrawSpanRGBA_SSE_Command>();
|
||||||
|
else
|
||||||
|
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3705,7 +3776,10 @@ void vlinec4_rgba()
|
||||||
#ifdef NO_SSE
|
#ifdef NO_SSE
|
||||||
DrawerCommandQueue::QueueCommand<Vlinec4RGBACommand>();
|
DrawerCommandQueue::QueueCommand<Vlinec4RGBACommand>();
|
||||||
#else
|
#else
|
||||||
DrawerCommandQueue::QueueCommand<Vlinec4RGBA_SSE_Command>();
|
if (!r_bilinear)
|
||||||
|
DrawerCommandQueue::QueueCommand<Vlinec4RGBA_SSE_Command>();
|
||||||
|
else
|
||||||
|
DrawerCommandQueue::QueueCommand<Vlinec4RGBACommand>();
|
||||||
#endif
|
#endif
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
vplce[i] += vince[i] * dc_count;
|
vplce[i] += vince[i] * dc_count;
|
||||||
|
|
|
@ -426,6 +426,58 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
|
||||||
return 0xff000000 | (red << 16) | (green << 8) | blue;
|
return 0xff000000 | (red << 16) | (green << 8) | blue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bilinear sample from two adjacent texture columns.
//   col0, col1   - the two columns, read as 32-bit pixels
//   texturefracx - weight of col1 in 1/16 steps (the weight of col0 is 16 - texturefracx)
//   texturefracy - vertical texture coordinate as a 32-bit fixed-point value
//   ybits        - right-shift that converts texturefracy into a row index
// Returns the blended pixel packed as (alpha << 24) | (red << 16) | (green << 8) | blue.
FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits)
|
||||||
|
{
|
||||||
|
uint32_t half = 1 << (ybits - 1); // half a texel in texturefracy units
|
||||||
|
// Unsigned arithmetic: when texturefracy < half the subtraction wraps around,
// so y becomes the highest row index representable after the shift.
uint32_t y = (texturefracy - half) >> ybits;
|
||||||
|
|
||||||
|
// NOTE(review): y + 1 is used as-is, with no mask or bounds check here;
// confirm the column data is readable one row past the last index.
uint32_t p00 = col0[y];
|
||||||
|
uint32_t p01 = col0[y + 1];
|
||||||
|
uint32_t p10 = col1[y];
|
||||||
|
uint32_t p11 = col1[y + 1];
|
||||||
|
|
||||||
|
uint32_t inv_b = texturefracx; // horizontal weight of col1
|
||||||
|
uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; // vertical weight of row y + 1: 4 fractional bits
|
||||||
|
uint32_t a = 16 - inv_a; // vertical weight of row y
|
||||||
|
uint32_t b = 16 - inv_b; // horizontal weight of col0
|
||||||
|
|
||||||
|
// The four weight products sum to (a + inv_a) * (b + inv_b) = 256 when texturefracx <= 16,
// so each channel is a weighted sum divided by 256 (>> 8) after adding 127.
uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
|
||||||
|
return (alpha << 24) | (red << 16) | (green << 8) | blue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bilinear sample from a single flat texture with wrap-around in both directions.
//   texture      - pixels indexed as y + (x << yshift), i.e. each x selects a run of (1 << yshift) entries
//   xfrac, yfrac - texture coordinates as 32-bit fixed-point values
//   xbits, ybits - right-shifts that convert xfrac / yfrac into texel indices
//                  (the span drawer passes 26, 26 or 32 - _xbits, 32 - _ybits)
// Returns the blended pixel packed as (alpha << 24) | (red << 16) | (green << 8) | blue.
FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits)
|
||||||
|
{
|
||||||
|
int xshift = (32 - xbits); // number of index bits left after shifting xfrac
|
||||||
|
int yshift = (32 - ybits); // number of index bits left after shifting yfrac
|
||||||
|
int xmask = (1 << xshift) - 1; // wraps x into 0 .. (1 << xshift) - 1
|
||||||
|
int ymask = (1 << yshift) - 1; // wraps y into 0 .. (1 << yshift) - 1
|
||||||
|
uint32_t xhalf = 1 << (xbits - 1); // half a texel in xfrac units
|
||||||
|
uint32_t yhalf = 1 << (ybits - 1); // half a texel in yfrac units
|
||||||
|
// xhalf / yhalf are uint32_t, so these subtractions are done in unsigned arithmetic
// and wrap around instead of going negative.
uint32_t x = (xfrac - xhalf) >> xbits;
|
||||||
|
uint32_t y = (yfrac - yhalf) >> ybits;
|
||||||
|
|
||||||
|
// '+' binds tighter than '&', so "y + 1 & ymask" is (y + 1) & ymask.
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)];
|
||||||
|
uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)];
|
||||||
|
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)];
|
||||||
|
uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)];
|
||||||
|
|
||||||
|
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; // weight of texel x + 1: 4 fractional bits
|
||||||
|
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; // weight of texel y + 1: 4 fractional bits
|
||||||
|
uint32_t a = 16 - inv_a; // weight of texel y
|
||||||
|
uint32_t b = 16 - inv_b; // weight of texel x
|
||||||
|
|
||||||
|
// The four weight products sum to (a + inv_a) * (b + inv_b) = 256,
// so each channel is a weighted sum divided by 256 (>> 8) after adding 127.
uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8;
|
||||||
|
|
||||||
|
return (alpha << 24) | (red << 16) | (green << 8) | blue;
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate constants for a simple shade with gamma correction
|
// Calculate constants for a simple shade with gamma correction
|
||||||
#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \
|
#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \
|
||||||
__m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \
|
__m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \
|
||||||
|
|
|
@ -58,6 +58,8 @@
|
||||||
|
|
||||||
CVAR(Bool, r_np2, true, 0)
|
CVAR(Bool, r_np2, true, 0)
|
||||||
|
|
||||||
|
EXTERN_CVAR(Bool, r_bilinear)
|
||||||
|
|
||||||
//CVAR (Int, ty, 8, 0)
|
//CVAR (Int, ty, 8, 0)
|
||||||
//CVAR (Int, tx, 8, 0)
|
//CVAR (Int, tx, 8, 0)
|
||||||
|
|
||||||
|
@ -1066,14 +1068,16 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Draw a column with support for non-power-of-two ranges
|
// Draw a column with support for non-power-of-two ranges
|
||||||
uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)())
|
uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const SamplerSetup &sampler, DWORD(*draw1column)())
|
||||||
{
|
{
|
||||||
int pixelsize = r_swtruecolor ? 4 : 1;
|
int pixelsize = r_swtruecolor ? 4 : 1;
|
||||||
if (uv_max == 0) // power of two
|
if (uv_max == 0) // power of two
|
||||||
{
|
{
|
||||||
int count = y2 - y1;
|
int count = y2 - y1;
|
||||||
|
|
||||||
dc_source = source;
|
dc_source = sampler.source;
|
||||||
|
dc_source2 = sampler.source2;
|
||||||
|
dc_texturefracx = sampler.texturefracx;
|
||||||
dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg;
|
dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg;
|
||||||
dc_count = count;
|
dc_count = count;
|
||||||
dc_iscale = uv_step;
|
dc_iscale = uv_step;
|
||||||
|
@ -1097,7 +1101,9 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv
|
||||||
next_uv_wrap++;
|
next_uv_wrap++;
|
||||||
uint32_t count = MIN(left, next_uv_wrap);
|
uint32_t count = MIN(left, next_uv_wrap);
|
||||||
|
|
||||||
dc_source = source;
|
dc_source = sampler.source;
|
||||||
|
dc_source2 = sampler.source2;
|
||||||
|
dc_texturefracx = sampler.texturefracx;
|
||||||
dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg;
|
dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg;
|
||||||
dc_count = count;
|
dc_count = count;
|
||||||
dc_iscale = uv_step;
|
dc_iscale = uv_step;
|
||||||
|
@ -1115,7 +1121,7 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv
|
||||||
}
|
}
|
||||||
|
|
||||||
// Draw four columns with support for non-power-of-two ranges
|
// Draw four columns with support for non-power-of-two ranges
|
||||||
void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)())
|
void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const SamplerSetup *sampler, void(*draw4columns)())
|
||||||
{
|
{
|
||||||
int pixelsize = r_swtruecolor ? 4 : 1;
|
int pixelsize = r_swtruecolor ? 4 : 1;
|
||||||
if (uv_max == 0) // power of two, no wrap handling needed
|
if (uv_max == 0) // power of two, no wrap handling needed
|
||||||
|
@ -1123,7 +1129,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste
|
||||||
int count = y2 - y1;
|
int count = y2 - y1;
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
bufplce[i] = source[i];
|
bufplce[i] = sampler[i].source;
|
||||||
|
bufplce2[i] = sampler[i].source2;
|
||||||
|
buftexturefracx[i] = sampler[i].texturefracx;
|
||||||
vplce[i] = uv_pos[i];
|
vplce[i] = uv_pos[i];
|
||||||
vince[i] = uv_step[i];
|
vince[i] = uv_step[i];
|
||||||
|
|
||||||
|
@ -1139,7 +1147,11 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste
|
||||||
{
|
{
|
||||||
dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg;
|
dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg;
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
bufplce[i] = source[i];
|
{
|
||||||
|
bufplce[i] = sampler[i].source;
|
||||||
|
bufplce2[i] = sampler[i].source2;
|
||||||
|
buftexturefracx[i] = sampler[i].texturefracx;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t left = y2 - y1;
|
uint32_t left = y2 - y1;
|
||||||
while (left > 0)
|
while (left > 0)
|
||||||
|
@ -1249,12 +1261,11 @@ void wallscan_any(
|
||||||
if (!fixed)
|
if (!fixed)
|
||||||
R_SetColorMapLight(basecolormap, light, wallshade);
|
R_SetColorMapLight(basecolormap, light, wallshade);
|
||||||
|
|
||||||
const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS);
|
|
||||||
|
|
||||||
uint32_t uv_start, uv_step;
|
uint32_t uv_start, uv_step;
|
||||||
calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step);
|
calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step);
|
||||||
|
|
||||||
wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column);
|
SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol);
|
||||||
|
wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The aligned columns
|
// The aligned columns
|
||||||
|
@ -1264,10 +1275,6 @@ void wallscan_any(
|
||||||
int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] };
|
int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] };
|
||||||
int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] };
|
int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] };
|
||||||
|
|
||||||
const BYTE *source[4];
|
|
||||||
for (int i = 0; i < 4; i++)
|
|
||||||
source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS);
|
|
||||||
|
|
||||||
float lights[4];
|
float lights[4];
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
|
@ -1276,8 +1283,16 @@ void wallscan_any(
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t uv_pos[4], uv_step[4];
|
uint32_t uv_pos[4], uv_step[4];
|
||||||
|
int magnifying = 0;
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]);
|
calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]);
|
||||||
|
magnifying |= uv_step[i] >> (fracbits - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
SamplerSetup sampler[4];
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
sampler[i] = SamplerSetup(lwal[x + i] + xoffset, magnifying == 0, rw_pic, getcol);
|
||||||
|
|
||||||
// Figure out where we vertically can start and stop drawing 4 columns in one go
|
// Figure out where we vertically can start and stop drawing 4 columns in one go
|
||||||
int middle_y1 = y1[0];
|
int middle_y1 = y1[0];
|
||||||
|
@ -1305,7 +1320,7 @@ void wallscan_any(
|
||||||
|
|
||||||
if (!fixed)
|
if (!fixed)
|
||||||
R_SetColorMapLight(basecolormap, lights[i], wallshade);
|
R_SetColorMapLight(basecolormap, lights[i], wallshade);
|
||||||
wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column);
|
wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1317,7 +1332,7 @@ void wallscan_any(
|
||||||
R_SetColorMapLight(basecolormap, lights[i], wallshade);
|
R_SetColorMapLight(basecolormap, lights[i], wallshade);
|
||||||
|
|
||||||
if (y1[i] < middle_y1)
|
if (y1[i] < middle_y1)
|
||||||
uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column);
|
uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Draw the area where all 4 columns are active
|
// Draw the area where all 4 columns are active
|
||||||
|
@ -1337,7 +1352,7 @@ void wallscan_any(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns);
|
wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, sampler, draw4columns);
|
||||||
|
|
||||||
// Draw the last rows where not all 4 columns are active
|
// Draw the last rows where not all 4 columns are active
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
|
@ -1346,7 +1361,7 @@ void wallscan_any(
|
||||||
R_SetColorMapLight(basecolormap, lights[i], wallshade);
|
R_SetColorMapLight(basecolormap, lights[i], wallshade);
|
||||||
|
|
||||||
if (middle_y2 < y2[i])
|
if (middle_y2 < y2[i])
|
||||||
uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column);
|
uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1361,12 +1376,11 @@ void wallscan_any(
|
||||||
if (!fixed)
|
if (!fixed)
|
||||||
R_SetColorMapLight(basecolormap, light, wallshade);
|
R_SetColorMapLight(basecolormap, light, wallshade);
|
||||||
|
|
||||||
const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS);
|
|
||||||
|
|
||||||
uint32_t uv_start, uv_step;
|
uint32_t uv_start, uv_step;
|
||||||
calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step);
|
calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step);
|
||||||
|
|
||||||
wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column);
|
SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol);
|
||||||
|
wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column);
|
||||||
}
|
}
|
||||||
|
|
||||||
NetUpdate ();
|
NetUpdate ();
|
||||||
|
|
Loading…
Reference in a new issue