- Removed power-of-two limitation from span drawers, except for slopes

This commit is contained in:
Magnus Norddahl 2017-06-21 03:08:11 +02:00
parent f909b82074
commit fa36677bdb
10 changed files with 203 additions and 220 deletions

View file

@ -37,7 +37,6 @@ union QWORD_UNION
#define FRACUNIT (1<<FRACBITS)
typedef int32_t fixed_t;
typedef uint32_t dsfixed_t; // fixedpt used by span drawer
#define FIXED_MAX (signed)(0x7fffffff)
#define FIXED_MIN (signed)(0x80000000)

View file

@ -1964,8 +1964,8 @@ namespace swrenderer
_dest = args.Viewport()->GetDest(_x1, _y);
_xstep = args.TextureUStep();
_ystep = args.TextureVStep();
_xbits = args.TextureWidthBits();
_ybits = args.TextureHeightBits();
_srcwidth = args.TextureWidth();
_srcheight = args.TextureHeight();
_srcblend = args.SrcBlend();
_destblend = args.DestBlend();
_color = args.SolidColor();
@ -2035,10 +2035,10 @@ namespace swrenderer
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t xfrac;
uint32_t yfrac;
uint32_t xstep;
uint32_t ystep;
uint8_t *dest;
const uint8_t *source = _source;
const uint8_t *colormap = _colormap;
@ -2060,7 +2060,7 @@ namespace swrenderer
float viewpos_x = _viewpos_x;
float step_viewpos_x = _step_viewpos_x;
if (_xbits == 6 && _ybits == 6 && num_dynlights == 0)
if (_srcwidth == 64 && _srcheight == 64 && num_dynlights == 0)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2077,7 +2077,7 @@ namespace swrenderer
yfrac += ystep;
} while (--count);
}
else if (_xbits == 6 && _ybits == 6)
else if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2097,14 +2097,13 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
// Current texture index in u,v.
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
// Lookup pixel from flat texture tile,
// re-index using light/colormap.
@ -2123,10 +2122,10 @@ namespace swrenderer
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t xfrac;
uint32_t yfrac;
uint32_t xstep;
uint32_t ystep;
uint8_t *dest;
const uint8_t *source = _source;
const uint8_t *colormap = _colormap;
@ -2148,7 +2147,7 @@ namespace swrenderer
float viewpos_x = _viewpos_x;
float step_viewpos_x = _step_viewpos_x;
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2169,14 +2168,14 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
int texdata;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
texdata = source[spot];
if (texdata != 0)
{
@ -2195,10 +2194,10 @@ namespace swrenderer
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t xfrac;
uint32_t yfrac;
uint32_t xstep;
uint32_t ystep;
uint8_t *dest;
const uint8_t *source = _source;
const uint8_t *colormap = _colormap;
@ -2226,7 +2225,7 @@ namespace swrenderer
if (!r_blendmethod)
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2245,12 +2244,12 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
uint32_t bg = *dest;
fg = fg2rgb[fg];
@ -2265,7 +2264,7 @@ namespace swrenderer
}
else
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2285,12 +2284,12 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
uint32_t bg = *dest;
int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
@ -2311,10 +2310,10 @@ namespace swrenderer
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t xfrac;
uint32_t yfrac;
uint32_t xstep;
uint32_t ystep;
uint8_t *dest;
const uint8_t *source = _source;
const uint8_t *colormap = _colormap;
@ -2342,7 +2341,7 @@ namespace swrenderer
if (!r_blendmethod)
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2368,14 +2367,14 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
uint8_t texdata;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
texdata = source[spot];
if (texdata != 0)
{
@ -2395,7 +2394,7 @@ namespace swrenderer
}
else
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2421,14 +2420,14 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
uint8_t texdata;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
texdata = source[spot];
if (texdata != 0)
{
@ -2453,10 +2452,10 @@ namespace swrenderer
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t xfrac;
uint32_t yfrac;
uint32_t xstep;
uint32_t ystep;
uint8_t *dest;
const uint8_t *source = _source;
const uint8_t *colormap = _colormap;
@ -2483,7 +2482,7 @@ namespace swrenderer
if (!r_blendmethod)
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2506,12 +2505,12 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
uint32_t a = fg2rgb[fg] + bg2rgb[*dest];
uint32_t b = a;
@ -2530,7 +2529,7 @@ namespace swrenderer
}
else
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2550,12 +2549,12 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
uint32_t bg = *dest;
int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
@ -2576,10 +2575,10 @@ namespace swrenderer
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t xfrac;
uint32_t yfrac;
uint32_t xstep;
uint32_t ystep;
uint8_t *dest;
const uint8_t *source = _source;
const uint8_t *colormap = _colormap;
@ -2606,7 +2605,7 @@ namespace swrenderer
if (!r_blendmethod)
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2636,14 +2635,14 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
uint8_t texdata;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
texdata = source[spot];
if (texdata != 0)
{
@ -2667,7 +2666,7 @@ namespace swrenderer
}
else
{
if (_xbits == 6 && _ybits == 6)
if (_srcwidth == 64 && _srcheight == 64)
{
// 64x64 is the most common case by far, so special case it.
do
@ -2693,14 +2692,14 @@ namespace swrenderer
}
else
{
uint8_t yshift = 32 - _ybits;
uint8_t xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
// Hold the texture size in 32 bits. A uint8_t copy wraps for any dimension
// of 256 or more (256 becomes 0), which corrupts the texel index computed
// from srcwidth/srcheight in the loop below.
uint32_t srcwidth = _srcwidth;
uint32_t srcheight = _srcheight;
do
{
uint8_t texdata;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
texdata = source[spot];
if (texdata != 0)
{

View file

@ -101,16 +101,16 @@ namespace swrenderer
const uint8_t *_source;
const uint8_t *_colormap;
dsfixed_t _xfrac;
dsfixed_t _yfrac;
uint32_t _xfrac;
uint32_t _yfrac;
int _y;
int _x1;
int _x2;
uint8_t *_dest;
dsfixed_t _xstep;
dsfixed_t _ystep;
int _xbits;
int _ybits;
uint32_t _xstep;
uint32_t _ystep;
int _srcwidth;
int _srcheight;
uint32_t *_srcblend;
uint32_t *_destblend;
int _color;

View file

@ -286,7 +286,7 @@ namespace swrenderer
{
public:
// calculates the light constant passed to the shade_pal_index function
FORCEINLINE static uint32_t calc_light_multiplier(dsfixed_t light)
FORCEINLINE static uint32_t calc_light_multiplier(uint32_t light)
{
// Discards the low (FRACBITS - 8) bits of the light value and subtracts what is left from 256,
// so light == 0 yields 256 and light == FRACUNIT (1 << FRACBITS) yields 0.
return 256 - (light >> (FRACBITS - 8));
}

View file

@ -61,15 +61,14 @@ namespace swrenderer
// Per-span texture sampling state gathered from the drawer arguments before the span is drawn.
struct TextureData
{
// Filled from args.TextureWidthBits() / args.TextureHeightBits().
uint32_t xbits;
uint32_t ybits;
// Size of the texture level being sampled: starts as args.TextureWidth() / args.TextureHeight()
// and is halved for each mip level that gets selected.
uint32_t width;
uint32_t height;
// (0x80000000u / size) << 1, i.e. roughly 2^32 / size: the distance between two neighbouring
// texels expressed in the 32-bit texture coordinate used by xfrac/yfrac.
uint32_t xone;
uint32_t yone;
// Coordinate increments, initialised from args.TextureUStep() / args.TextureVStep().
uint32_t xstep;
uint32_t ystep;
// Current 32-bit texture coordinate; xfrac is initialised from args.TextureUPos()
// (yfrac presumably from the matching V accessor - confirm in the full file).
uint32_t xfrac;
uint32_t yfrac;
// Derived from xbits/ybits: yshift = 32 - ybits, xshift = yshift - xbits,
// xmask = ((1 << xbits) - 1) << ybits.
uint32_t yshift;
uint32_t xshift;
uint32_t xmask;
// Pixel data of the level being sampled; advanced by width * height entries per skipped mip level.
const uint32_t *source;
};
@ -80,8 +79,8 @@ namespace swrenderer
if (thread->line_skipped_by_thread(args.DestY())) return;
TextureData texdata;
texdata.xbits = args.TextureWidthBits();
texdata.ybits = args.TextureHeightBits();
texdata.width = args.TextureWidth();
texdata.height = args.TextureHeight();
texdata.xstep = args.TextureUStep();
texdata.ystep = args.TextureVStep();
texdata.xfrac = args.TextureUPos();
@ -98,22 +97,21 @@ namespace swrenderer
int level = (int)lod;
while (level > 0)
{
if (texdata.xbits <= 2 || texdata.ybits <= 2)
if (texdata.width <= 2 || texdata.height <= 2)
break;
texdata.source += (1 << (texdata.xbits)) * (1 << (texdata.ybits));
texdata.xbits -= 1;
texdata.ybits -= 1;
texdata.source += texdata.width * texdata.height;
texdata.width = MAX<uint32_t>(texdata.width / 2, 1);
texdata.height = MAX<uint32_t>(texdata.height / 2, 1);
level--;
}
}
texdata.yshift = 32 - texdata.ybits;
texdata.xshift = texdata.yshift - texdata.xbits;
texdata.xmask = ((1 << texdata.xbits) - 1) << texdata.ybits;
texdata.xone = (0x80000000u / texdata.width) << 1;
texdata.yone = (0x80000000u / texdata.height) << 1;
bool is_nearest_filter = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter);
bool is_64x64 = texdata.xbits == 6 && texdata.ybits == 6;
bool is_64x64 = texdata.width == 64 && texdata.height == 64;
auto shade_constants = args.ColormapConstants();
if (shade_constants.simple_shade)
@ -198,8 +196,8 @@ namespace swrenderer
if (FilterModeT::Mode == (int)FilterModes::Linear)
{
texdata.xfrac -= 1 << (31 - texdata.xbits);
texdata.yfrac -= 1 << (31 - texdata.ybits);
texdata.xfrac -= texdata.xone / 2;
texdata.yfrac -= texdata.yone / 2;
}
uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8);
@ -217,7 +215,7 @@ namespace swrenderer
bgcolor = 0;
}
uint32_t ifgcolor = Sample<FilterModeT, TextureSizeT>(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source);
uint32_t ifgcolor = Sample<FilterModeT, TextureSizeT>(texdata.width, texdata.height, texdata.xone, texdata.yone, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.source);
BgraColor fgcolor = Shade<ShadeModeT>(ifgcolor, light, desaturate, inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_x);
BgraColor outcolor = Blend(fgcolor, bgcolor, srcalpha, destalpha, ifgcolor);
@ -231,7 +229,7 @@ namespace swrenderer
}
template<typename FilterModeT, typename TextureSizeT>
FORCEINLINE uint32_t Sample(uint32_t xbits, uint32_t ybits, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, uint32_t yshift, uint32_t xshift, uint32_t xmask, const uint32_t *source)
FORCEINLINE uint32_t Sample(uint32_t width, uint32_t height, uint32_t xone, uint32_t yone, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, const uint32_t *source)
{
using namespace DrawSpan32TModes;
@ -242,37 +240,44 @@ namespace swrenderer
}
else if (FilterModeT::Mode == (int)FilterModes::Nearest)
{
int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t x = ((xfrac >> 16) * width) >> 16;
uint32_t y = ((yfrac >> 16) * height) >> 16;
int sample_index = x * height + y;
return source[sample_index];
}
else
{
uint32_t xxbits, yybits;
uint32_t p00, p01, p10, p11;
uint32_t frac_x, frac_y;
if (TextureSizeT::Mode == (int)SpanTextureSize::Size64x64)
{
xxbits = 26;
yybits = 26;
frac_x = xfrac >> 16 << 6;
frac_y = yfrac >> 16 << 6;
uint32_t x0 = frac_x >> 16;
uint32_t y0 = frac_y >> 16;
uint32_t x1 = (x0 + 1) & 0x3f;
uint32_t y1 = (y0 + 1) & 0x3f;
p00 = source[(y0 + (x0 << 6))];
p01 = source[(y1 + (x0 << 6))];
p10 = source[(y0 + (x1 << 6))];
p11 = source[(y1 + (x1 << 6))];
}
else
{
xxbits = 32 - xbits;
yybits = 32 - ybits;
frac_x = (xfrac >> 16) * width;
frac_y = (yfrac >> 16) * height;
uint32_t x0 = frac_x >> 16;
uint32_t y0 = frac_y >> 16;
uint32_t x1 = (((xfrac + xone) >> 16) * width) >> 16;
uint32_t y1 = (((yfrac + yone) >> 16) * height) >> 16;
p00 = source[y0 + x0 * height];
p01 = source[y1 + x0 * height];
p10 = source[y0 + x1 * height];
p11 = source[y1 + x1 * height];
}
uint32_t xxshift = (32 - xxbits);
uint32_t yyshift = (32 - yybits);
uint32_t xxmask = (1 << xxshift) - 1;
uint32_t yymask = (1 << yyshift) - 1;
uint32_t x = xfrac >> xxbits;
uint32_t y = yfrac >> yybits;
uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))];
uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))];
uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))];
uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))];
uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15;
uint32_t inv_a = (yfrac >> (yybits - 4)) & 15;
uint32_t inv_b = (frac_x >> 12) & 15;
uint32_t inv_a = (frac_y >> 12) & 15;
uint32_t a = 16 - inv_a;
uint32_t b = 16 - inv_b;

View file

@ -61,15 +61,14 @@ namespace swrenderer
// Per-span texture sampling state gathered from the drawer arguments before the span is drawn.
struct TextureData
{
// Filled from args.TextureWidthBits() / args.TextureHeightBits().
uint32_t xbits;
uint32_t ybits;
// Size of the texture level being sampled: starts as args.TextureWidth() / args.TextureHeight()
// and is halved for each mip level that gets selected.
uint32_t width;
uint32_t height;
// (0x80000000u / size) << 1, i.e. roughly 2^32 / size: the distance between two neighbouring
// texels expressed in the 32-bit texture coordinate used by xfrac/yfrac.
uint32_t xone;
uint32_t yone;
// Coordinate increments, initialised from args.TextureUStep() / args.TextureVStep() and
// added to xfrac/yfrac after every sample.
uint32_t xstep;
uint32_t ystep;
// Current 32-bit texture coordinate; xfrac is initialised from args.TextureUPos()
// (yfrac presumably from the matching V accessor - confirm in the full file).
uint32_t xfrac;
uint32_t yfrac;
// Derived from xbits/ybits: yshift = 32 - ybits, xshift = yshift - xbits,
// xmask = ((1 << xbits) - 1) << ybits.
uint32_t yshift;
uint32_t xshift;
uint32_t xmask;
// Pixel data of the level being sampled; advanced by width * height entries per skipped mip level.
const uint32_t *source;
};
@ -80,8 +79,8 @@ namespace swrenderer
if (thread->line_skipped_by_thread(args.DestY())) return;
TextureData texdata;
texdata.xbits = args.TextureWidthBits();
texdata.ybits = args.TextureHeightBits();
texdata.width = args.TextureWidth();
texdata.height = args.TextureHeight();
texdata.xstep = args.TextureUStep();
texdata.ystep = args.TextureVStep();
texdata.xfrac = args.TextureUPos();
@ -98,22 +97,21 @@ namespace swrenderer
int level = (int)lod;
while (level > 0)
{
if (texdata.xbits <= 2 || texdata.ybits <= 2)
if (texdata.width <= 2 || texdata.height <= 2)
break;
texdata.source += (1 << (texdata.xbits)) * (1 << (texdata.ybits));
texdata.xbits -= 1;
texdata.ybits -= 1;
texdata.source += texdata.width * texdata.height;
texdata.width = MAX<uint32_t>(texdata.width / 2, 1);
texdata.height = MAX<uint32_t>(texdata.height / 2, 1);
level--;
}
}
texdata.yshift = 32 - texdata.ybits;
texdata.xshift = texdata.yshift - texdata.xbits;
texdata.xmask = ((1 << texdata.xbits) - 1) << texdata.ybits;
texdata.xone = (0x80000000u / texdata.width) << 1;
texdata.yone = (0x80000000u / texdata.height) << 1;
bool is_nearest_filter = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter);
bool is_64x64 = texdata.xbits == 6 && texdata.ybits == 6;
bool is_64x64 = texdata.width == 64 && texdata.height == 64;
auto shade_constants = args.ColormapConstants();
if (shade_constants.simple_shade)
@ -194,8 +192,8 @@ namespace swrenderer
if (FilterModeT::Mode == (int)FilterModes::Linear)
{
texdata.xfrac -= 1 << (31 - texdata.xbits);
texdata.yfrac -= 1 << (31 - texdata.ybits);
texdata.xfrac -= texdata.xone / 2;
texdata.yfrac -= texdata.yone / 2;
}
uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8);
@ -217,11 +215,11 @@ namespace swrenderer
}
unsigned int ifgcolor[2];
ifgcolor[0] = Sample<FilterModeT, TextureSizeT>(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source);
ifgcolor[0] = Sample<FilterModeT, TextureSizeT>(texdata.width, texdata.height, texdata.xone, texdata.yone, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.source);
texdata.xfrac += texdata.xstep;
texdata.yfrac += texdata.ystep;
ifgcolor[1] = Sample<FilterModeT, TextureSizeT>(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source);
ifgcolor[1] = Sample<FilterModeT, TextureSizeT>(texdata.width, texdata.height, texdata.xone, texdata.yone, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.source);
texdata.xfrac += texdata.xstep;
texdata.yfrac += texdata.ystep;
@ -251,7 +249,7 @@ namespace swrenderer
// Sample
unsigned int ifgcolor[2];
ifgcolor[0] = Sample<FilterModeT, TextureSizeT>(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source);
ifgcolor[0] = Sample<FilterModeT, TextureSizeT>(texdata.width, texdata.height, texdata.xone, texdata.yone, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.source);
ifgcolor[1] = 0;
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
@ -265,7 +263,7 @@ namespace swrenderer
}
template<typename FilterModeT, typename TextureSizeT>
FORCEINLINE unsigned int VECTORCALL Sample(uint32_t xbits, uint32_t ybits, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, uint32_t yshift, uint32_t xshift, uint32_t xmask, const uint32_t *source)
FORCEINLINE unsigned int VECTORCALL Sample(uint32_t width, uint32_t height, uint32_t xone, uint32_t yone, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, const uint32_t *source)
{
using namespace DrawSpan32TModes;
@ -276,37 +274,44 @@ namespace swrenderer
}
else if (FilterModeT::Mode == (int)FilterModes::Nearest)
{
int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t x = ((xfrac >> 16) * width) >> 16;
uint32_t y = ((yfrac >> 16) * height) >> 16;
int sample_index = x * height + y;
return source[sample_index];
}
else
{
uint32_t xxbits, yybits;
uint32_t p00, p01, p10, p11;
uint32_t frac_x, frac_y;
if (TextureSizeT::Mode == (int)SpanTextureSize::Size64x64)
{
xxbits = 26;
yybits = 26;
frac_x = xfrac >> 16 << 6;
frac_y = yfrac >> 16 << 6;
uint32_t x0 = frac_x >> 16;
uint32_t y0 = frac_y >> 16;
uint32_t x1 = (x0 + 1) & 0x3f;
uint32_t y1 = (y0 + 1) & 0x3f;
p00 = source[(y0 + (x0 << 6))];
p01 = source[(y1 + (x0 << 6))];
p10 = source[(y0 + (x1 << 6))];
p11 = source[(y1 + (x1 << 6))];
}
else
{
xxbits = 32 - xbits;
yybits = 32 - ybits;
frac_x = (xfrac >> 16) * width;
frac_y = (yfrac >> 16) * height;
uint32_t x0 = frac_x >> 16;
uint32_t y0 = frac_y >> 16;
uint32_t x1 = (((xfrac + xone) >> 16) * width) >> 16;
uint32_t y1 = (((yfrac + yone) >> 16) * height) >> 16;
p00 = source[y0 + x0 * height];
p01 = source[y1 + x0 * height];
p10 = source[y0 + x1 * height];
p11 = source[y1 + x1 * height];
}
uint32_t xxshift = (32 - xxbits);
uint32_t yyshift = (32 - yybits);
uint32_t xxmask = (1 << xxshift) - 1;
uint32_t yymask = (1 << yyshift) - 1;
uint32_t x = xfrac >> xxbits;
uint32_t y = yfrac >> yybits;
uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))];
uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))];
uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))];
uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))];
uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15;
uint32_t inv_a = (yfrac >> (yybits - 4)) & 15;
uint32_t inv_b = (frac_x >> 12) & 15;
uint32_t inv_a = (frac_y >> 12) & 15;
uint32_t a = 16 - inv_a;
uint32_t b = 16 - inv_b;

View file

@ -171,27 +171,11 @@ namespace swrenderer
double distance = viewport->PlaneDepth(y, planeheight);
if (drawerargs.TextureWidthBits() != 0)
{
drawerargs.SetTextureUStep(xs_ToFixed(32 - drawerargs.TextureWidthBits(), distance * xstepscale));
drawerargs.SetTextureUPos(xs_ToFixed(32 - drawerargs.TextureWidthBits(), distance * curxfrac + pviewx));
}
else
{
drawerargs.SetTextureUStep(0);
drawerargs.SetTextureUPos(0);
}
drawerargs.SetTextureUStep(distance * xstepscale / drawerargs.TextureWidth());
drawerargs.SetTextureUPos((distance * curxfrac + pviewx) / drawerargs.TextureWidth());
if (drawerargs.TextureHeightBits() != 0)
{
drawerargs.SetTextureVStep(xs_ToFixed(32 - drawerargs.TextureHeightBits(), distance * ystepscale));
drawerargs.SetTextureVPos(xs_ToFixed(32 - drawerargs.TextureHeightBits(), distance * curyfrac + pviewy));
}
else
{
drawerargs.SetTextureVStep(0);
drawerargs.SetTextureVPos(0);
}
drawerargs.SetTextureVStep(distance * ystepscale / drawerargs.TextureHeight());
drawerargs.SetTextureVPos((distance * curyfrac + pviewy) / drawerargs.TextureHeight());
if (viewport->RenderTarget->IsBgra())
{

View file

@ -285,26 +285,12 @@ void SWCanvas::FillSimplePoly(DCanvas *canvas, FTexture *tex, FVector2 *points,
drawerargs.SetLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1));
else
drawerargs.SetLight(&identitycolormap, 0, 0);
if (drawerargs.TextureWidthBits() != 0)
{
scalex = double(1u << (32 - drawerargs.TextureWidthBits())) / scalex;
drawerargs.SetTextureUStep(xs_RoundToInt(cosrot * scalex));
}
else
{ // Texture is one pixel wide.
scalex = 0;
drawerargs.SetTextureUStep(0);
}
if (drawerargs.TextureHeightBits() != 0)
{
scaley = double(1u << (32 - drawerargs.TextureHeightBits())) / scaley;
drawerargs.SetTextureVStep(xs_RoundToInt(sinrot * scaley));
}
else
{ // Texture is one pixel tall.
scaley = 0;
drawerargs.SetTextureVStep(0);
}
// SetTextureUStep/VStep and SetTextureUPos/VPos take coordinates in whole-texture units,
// so the factor applied to screen distances must be 1 / (scale * texture size).
// The power-of-two code this replaces computed 2^(32 - bits) / scale, i.e. the same
// reciprocal in fixed point; dividing the scale itself by the size inverts the scaling.
scalex = 1.0 / (scalex * drawerargs.TextureWidth());
scaley = 1.0 / (scaley * drawerargs.TextureHeight());
drawerargs.SetTextureUStep(cosrot * scalex);
drawerargs.SetTextureVStep(sinrot * scaley);
int width = canvas->GetWidth();
@ -382,8 +368,8 @@ void SWCanvas::FillSimplePoly(DCanvas *canvas, FTexture *tex, FVector2 *points,
tex.X = t * cosrot - tex.Y * sinrot;
tex.Y = tex.Y * cosrot + t * sinrot;
}
drawerargs.SetTextureUPos(xs_RoundToInt(tex.X * scalex));
drawerargs.SetTextureVPos(xs_RoundToInt(tex.Y * scaley));
drawerargs.SetTextureUPos(tex.X * scalex);
drawerargs.SetTextureVPos(tex.Y * scaley);
drawerargs.DrawSpan(&thread);
#endif

View file

@ -34,7 +34,8 @@ namespace swrenderer
{
thread->PrepareTexture(tex);
tex->GetWidth();
ds_texwidth = tex->GetWidth();
ds_texheight = tex->GetHeight();
ds_xbits = tex->WidthBits;
ds_ybits = tex->HeightBits;
if ((1 << ds_xbits) > tex->GetWidth())

View file

@ -21,10 +21,10 @@ namespace swrenderer
void SetDestX2(int x) { ds_x2 = x; }
void SetTexture(RenderThread *thread, FTexture *tex);
void SetTextureLOD(double lod) { ds_lod = lod; }
void SetTextureUPos(dsfixed_t xfrac) { ds_xfrac = xfrac; }
void SetTextureVPos(dsfixed_t yfrac) { ds_yfrac = yfrac; }
void SetTextureUStep(dsfixed_t xstep) { ds_xstep = xstep; }
void SetTextureVStep(dsfixed_t vstep) { ds_ystep = vstep; }
// Texture positions and steps arrive as doubles and are stored scaled by 2^32 (4294967296.0)
// in 32 bits, so only the fractional part of the value survives and whole units wrap around.
// The intermediate int64_t cast lets negative inputs wrap modulo 2^32 instead of converting a
// negative double directly to an unsigned type.
void SetTextureUPos(double u) { ds_xfrac = (uint32_t)(int64_t)(u * 4294967296.0); }
void SetTextureVPos(double v) { ds_yfrac = (uint32_t)(int64_t)(v * 4294967296.0); }
void SetTextureUStep(double ustep) { ds_xstep = (uint32_t)(int64_t)(ustep * 4294967296.0); }
void SetTextureVStep(double vstep) { ds_ystep = (uint32_t)(int64_t)(vstep * 4294967296.0); }
void SetSolidColor(int colorIndex) { ds_color = colorIndex; }
void DrawSpan(RenderThread *thread);
@ -39,11 +39,13 @@ namespace swrenderer
int DestY() const { return ds_y; }
int DestX1() const { return ds_x1; }
int DestX2() const { return ds_x2; }
dsfixed_t TextureUPos() const { return ds_xfrac; }
dsfixed_t TextureVPos() const { return ds_yfrac; }
dsfixed_t TextureUStep() const { return ds_xstep; }
dsfixed_t TextureVStep() const { return ds_ystep; }
// Raw 32-bit texture position and step values, returned exactly as stored in
// ds_xfrac / ds_yfrac / ds_xstep / ds_ystep.
uint32_t TextureUPos() const { return ds_xfrac; }
uint32_t TextureVPos() const { return ds_yfrac; }
uint32_t TextureUStep() const { return ds_xstep; }
uint32_t TextureVStep() const { return ds_ystep; }
int SolidColor() const { return ds_color; }
// Texture dimensions as stored in ds_texwidth / ds_texheight.
int TextureWidth() const { return ds_texwidth; }
int TextureHeight() const { return ds_texheight; }
int TextureWidthBits() const { return ds_xbits; }
int TextureHeightBits() const { return ds_ybits; }
const uint8_t *TexturePixels() const { return ds_source; }
@ -64,14 +66,16 @@ namespace swrenderer
int ds_y;
int ds_x1;
int ds_x2;
int ds_texwidth;
int ds_texheight;
int ds_xbits;
int ds_ybits;
const uint8_t *ds_source;
bool ds_source_mipmapped;
dsfixed_t ds_xfrac;
dsfixed_t ds_yfrac;
dsfixed_t ds_xstep;
dsfixed_t ds_ystep;
uint32_t ds_xfrac;
uint32_t ds_yfrac;
uint32_t ds_xstep;
uint32_t ds_ystep;
uint32_t *dc_srcblend;
uint32_t *dc_destblend;
fixed_t dc_srcalpha;