Add native mipmap support to FTexture

This commit is contained in:
Magnus Norddahl 2016-06-21 21:55:08 +02:00
parent d15af1524c
commit c235de5c22
9 changed files with 171 additions and 196 deletions

View file

@ -1064,7 +1064,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource;
void R_SetSpanSource(FTexture *tex)
{
R_SetMipmappedSpanSource(tex);
ds_source = r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels();
#ifdef X86_ASM
if (!r_swtruecolor && ds_cursource != ds_source)
{

View file

@ -3672,106 +3672,6 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread)
/////////////////////////////////////////////////////////////////////////////
#include <map>
// Holds a texture's 32-bit pixels followed by a chain of progressively halved
// copies ("mipmap levels"), all packed back to back in one contiguous buffer.
//
// Texels are addressed as [y + x * height], i.e. each level is stored one
// column after another. Each texel is packed as
// (alpha << 24) | (red << 16) | (green << 8) | blue.
class MipmappedTexture
{
public:
// Copies the base image from texture->GetPixelsBgra() and generates every
// further level by averaging 2x2 texel blocks of the previous level.
MipmappedTexture(FTexture *texture)
{
const uint32_t *base_texture = texture->GetPixelsBgra();
Width = texture->GetWidth();
Height = texture->GetHeight();
// NOTE(review): WidthBits/HeightBits are declared outside this view; presumably
// log2 of the texture dimensions - confirm.
Levels = MAX(texture->WidthBits, texture->HeightBits);
// I bet there is a better way to calculate this..
// Total texel count over all levels; each level dimension is clamped to >= 2.
int buffersize = 0;
for (int i = 0; i < Levels; i++)
{
int w = MAX(Width >> i, 2); // 2 instead of 1 because we texelGather in 2x2 blocks
int h = MAX(Height >> i, 2);
buffersize += w * h;
}
Pixels.resize(buffersize);
// Base level:
// NOTE(review): if Levels is 0 the buffer is empty, yet Width * Height texels are
// still copied here - confirm that case cannot occur.
memcpy(Pixels.data(), base_texture, Width * Height * 4);
// Mipmap levels:
// src walks the level being read, dest the level being written; the first
// generated level starts right after the Width * Height base texels.
uint32_t *src = Pixels.data();
uint32_t *dest = src + Width * Height;
for (int i = 1; i < Levels; i++)
{
// Column stride of the source level (clamped the same way as the sizes above).
int srch = MAX(Height >> (i - 1), 2);
int w = MAX(Width >> i, 2);
int h = MAX(Height >> i, 2);
// NOTE(review): once a dimension has been clamped to 2, the reads at
// y * 2 + 1 / x * 2 + 1 below index past the 2-texel source extent (for
// example an 8x2 texture); verify this cannot read outside the source level.
for (int x = 0; x < w; x++)
{
for (int y = 0; y < h; y++)
{
// The four source texels of the 2x2 block that maps onto (x, y).
uint32_t src00 = src[y * 2 + x * 2 * srch];
uint32_t src01 = src[y * 2 + 1 + x * 2 * srch];
uint32_t src10 = src[y * 2 + (x * 2 + 1) * srch];
uint32_t src11 = src[y * 2 + 1 + (x * 2 + 1) * srch];
// Per-channel average; the + 2 rounds to nearest before dividing by 4.
uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4;
uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4;
uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4;
uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4;
dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue;
}
}
// The level just written becomes the source for the next, smaller level.
src = dest;
dest += w * h;
}
}
// Dimensions of the base level, as reported by the source texture.
int Width = 0;
int Height = 0;
// Number of levels stored in Pixels, including the base level.
int Levels = 0;
// Base level followed by every generated mipmap level.
std::vector<uint32_t> Pixels;
};
class TextureMipmapper
{
public:
static std::map<FTexture*, std::shared_ptr<MipmappedTexture>> &Textures()
{
static std::map<FTexture*, std::shared_ptr<MipmappedTexture>> textures;
return textures;
}
};
// Points ds_source at the pixel data that should be used for tex:
//  - r_swtruecolor off:             tex->GetPixels()
//  - r_swtruecolor on, r_mipmap off: tex->GetPixelsBgra()
//  - both on:                        the cached MipmappedTexture for tex,
//                                    created on first use
void R_SetMipmappedSpanSource(FTexture *tex)
{
	if (!r_swtruecolor)
	{
		ds_source = tex->GetPixels();
		return;
	}

	if (!r_mipmap)
	{
		ds_source = (const BYTE*)tex->GetPixelsBgra();
		return;
	}

	// operator[] inserts an empty slot for textures seen for the first time;
	// the chain is then generated and stored in that slot.
	std::shared_ptr<MipmappedTexture> &entry = TextureMipmapper::Textures()[tex];
	if (entry == nullptr)
	{
		entry = std::make_shared<MipmappedTexture>(tex);
	}
	ds_source = (const BYTE*)entry->Pixels.data();
}
// Empties the mipmap cache held by TextureMipmapper, releasing the cache's
// reference to every generated chain.
void R_ClearMipmapCache()
{
	auto &cache = TextureMipmapper::Textures();
	cache.clear();
}
void R_BeginDrawerCommands()
{
DrawerCommandQueue::Begin();

View file

@ -108,9 +108,6 @@ void tmvline4_revsubclamp_rgba();
void R_FillColumnHoriz_rgba();
void R_FillSpan_rgba();
void R_SetMipmappedSpanSource(FTexture *tex);
void R_ClearMipmapCache();
/////////////////////////////////////////////////////////////////////////////
// Multithreaded rendering infrastructure:
@ -494,9 +491,9 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d
uint32_t y = (yfrac - yhalf) >> ybits;
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)];
uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)];
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)];
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15;
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15;
@ -511,87 +508,81 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d
return (alpha << 24) | (red << 16) | (green << 8) | blue;
}
#ifndef NO_SSE
FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t **col0, const uint32_t **col1, uint32_t texturefracx[4], uint32_t texturefracy[4], int ybits)
{
uint32_t half = 1 << (ybits - 1);
__m128i m127 = _mm_set1_epi16(127);
__m128i fg = _mm_setzero_si128();
for (int i = 0; i < 4; i++)
{
uint32_t y = (texturefracy[i] - half) >> ybits;
uint32_t inv_b = texturefracx[i];
uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15;
uint32_t a = 16 - inv_a;
uint32_t b = 16 - inv_b;
uint32_t ab = a * b;
uint32_t invab = inv_a * b;
uint32_t ainvb = a * inv_b;
uint32_t invainvb = inv_a * inv_b;
__m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab);
__m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb);
__m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128());
__m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128());
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb));
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8);
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12));
}
return fg;
// Statement macro: bilinear-samples one texel for each of four columns and
// packs the four results into the __m128i named by fg.
//
//   fg            __m128i lvalue that receives the result; it is overwritten.
//   col0, col1    indexed [0..3]; for lane i the macro does a 64-bit load from
//                 col0[i] + y and from col1[i] + y.
//                 NOTE(review): presumably arrays of const uint32_t*, so each
//                 load fetches texels y and y + 1 of that column - confirm.
//   texturefracx  indexed [0..3]; used directly as the weight given to col1
//                 (the col0 weight is 16 minus this value).
//   texturefracy  indexed [0..3]; fixed-point vertical position. It is only
//                 read here; stepping it is left to the caller.
//   ybits         number of fractional bits in texturefracy.
//
// The four blend weights of a lane always add up to 16 * 16 = 256, so the
// weighted sum is brought back to 8 bits with (sum + 127) >> 8.
// Each iteration shifts fg right by one pixel and inserts the new pixel in the
// top 32 bits, so lane 0 ends up in the lowest 32 bits.
//
// The macro declares its own locals (half, m127, i, y, a, b, ...) inside a
// brace scope and evaluates its arguments more than once; pass plain variable
// names that do not collide with those locals.
#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits) { \
uint32_t half = 1 << (ybits - 1); \
\
__m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \
{ \
uint32_t y = (texturefracy[i] - half) >> ybits; \
\
uint32_t inv_b = texturefracx[i]; \
uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \
uint32_t a = 16 - inv_a; \
uint32_t b = 16 - inv_b; \
\
uint32_t ab = a * b; \
uint32_t invab = inv_a * b; \
uint32_t ainvb = a * inv_b; \
uint32_t invainvb = inv_a * inv_b; \
__m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \
__m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \
\
__m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); \
__m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); \
\
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \
\
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \
} \
}
FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t *texture, dsfixed_t &xfrac, dsfixed_t &yfrac, dsfixed_t xstep, dsfixed_t ystep, int xbits, int ybits)
{
int xshift = (32 - xbits);
int yshift = (32 - ybits);
int xmask = (1 << xshift) - 1;
int ymask = (1 << yshift) - 1;
uint32_t xhalf = 1 << (xbits - 1);
uint32_t yhalf = 1 << (ybits - 1);
__m128i m127 = _mm_set1_epi16(127);
__m128i fg = _mm_setzero_si128();
for (int i = 0; i < 4; i++)
{
uint32_t x = (xfrac - xhalf) >> xbits;
uint32_t y = (yfrac - yhalf) >> ybits;
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)];
uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)];
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15;
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15;
uint32_t a = 16 - inv_a;
uint32_t b = 16 - inv_b;
uint32_t ab = a * b;
uint32_t invab = inv_a * b;
uint32_t ainvb = a * inv_b;
uint32_t invainvb = inv_a * inv_b;
__m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab);
__m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb);
__m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128());
__m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128());
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb));
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8);
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12));
xfrac += xstep;
yfrac += ystep;
// Statement macro: bilinear-samples four consecutive span pixels from texture
// and packs them into the __m128i named by fg.
//
//   fg            __m128i lvalue that receives the result; it is overwritten.
//   texture       indexable texel array, addressed as
//                 (y & ymask) + ((x & xmask) << yshift), i.e. columns of
//                 (1 << (32 - ybits)) texels; coordinates wrap via the masks.
//   xfrac, yfrac  fixed-point texture position. These are MODIFIED: each is
//                 advanced by xstep / ystep once per pixel, four times in all.
//   xstep, ystep  per-pixel increments for xfrac / yfrac.
//   xbits, ybits  number of fractional bits in xfrac / yfrac; the texture is
//                 treated as (1 << (32 - xbits)) by (1 << (32 - ybits)) texels.
//
// The blend uses 4-bit fractions; the four weights add up to 256, so the sum
// is brought back to 8 bits with (sum + 127) >> 8.
// Each iteration shifts fg right by one pixel and inserts the new pixel in the
// top 32 bits, so the first sampled pixel ends up in the lowest 32 bits.
//
// The macro declares its own locals (xshift, yshift, xmask, ymask, xhalf,
// yhalf, m127, i, x, y, a, b, ...) inside a brace scope and evaluates its
// arguments more than once; pass plain variable names that do not collide
// with those locals.
#define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \
int xshift = (32 - xbits); \
int yshift = (32 - ybits); \
int xmask = (1 << xshift) - 1; \
int ymask = (1 << yshift) - 1; \
uint32_t xhalf = 1 << (xbits - 1); \
uint32_t yhalf = 1 << (ybits - 1); \
\
__m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \
{ \
uint32_t x = (xfrac - xhalf) >> xbits; \
uint32_t y = (yfrac - yhalf) >> ybits; \
\
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \
\
uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \
uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \
uint32_t a = 16 - inv_a; \
uint32_t b = 16 - inv_b; \
\
uint32_t ab = a * b; \
uint32_t invab = inv_a * b; \
uint32_t ainvb = a * inv_b; \
uint32_t invainvb = inv_a * inv_b; \
__m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \
__m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \
\
__m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \
__m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \
\
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \
\
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \
\
xfrac += xstep; \
yfrac += ystep; \
} \
}
return fg;
}
#endif
// Calculate constants for a simple shade with gamma correction
#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \

View file

@ -280,7 +280,8 @@ public:
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
__m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26);
__m128i fg;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
@ -291,7 +292,8 @@ public:
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
__m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26);
__m128i fg;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
@ -318,7 +320,10 @@ public:
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
__m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 -_xbits, 32 - _ybits);
__m128i fg;
int tmpx = 32 - _xbits;
int tmpy = 32 - _ybits;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
@ -329,7 +334,10 @@ public:
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
__m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 - _xbits, 32 - _ybits);
__m128i fg;
int tmpx = 32 - _xbits;
int tmpy = 32 - _ybits;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
@ -471,7 +479,8 @@ public:
VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0);
do
{
__m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits);
__m128i fg;
VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits);
local_vplce[0] = local_vplce[0] + local_vince[0];
local_vplce[1] = local_vplce[1] + local_vince[1];
@ -488,7 +497,8 @@ public:
VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants);
do
{
__m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits);
__m128i fg;
VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits);
local_vplce[0] = local_vplce[0] + local_vince[0];
local_vplce[1] = local_vplce[1] + local_vince[1];
@ -629,7 +639,8 @@ public:
VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0);
do
{
__m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits);
__m128i fg;
VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits);
local_vplce[0] = local_vplce[0] + local_vince[0];
local_vplce[1] = local_vplce[1] + local_vince[1];
@ -648,7 +659,8 @@ public:
VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants);
do
{
__m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits);
__m128i fg;
VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits);
local_vplce[0] = local_vplce[0] + local_vince[0];
local_vplce[1] = local_vplce[1] + local_vince[1];

View file

@ -170,7 +170,6 @@ void FSoftwareRenderer::RenderView(player_t *player)
R_InitColumnDrawers();
}
R_ClearMipmapCache();
R_BeginDrawerCommands();
R_RenderActorView (player->mo);
// [RH] Let cameras draw onto textures that were visible this frame.

View file

@ -474,7 +474,7 @@ void FJPEGTexture::MakeTextureBgra()
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;
PixelsBgra.resize(Width * Height, 0xffba0000);
CreatePixelsBgraWithMipmaps();
cinfo.err = jpeg_std_error(&jerr);
cinfo.err->output_message = JPEG_OutputMessage;
@ -560,6 +560,8 @@ void FJPEGTexture::MakeTextureBgra()
{
delete[] buff;
}
GenerateBgraMipmaps();
}

View file

@ -633,7 +633,7 @@ void FPNGTexture::MakeTextureBgra ()
lump = new FileReader(SourceFile.GetChars());
}
PixelsBgra.resize(Width * Height, 0xffff0000);
CreatePixelsBgraWithMipmaps();
if (StartOfIDAT != 0)
{
DWORD len, id;
@ -757,6 +757,7 @@ void FPNGTexture::MakeTextureBgra ()
}
}
delete lump;
GenerateBgraMipmaps();
}
//===========================================================================

View file

@ -200,7 +200,7 @@ const uint32_t *FTexture::GetPixelsBgra()
const BYTE *indices = GetPixels();
if (indices == nullptr)
return nullptr;
PixelsBgra.resize(Width * Height);
CreatePixelsBgraWithMipmaps();
for (int i = 0; i < Width * Height; i++)
{
if (indices[i] != 0)
@ -208,6 +208,7 @@ const uint32_t *FTexture::GetPixelsBgra()
else
PixelsBgra[i] = 0;
}
GenerateBgraMipmaps();
}
return PixelsBgra.data();
}
@ -355,6 +356,71 @@ void FTexture::FreeSpans (Span **spans) const
M_Free (spans);
}
// Resizes PixelsBgra so it can hold the base image plus every mipmap level
// reported by MipmapLevels(). Level n measures (Width >> n) by (Height >> n)
// texels, with each dimension clamped to at least 1. Elements added by the
// resize are initialized to 0xffff0000.
void FTexture::CreatePixelsBgraWithMipmaps()
{
	const int levelCount = MipmapLevels();

	int texelTotal = 0;
	int level = 0;
	while (level < levelCount)
	{
		const int levelWidth = MAX(Width >> level, 1);
		const int levelHeight = MAX(Height >> level, 1);
		texelTotal += levelWidth * levelHeight;
		++level;
	}

	PixelsBgra.resize(texelTotal, 0xffff0000);
}
int FTexture::MipmapLevels() const
{
int widthbits = 0;
while ((Width >> widthbits) != 0) widthbits++;
int heightbits = 0;
while ((Height >> heightbits) != 0) heightbits++;
return MAX(widthbits, heightbits);
}
// Fills in every mipmap level that follows the base image in PixelsBgra.
//
// PixelsBgra must already have the size set up by
// CreatePixelsBgraWithMipmaps() and hold the finished Width x Height base
// image in its first Width * Height elements. Levels are stored back to back,
// each one column after another (texel (x, y) lives at [y + x * height]).
//
// Every destination texel is the rounded per-channel average of the 2x2 block
// of source texels at columns 2x, 2x + 1 and rows 2y, 2y + 1 in the previous
// level. When the previous level is only one texel wide or tall, the second
// coordinate is clamped onto the first.
void FTexture::GenerateBgraMipmaps()
{
	uint32_t *src = PixelsBgra.data();
	uint32_t *dest = src + Width * Height;
	int levels = MipmapLevels();
	for (int i = 1; i < levels; i++)
	{
		// Dimensions of the level being read and of the level being written.
		int srcw = MAX(Width >> (i - 1), 1);
		int srch = MAX(Height >> (i - 1), 1);
		int w = MAX(Width >> i, 1);
		int h = MAX(Height >> i, 1);

		for (int x = 0; x < w; x++)
		{
			// Adjacent source columns 2x and 2x + 1. Using (x + 1) * 2 here would
			// skip every odd column and overlap the next texel's block instead.
			int sx0 = x * 2;
			int sx1 = MIN(sx0 + 1, srcw - 1);

			for (int y = 0; y < h; y++)
			{
				// Adjacent source rows 2y and 2y + 1, clamped the same way.
				int sy0 = y * 2;
				int sy1 = MIN(sy0 + 1, srch - 1);

				uint32_t src00 = src[sy0 + sx0 * srch];
				uint32_t src01 = src[sy1 + sx0 * srch];
				uint32_t src10 = src[sy0 + sx1 * srch];
				uint32_t src11 = src[sy1 + sx1 * srch];

				// Per-channel average; the + 2 rounds to nearest before dividing by 4.
				uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4;
				uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4;
				uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4;
				uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4;

				dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue;
			}
		}

		// The level just written becomes the source for the next, smaller level.
		src = dest;
		dest += w * h;
	}
}
void FTexture::CopyToBlock (BYTE *dest, int dwidth, int dheight, int xpos, int ypos, int rotate, const BYTE *translation)
{
const BYTE *pixels = GetPixels();

View file

@ -271,6 +271,10 @@ protected:
std::vector<uint32_t> PixelsBgra;
void CreatePixelsBgraWithMipmaps();
void GenerateBgraMipmaps();
int MipmapLevels() const;
public:
static void FlipSquareBlock (BYTE *block, int x, int y);
static void FlipSquareBlockBgra (uint32_t *block, int x, int y);