diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 042da0c8f..8c0a30ea0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1190,7 +1190,6 @@ set (PCH_SOURCES textures/texturemanager.cpp textures/tgatexture.cpp textures/warptexture.cpp - textures/bicubic_interpolation.cpp thingdef/olddecorations.cpp thingdef/thingdef.cpp thingdef/thingdef_codeptr.cpp diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index fbb2c12c5..aa88e4302 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -103,7 +103,9 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// +#ifndef NO_SSE __m128i SampleBgra::samplertable[256 * 2]; +#endif DrawerCommandQueue *DrawerCommandQueue::Instance() { @@ -113,6 +115,7 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() DrawerCommandQueue::DrawerCommandQueue() { +#ifndef NO_SSE for (int inv_b = 0; inv_b < 16; inv_b++) { for (int inv_a = 0; inv_a < 16; inv_a++) @@ -132,6 +135,7 @@ DrawerCommandQueue::DrawerCommandQueue() _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); } } +#endif } DrawerCommandQueue::~DrawerCommandQueue() diff --git a/src/textures/bicubic_interpolation.cpp b/src/textures/bicubic_interpolation.cpp deleted file mode 100644 index 2c8a3049d..000000000 --- a/src/textures/bicubic_interpolation.cpp +++ /dev/null @@ -1,107 +0,0 @@ - -#include "doomtype.h" -#include "bicubic_interpolation.h" - -void BicubicInterpolation::ScaleImage(uint32_t *dest_data, int dest_width, int dest_height, const uint32_t *src_data, int src_width, int src_height) -{ - if (dest_width <= 0 || dest_height <= 0 || src_width <= 0 || src_height <= 0) - return; - - // Scale factor as a rational number r = n / d - int n = dest_width; - int d = src_width; - - const unsigned char *src_ptr = (const unsigned char *)src_data; - unsigned char *dest_ptr = (unsigned char *)dest_data; - - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 0, dest_width, dest_width * 4, dest_height, dest_ptr + 0); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 1, dest_width, dest_width * 4, dest_height, dest_ptr + 1); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 2, dest_width, dest_width * 4, dest_height, dest_ptr + 2); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 3, dest_width, dest_width * 4, dest_height, dest_ptr + 3); -} - -void BicubicInterpolation::scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *f, int out_width, int out_pitch, int out_height, unsigned char *g) -{ - // Implementation of Michael J. Aramini's Efficient Image Magnification by Bicubic Spline Interpolation - - int dimension_size = (out_width > out_height) ? out_width : out_height; - L_vector.resize(dimension_size); - - for (int i=0;i<4;i++) - c_vector[i].resize(dimension_size); - h_vector.resize(in_width); - - int larger_out_dimension; - int j, k, l, m, index; - int *L = &L_vector[0]; - float x; - float *c[4] = { &c_vector[0][0], &c_vector[1][0], &c_vector[2][0], &c_vector[3][0] }; - float *h = &h_vector[0]; - - larger_out_dimension = (out_width > out_height) ? out_width : out_height; - - for (k = 0; k < larger_out_dimension; k++) - L[k] = (k * d) / n; - - for (k = 0; k < n; k++) - { - x = (float)((k * d) % n) / (float)n; - c[0][k] = C0(x); - c[1][k] = C1(x); - c[2][k] = C2(x); - c[3][k] = C3(x); - } - for (k = n; k < larger_out_dimension; k++) - for (l = 0; l < 4; l++) - c[l][k] = c[l][k % n]; - - for (k = 0; k < out_height; k++) - { - for (j = 0; j < in_width; j++) - { - h[j] = 0.0f; - for (l = 0; l < 4; l++) - { - index = L[k] + l - 1; - if ((index >= 0) && (index < in_height)) - h[j] += f[index*in_pitch+j*4] * c[3 - l][k]; - } - } - for (m = 0; m < out_width; m++) - { - x = 0.5f; - for (l = 0; l < 4; l++) - { - index = L[m] + l - 1; - if ((index >= 0) && (index < in_width)) - x += h[index] * c[3 - l][m]; - } - if (x <= 0.0f) - g[k*out_pitch+m*4] = 0; - else if (x >= 255) - g[k*out_pitch+m*4] = 255; - else - g[k*out_pitch+m*4] = (unsigned char)x; - } - } -} - -inline float BicubicInterpolation::C0(float t) -{ - return -a * t * t * t + a * t * t; -} - -inline float BicubicInterpolation::C1(float t) -{ - return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; -} - -inline float BicubicInterpolation::C2(float t) -{ - return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; -} - -inline float BicubicInterpolation::C3(float t) -{ - return a * t * t * t - 2.0f * a * t * t + a * t; -} diff --git a/src/textures/bicubic_interpolation.h b/src/textures/bicubic_interpolation.h deleted file mode 100644 index da547ad83..000000000 --- a/src/textures/bicubic_interpolation.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -** Bicubic Image Scaler -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BICUBIC_INTERPOLATION_H__ -#define __BICUBIC_INTERPOLATION_H__ - -#pragma once - -#include - -// Bicubic image scaler -class BicubicInterpolation -{ -public: - void ScaleImage(uint32_t *dest, int dest_width, int dest_height, const uint32_t *src, int src_width, int src_height); - -private: - void scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *in_data, int out_width, int out_pitch, int out_height, unsigned char *out_data); - - float a = -0.5f; // a is a spline parameter such that -1 <= a <= 0 - - inline float C0(float t); - inline float C1(float t); - inline float C2(float t); - inline float C3(float t); - - std::vector L_vector; - std::vector c_vector[4]; - std::vector h_vector; -}; - -#endif diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7ff5c9ba2..ce7874ee6 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,7 +45,6 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" -#include "textures/bicubic_interpolation.h" #include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); @@ -383,19 +382,122 @@ int FTexture::MipmapLevels() const void FTexture::GenerateBgraMipmaps() { - BicubicInterpolation bicubic; - - uint32_t *src = PixelsBgra.data(); - uint32_t *dest = src + Width * Height; - int levels = MipmapLevels(); - for (int i = 1; i < levels; i++) + struct Color4f { - int w = MAX(Width >> i, 1); - int h = MAX(Height >> i, 1); + float a, r, g, b; + Color4f operator*(const Color4f &v) const { return Color4f{ a * v.a, r * v.r, g * v.g, b * v.b }; } + Color4f operator/(const Color4f &v) const { return Color4f{ a / v.a, r / v.r, g / v.g, b / v.b }; } + Color4f operator+(const Color4f &v) const { return Color4f{ a + v.a, r + v.r, g + v.g, b + v.b }; } + Color4f operator-(const Color4f &v) const { return Color4f{ a - v.a, r - v.r, g - v.g, b - v.b }; } + Color4f operator*(float s) const { return Color4f{ a * s, r * s, g * s, b * s }; } + Color4f operator/(float s) const { return Color4f{ a / s, r / s, g / s, b / s }; } + Color4f operator+(float s) const { return Color4f{ a + s, r + s, g + s, b + s }; } + Color4f operator-(float s) const { return Color4f{ a - s, r - s, g - s, b - s }; } + }; - bicubic.ScaleImage(dest, h, w, src, Height, Width); + int levels = MipmapLevels(); + std::vector image(PixelsBgra.size()); - dest += w * h; + // Convert to normalized linear colorspace + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t c8 = PixelsBgra[x * Height + y]; + Color4f c; + c.a = std::pow(APART(c8) * (1.0f / 255.0f), 2.2f); + c.r = std::pow(RPART(c8) * (1.0f / 255.0f), 2.2f); + c.g = std::pow(GPART(c8) * (1.0f / 255.0f), 2.2f); + c.b = std::pow(BPART(c8) * (1.0f / 255.0f), 2.2f); + image[x * Height + y] = c; + } + } + } + + // Generate mipmaps + { + std::vector smoothed(Width * Height); + Color4f *src = image.data(); + Color4f *dest = src + Width * Height; + for (int i = 1; i < levels; i++) + { + int srcw = MAX(Width >> (i - 1), 1); + int srch = MAX(Height >> (i - 1), 1); + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + // Downscale + for (int x = 0; x < w; x++) + { + int sx0 = x * 2; + int sx1 = MIN((x + 1) * 2, srcw - 1); + for (int y = 0; y < h; y++) + { + int sy0 = y * 2; + int sy1 = MIN((y + 1) * 2, srch - 1); + + Color4f src00 = src[sy0 + sx0 * srch]; + Color4f src01 = src[sy1 + sx0 * srch]; + Color4f src10 = src[sy0 + sx1 * srch]; + Color4f src11 = src[sy1 + sx1 * srch]; + Color4f c = (src00 + src01 + src10 + src11) * 0.25f; + + dest[y + x * h] = src00; + } + } + + // Sharpen filter with a 3x3 kernel: + for (int x = 0; x < w; x++) + { + for (int y = 0; y < h; y++) + { + Color4f c = { 0.0f, 0.0f, 0.0f, 0.0f }; + for (int kx = -1; kx < 2; kx++) + { + for (int ky = -1; ky < 2; ky++) + { + int a = y + ky; + int b = x + kx; + if (a < 0) a = h - 1; + if (a == h) a = 0; + if (b < 0) b = w - 1; + if (b == h) b = 0; + c = c + dest[a + b * h]; + } + } + c = c * (1.0f / 9.0f); + smoothed[y + x * h] = c; + } + } + float k = 0.04f; + for (int j = 0; j < w * h; j++) + dest[j] = dest[j] + (dest[j] - smoothed[j]) * k; + + src = dest; + dest += w * h; + } + } + + // Convert to bgra8 sRGB colorspace + { + Color4f *src = image.data() + Width * Height; + uint32_t *dest = PixelsBgra.data() + Width * Height; + for (int i = 1; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + for (int j = 0; j < w * h; j++) + { + uint32_t a = (uint32_t)clamp(std::pow(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(std::pow(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(std::pow(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(std::pow(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + dest[j] = (a << 24) | (r << 16) | (g << 8) | b; + } + src += w * h; + dest += w * h; + } } }