diff --git a/src/gamedata/textures/hires/xbr/xbrz.cpp b/src/gamedata/textures/hires/xbr/xbrz.cpp index ffb05983a..cd0b7b030 100644 --- a/src/gamedata/textures/hires/xbr/xbrz.cpp +++ b/src/gamedata/textures/hires/xbr/xbrz.cpp @@ -27,7 +27,7 @@ using namespace xbrz; namespace { template inline -uint32_t gradientRGB(uint32_t pixFront, uint32_t pixBack) //blend front color with opacity M / N over opaque background: http://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending +uint32_t gradientRGB(uint32_t pixFront, uint32_t pixBack) //blend front color with opacity M / N over opaque background: https://en.wikipedia.org/wiki/Alpha_compositing#Alpha_blending { static_assert(0 < M && M < N && N <= 1000, ""); @@ -153,7 +153,7 @@ double distRGB(uint32_t pix1, uint32_t pix2) inline double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight) { - //http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion + //https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion //YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first! const int r_diff = static_cast(getRed (pix1)) - getRed (pix2); //we may delay division by 255 to after matrix multiplication const int g_diff = static_cast(getGreen(pix1)) - getGreen(pix2); // @@ -254,24 +254,32 @@ struct BlendResult }; +struct Kernel_3x3 +{ + uint32_t + a, b, c, + d, e, f, + g, h, i; +}; + struct Kernel_4x4 //kernel for preprocessing step { uint32_t - /**/a, b, c, d, - /**/e, f, g, h, - /**/i, j, k, l, - /**/m, n, o, p; + a, b, c, // + e, f, g, // support reinterpret_cast from Kernel_4x4 => Kernel_3x3 + i, j, k, // + m, n, o, + d, h, l, p; }; -/* -input kernel area naming convention: +/* input kernel area naming convention: ----------------- | A | B | C | D | -----|---|---|---| -| E | F | G | H | //evaluate the four corners between F, G, J, K -----|---|---|---| //input pixel is at position F +|---|---|---|---| +| E | F | G | H | evaluate the four corners between F, G, J, K +|---|---|---|---| input pixel is at position F | I | J | K | L | -----|---|---|---| +|---|---|---|---| | M | N | O | P | ----------------- */ @@ -318,14 +326,6 @@ BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) return result; } -struct Kernel_3x3 -{ - uint32_t - /**/a, b, c, - /**/d, e, f, - /**/g, h, i; -}; - #define DEF_GETTER(x) template uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; } //we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c) @@ -358,12 +358,16 @@ inline BlendType getTopR (unsigned char b) { return static_cast(0x3 inline BlendType getBottomR(unsigned char b) { return static_cast(0x3 & (b >> 4)); } inline BlendType getBottomL(unsigned char b) { return static_cast(0x3 & (b >> 6)); } -inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing! -inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); } -inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); } -inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); } +inline void clearAddTopL(unsigned char& b, BlendType bt) { b = static_cast(bt); } +inline void addTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); } //buffer is assumed to be initialized before preprocessing! +inline void addBottomR (unsigned char& b, BlendType bt) { b |= (bt << 4); } //e.g. via clearAddTopL() +inline void addBottomL (unsigned char& b, BlendType bt) { b |= (bt << 6); } // -inline bool blendingNeeded(unsigned char b) { return b != 0; } +inline bool blendingNeeded(unsigned char b) +{ + static_assert(BLEND_NONE == 0, ""); + return b != 0; +} template inline unsigned char rotateBlendInfo(unsigned char b) { return b; } @@ -372,13 +376,12 @@ template <> inline unsigned char rotateBlendInfo(unsigned char b) { ret template <> inline unsigned char rotateBlendInfo(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; } -/* -input kernel area naming convention: +/* input kernel area naming convention: ------------- | A | B | C | -----|---|---| -| D | E | F | //input pixel is at position E -----|---|---| +|---|---|---| +| D | E | F | input pixel is at position E +|---|---|---| | G | H | I | ------------- */ @@ -472,7 +475,80 @@ void blendPixel(const Kernel_3x3& ker, } -template //scaler policy: see "Scaler2x" reference implementation +class OobReaderTransparent +{ +public: + OobReaderTransparent(const uint32_t* src, int srcWidth, int srcHeight, int y) : + s_m1(0 <= y - 1 && y - 1 < srcHeight ? src + srcWidth * (y - 1) : nullptr), + s_0 (0 <= y && y < srcHeight ? src + srcWidth * y : nullptr), + s_p1(0 <= y + 1 && y + 1 < srcHeight ? src + srcWidth * (y + 1) : nullptr), + s_p2(0 <= y + 2 && y + 2 < srcHeight ? src + srcWidth * (y + 2) : nullptr), + srcWidth_(srcWidth) {} + + void readDhlp(Kernel_4x4& ker, int x) const //(x, y) is at kernel position F + { + const int x_p2 = x + 2; + + if (0 <= x_p2 && x_p2 < srcWidth_) + { + ker.d = s_m1 ? s_m1[x_p2] : 0; + ker.h = s_0 ? s_0 [x_p2] : 0; + ker.l = s_p1 ? s_p1[x_p2] : 0; + ker.p = s_p2 ? s_p2[x_p2] : 0; + } + else + { + ker.d = 0; + ker.h = 0; + ker.l = 0; + ker.p = 0; + } + } + +private: + const uint32_t* const s_m1; + const uint32_t* const s_0; + const uint32_t* const s_p1; + const uint32_t* const s_p2; + const int srcWidth_; +}; + + +template +constexpr inline T xbrz_clamp(const T in, const T min, const T max) +{ + return in <= min ? min : in >= max ? max : in; +} + +class OobReaderDuplicate +{ +public: + OobReaderDuplicate(const uint32_t* src, int srcWidth, int srcHeight, int y) : + s_m1(src + srcWidth * xbrz_clamp(y - 1, 0, srcHeight - 1)), + s_0 (src + srcWidth * xbrz_clamp(y, 0, srcHeight - 1)), + s_p1(src + srcWidth * xbrz_clamp(y + 1, 0, srcHeight - 1)), + s_p2(src + srcWidth * xbrz_clamp(y + 2, 0, srcHeight - 1)), + srcWidth_(srcWidth) {} + + void readDhlp(Kernel_4x4& ker, int x) const //(x, y) is at kernel position F + { + const int x_p2 = xbrz_clamp(x + 2, 0, srcWidth_ - 1); + ker.d = s_m1[x_p2]; + ker.h = s_0 [x_p2]; + ker.l = s_p1[x_p2]; + ker.p = s_p2[x_p2]; + } + +private: + const uint32_t* const s_m1; + const uint32_t* const s_0; + const uint32_t* const s_p1; + const uint32_t* const s_p2; + const int srcWidth_; +}; + + +template //scaler policy: see "Scaler2x" reference implementation void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast) { yFirst = std::max(yFirst, 0); @@ -482,64 +558,72 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const int trgWidth = srcWidth * Scaler::scale; - //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of - //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing - const int bufferSize = srcWidth; - unsigned char* preProcBuffer = reinterpret_cast(trg + yLast * Scaler::scale * trgWidth) - bufferSize; - std::fill(preProcBuffer, preProcBuffer + bufferSize, '\0'); - static_assert(BLEND_NONE == 0, ""); + //(ab)use space of "sizeof(uint32_t) * srcWidth * Scaler::scale" at the end of the image as temporary + //buffer for "on the fly preprocessing" without risk of accidental overwriting before accessing + unsigned char* const preProcBuf = reinterpret_cast(trg + yLast * Scaler::scale * trgWidth) - srcWidth; //initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition! - if (yFirst > 0) { - const int y = yFirst - 1; + const OobReader oobReader(src, srcWidth, srcHeight, yFirst - 1); - const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0); - const uint32_t* s_0 = src + srcWidth * y; //center line - const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1); - const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1); + //initialize at position x = -1 + Kernel_4x4 ker4 = {}; + oobReader.readDhlp(ker4, -4); //hack: read a, e, i, m at x = -1 + ker4.a = ker4.d; + ker4.e = ker4.h; + ker4.i = ker4.l; + ker4.m = ker4.p; + + oobReader.readDhlp(ker4, -3); + ker4.b = ker4.d; + ker4.f = ker4.h; + ker4.j = ker4.l; + ker4.n = ker4.p; + + oobReader.readDhlp(ker4, -2); + ker4.c = ker4.d; + ker4.g = ker4.h; + ker4.k = ker4.l; + ker4.o = ker4.p; + + oobReader.readDhlp(ker4, -1); + + { + const BlendResult res = preProcessCorners(ker4, cfg); + clearAddTopL(preProcBuf[0], res.blend_k); //set 1st known corner for (0, yFirst) + } for (int x = 0; x < srcWidth; ++x) { - const int x_m1 = std::max(x - 1, 0); - const int x_p1 = std::min(x + 1, srcWidth - 1); - const int x_p2 = std::min(x + 2, srcWidth - 1); + ker4.a = ker4.b; //shift previous kernel to the left + ker4.e = ker4.f; // ----------------- + ker4.i = ker4.j; // | A | B | C | D | + ker4.m = ker4.n; // |---|---|---|---| + /**/ // | E | F | G | H | (x, yFirst - 1) is at position F + ker4.b = ker4.c; // |---|---|---|---| + ker4.f = ker4.g; // | I | J | K | L | + ker4.j = ker4.k; // |---|---|---|---| + ker4.n = ker4.o; // | M | N | O | P | + /**/ // ----------------- + ker4.c = ker4.d; + ker4.g = ker4.h; + ker4.k = ker4.l; + ker4.o = ker4.p; - Kernel_4x4 ker = {}; //perf: initialization is negligible - ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible - ker.b = s_m1[x]; - ker.c = s_m1[x_p1]; - ker.d = s_m1[x_p2]; + oobReader.readDhlp(ker4, x); - ker.e = s_0[x_m1]; - ker.f = s_0[x]; - ker.g = s_0[x_p1]; - ker.h = s_0[x_p2]; + /* preprocessing blend result: + --------- + | F | G | evaluate corner between F, G, J, K + |---+---| current input pixel is at position F + | J | K | + --------- */ + const BlendResult res = preProcessCorners(ker4, cfg); + addTopR(preProcBuf[x], res.blend_j); //set 2nd known corner for (x, yFirst) - ker.i = s_p1[x_m1]; - ker.j = s_p1[x]; - ker.k = s_p1[x_p1]; - ker.l = s_p1[x_p2]; - - ker.m = s_p2[x_m1]; - ker.n = s_p2[x]; - ker.o = s_p2[x_p1]; - ker.p = s_p2[x_p2]; - - const BlendResult res = preProcessCorners(ker, cfg); - /* - preprocessing blend result: - --------- - | F | G | //evalute corner between F, G, J, K - ----|---| //input pixel is at position F - | J | K | - --------- - */ - setTopR(preProcBuffer[x], res.blend_j); - - if (x + 1 < bufferSize) - setTopL(preProcBuffer[x + 1], res.blend_k); + if (x + 1 < srcWidth) + clearAddTopL(preProcBuf[x + 1], res.blend_k); //set 1st known corner for (x + 1, yFirst) } } //------------------------------------------------------------------------------------ @@ -548,91 +632,92 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, { uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access - const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0); - const uint32_t* s_0 = src + srcWidth * y; //center line - const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1); - const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1); + const OobReader oobReader(src, srcWidth, srcHeight, y); + + //initialize at position x = -1 + Kernel_4x4 ker4 = {}; + oobReader.readDhlp(ker4, -4); //hack: read a, e, i, m at x = -1 + ker4.a = ker4.d; + ker4.e = ker4.h; + ker4.i = ker4.l; + ker4.m = ker4.p; + + oobReader.readDhlp(ker4, -3); + ker4.b = ker4.d; + ker4.f = ker4.h; + ker4.j = ker4.l; + ker4.n = ker4.p; + + oobReader.readDhlp(ker4, -2); + ker4.c = ker4.d; + ker4.g = ker4.h; + ker4.k = ker4.l; + ker4.o = ker4.p; + + oobReader.readDhlp(ker4, -1); unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position + { + const BlendResult res = preProcessCorners(ker4, cfg); + clearAddTopL(blend_xy1, res.blend_k); //set 1st known corner for (0, y + 1) and buffer for use on next column + + addBottomL(preProcBuf[0], res.blend_g); //set 3rd known corner for (0, y) + } for (int x = 0; x < srcWidth; ++x, out += Scaler::scale) { #if defined _MSC_VER && !defined NDEBUG breakIntoDebugger = debugPixelX == x && debugPixelY == y; #endif - //all those bounds checks have only insignificant impact on performance! - const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers! - const int x_p1 = std::min(x + 1, srcWidth - 1); - const int x_p2 = std::min(x + 2, srcWidth - 1); + ker4.a = ker4.b; //shift previous kernel to the left + ker4.e = ker4.f; // ----------------- + ker4.i = ker4.j; // | A | B | C | D | + ker4.m = ker4.n; // |---|---|---|---| + /**/ // | E | F | G | H | (x, y) is at position F + ker4.b = ker4.c; // |---|---|---|---| + ker4.f = ker4.g; // | I | J | K | L | + ker4.j = ker4.k; // |---|---|---|---| + ker4.n = ker4.o; // | M | N | O | P | + /**/ // ----------------- + ker4.c = ker4.d; + ker4.g = ker4.h; + ker4.k = ker4.l; + ker4.o = ker4.p; - Kernel_4x4 ker4 = {}; //perf: initialization is negligible - - ker4.a = s_m1[x_m1]; //read sequentially from memory as far as possible - ker4.b = s_m1[x]; - ker4.c = s_m1[x_p1]; - ker4.d = s_m1[x_p2]; - - ker4.e = s_0[x_m1]; - ker4.f = s_0[x]; - ker4.g = s_0[x_p1]; - ker4.h = s_0[x_p2]; - - ker4.i = s_p1[x_m1]; - ker4.j = s_p1[x]; - ker4.k = s_p1[x_p1]; - ker4.l = s_p1[x_p2]; - - ker4.m = s_p2[x_m1]; - ker4.n = s_p2[x]; - ker4.o = s_p2[x_p1]; - ker4.p = s_p2[x_p2]; + oobReader.readDhlp(ker4, x); //evaluate the four corners on bottom-right of current pixel - unsigned char blend_xy = 0; //for current (x, y) position + unsigned char blend_xy = preProcBuf[x]; //for current (x, y) position { + /* preprocessing blend result: + --------- + | F | G | evaluate corner between F, G, J, K + |---+---| current input pixel is at position F + | J | K | + --------- */ const BlendResult res = preProcessCorners(ker4, cfg); - /* - preprocessing blend result: - --------- - | F | G | //evalute corner between F, G, J, K - ----|---| //current input pixel is at position F - | J | K | - --------- - */ - blend_xy = preProcBuffer[x]; - setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence! + addBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence! - setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1) - preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row + addTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1) + preProcBuf[x] = blend_xy1; //store on current buffer position for use on next row - blend_xy1 = 0; - setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column + if (x + 1 < srcWidth) + { + //blend_xy1 -> blend_x1y1 + clearAddTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column - if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y) - setBottomL(preProcBuffer[x + 1], res.blend_g); + addBottomL(preProcBuf[x + 1], res.blend_g); //set 3rd known corner for (x + 1, y) + } } //fill block of size scale * scale with the given color fillBlock(out, trgWidth * sizeof(uint32_t), ker4.f, Scaler::scale, Scaler::scale); - //place *after* preprocessing step, to not overwrite the results while processing the the last pixel! + //place *after* preprocessing step, to not overwrite the results while processing the last pixel! - //blend four corners of current pixel - if (blendingNeeded(blend_xy)) //good 5% perf-improvement + //blend all four corners of current pixel + if (blendingNeeded(blend_xy)) { - Kernel_3x3 ker3 = {}; //perf: initialization is negligible - - ker3.a = ker4.a; - ker3.b = ker4.b; - ker3.c = ker4.c; - - ker3.d = ker4.e; - ker3.e = ker4.f; - ker3.f = ker4.g; - - ker3.g = ker4.i; - ker3.h = ker4.j; - ker3.i = ker4.k; - + const auto& ker3 = reinterpret_cast(ker4); //"The Things We Do for Perf" blendPixel(ker3, out, trgWidth, blend_xy, cfg); blendPixel(ker3, out, trgWidth, blend_xy, cfg); blendPixel(ker3, out, trgWidth, blend_xy, cfg); @@ -1095,15 +1180,15 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth switch (factor) { case 2: - return scaleImage, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 3: - return scaleImage, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 4: - return scaleImage, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 5: - return scaleImage, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 6: - return scaleImage, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); } break; @@ -1111,15 +1196,15 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth switch (factor) { case 2: - return scaleImage, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 3: - return scaleImage, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 4: - return scaleImage, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 5: - return scaleImage, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 6: - return scaleImage, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); } break; @@ -1127,15 +1212,15 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth switch (factor) { case 2: - return scaleImage, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 3: - return scaleImage, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 4: - return scaleImage, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 5: - return scaleImage, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); case 6: - return scaleImage, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); + return scaleImage, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast); } break; } diff --git a/src/gamedata/textures/hires/xbr/xbrz.h b/src/gamedata/textures/hires/xbr/xbrz.h index c57649e01..492fb43ad 100644 --- a/src/gamedata/textures/hires/xbr/xbrz.h +++ b/src/gamedata/textures/hires/xbr/xbrz.h @@ -22,6 +22,7 @@ #include #include "xbrz_config.h" + namespace xbrz { /* @@ -50,7 +51,6 @@ const int SCALE_FACTOR_MAX = 6; /* -> map source (srcWidth * srcHeight) to target (scale * width x scale * height) image, optionally processing a half-open slice of rows [yFirst, yLast) only --> support for source/target pitch in bytes! -> if your emulator changes only a few image slices during each cycle (e.g. DOSBox) then there's no need to run xBRZ on the complete image: Just make sure you enlarge the source image slice by 2 rows on top and 2 on bottom (this is the additional range the xBRZ algorithm is using during analysis) CAVEAT: If there are multiple changed slices, make sure they do not overlap after adding these additional rows in order to avoid a memory race condition diff --git a/src/gamedata/textures/hires/xbr/xbrz_tools.h b/src/gamedata/textures/hires/xbr/xbrz_tools.h index 1a5e55556..844201c52 100644 --- a/src/gamedata/textures/hires/xbr/xbrz_tools.h +++ b/src/gamedata/textures/hires/xbr/xbrz_tools.h @@ -56,7 +56,7 @@ Pix* byteAdvance(Pix* ptr, int bytes) //fill block with the given color template inline -void fillBlock(Pix* trg, int pitch, Pix col, int blockWidth, int blockHeight) +void fillBlock(Pix* trg, int pitch /*[bytes]*/, Pix col, int blockWidth, int blockHeight) { //for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch)) // std::fill(trg, trg + blockWidth, col); @@ -69,8 +69,8 @@ void fillBlock(Pix* trg, int pitch, Pix col, int blockWidth, int blockHeight) //nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!) template -void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch, - /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch, +void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/, + /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/, int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/) { static_assert(std::is_integral::value, "PixSrc* is expected to be cast-able to char*"); @@ -106,8 +106,8 @@ void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int sr //nearest-neighbor (going over source image - fast for upscaling, since source is read only once template -void nearestNeighborScaleOverSource(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch, - /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch, +void nearestNeighborScaleOverSource(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/, + /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/, int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/) { static_assert(std::is_integral::value, "PixSrc* is expected to be cast-able to char*"); @@ -187,10 +187,10 @@ void bilinearScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitc // -> pre-calculation gives significant boost; std::vector<> memory allocation is negligible! struct CoeffsX { - int x1; - int x2; - double xx1; - double x2x; + int x1 = 0; + int x2 = 0; + double xx1 = 0; + double x2x = 0; }; std::vector buf(trgWidth); for (int x = 0; x < trgWidth; ++x) @@ -202,7 +202,11 @@ void bilinearScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitc const double xx1 = x / scaleX - x1; const double x2x = 1 - xx1; - buf[x] = { x1, x2, xx1, x2x }; + CoeffsX& bx = buf[x]; + bx.x1 = x1; + bx.x2 = x2; + bx.xx1 = xx1; + bx.x2x = x2x; } for (int y = yFirst; y < yLast; ++y) @@ -231,7 +235,7 @@ void bilinearScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitc const double x2xyy1 = x2x * yy1; const double xx1yy1 = xx1 * yy1; - auto interpolate = [=](int offset) + auto interpolate = [=](int offset) -> double { /* https://en.wikipedia.org/wiki/Bilinear_interpolation (c11(x2 - x) + c21(x - x1)) * (y2 - y ) +