From 8de11ee81a1c3ff817781afb0e9bf3841e84af1a Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Sat, 24 Dec 2016 03:49:56 -0500 Subject: [PATCH] - Begin reimplementing rgb555 again. --- src/r_draw_pal.cpp | 137 ++++++++++++++++++++++++++++++++++----------- src/v_video.cpp | 84 ++++++++++++++++++++++++--- src/v_video.h | 39 +++++++++++++ 3 files changed, 218 insertions(+), 42 deletions(-) diff --git a/src/r_draw_pal.cpp b/src/r_draw_pal.cpp index 17e77d39ae..c7adc9d87a 100644 --- a/src/r_draw_pal.cpp +++ b/src/r_draw_pal.cpp @@ -43,6 +43,9 @@ #include "v_video.h" #include "r_draw_pal.h" +// [SP] r_blendmode - false = rgb555 matching (ZDoom classic), true = rgb666 (refactored) +CVAR(Bool, r_blendmode, false, CVAR_GLOBALCONFIG | CVAR_ARCHIVE) + /* [RH] This translucency algorithm is based on DOSDoom 0.65's, but uses a 32k RGB table instead of an 8k one. At least on my machine, it's @@ -303,19 +306,39 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (!r_blendmode) { - uint8_t pix = source[frac >> bits]; - if (pix != 0) + do { - uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); - uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); - uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); - *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - frac += fracstep; - dest += pitch; - } while (--count); + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[colormap[pix]]; + uint32_t bg = bg2rgb[*dest]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + } + else + { + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) @@ -341,22 +364,44 @@ namespace swrenderer } pitch *= thread->num_cores; - do + if (!r_blendmode) { - for (int i = 0; i < 4; ++i) + do { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) + for (int i = 0; i < 4; ++i) { - uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); - uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); - uint32_t b = MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + uint32_t fg = fg2rgb[_colormap[i][pix]]; + uint32_t bg = bg2rgb[dest[i]]; + fg = (fg + bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg >> 15)]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); + dest += pitch; + } while (--count); + } + else + { + do + { + for (int i = 0; i < 4; ++i) + { + uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; + if (pix != 0) + { + uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); + uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); + uint32_t b = MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); + dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + dc_wall_texturefrac[i] += dc_wall_iscale[i]; + } + dest += pitch; + } while (--count); + } } void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) @@ -379,19 +424,43 @@ namespace swrenderer fracstep *= thread->num_cores; pitch *= thread->num_cores; - do + if (!r_blendmode) { - uint8_t pix = source[frac >> bits]; - if (pix != 0) + do { - uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); - uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); - uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); - *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - frac += fracstep; - dest += pitch; - } while (--count); + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + uint8_t pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t r = MIN(GPalette.BaseColors[colormap[pix]].r + GPalette.BaseColors[*dest].r, 255); + uint32_t g = MIN(GPalette.BaseColors[colormap[pix]].g + GPalette.BaseColors[*dest].g, 255); + uint32_t b = MIN(GPalette.BaseColors[colormap[pix]].b + GPalette.BaseColors[*dest].b, 255); + *dest = RGB256k.RGB[r>>2][g>>2][b>>2]; + } + frac += fracstep; + dest += pitch; + } while (--count); + } } void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) diff --git a/src/v_video.cpp b/src/v_video.cpp index b639939eed..5a8ef10d59 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -143,6 +143,7 @@ extern "C" { DWORD Col2RGB8[65][256]; DWORD *Col2RGB8_LessPrecision[65]; DWORD Col2RGB8_Inverse[65][256]; +ColorTable32k RGB32k; ColorTable256k RGB256k; } @@ -346,6 +347,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (damount == 0.f) return; + DWORD *bg2rgb; + DWORD fg; int gap; BYTE *spot; int x, y; @@ -367,6 +370,16 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } + { + int amount; + + amount = (int)(damount * 64); + bg2rgb = Col2RGB8[64-amount]; + + fg = (((color.r * amount) >> 4) << 20) | + ((color.g * amount) >> 4) | + (((color.b * amount) >> 4) << 10); + } spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; @@ -376,17 +389,37 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) int dimmedcolor_r = color.r * alpha; int dimmedcolor_g = color.g * alpha; int dimmedcolor_b = color.b * alpha; - for (y = h; y != 0; y--) + + if (!r_blendmode) { - for (x = w; x != 0; x--) + for (y = h; y != 0; y--) { - uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8; - uint32_t g = (dimmedcolor_g + GPalette.BaseColors[*spot].g * ialpha) >> 8; - uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8; - *spot = (BYTE)RGB256k.RGB[r][g][b]; - spot++; + for (x = w; x != 0; x--) + { + DWORD bg; + + bg = bg2rgb[(*spot)&0xff]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + spot++; + } + spot += gap; + } + } + else + { + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + uint32_t r = (dimmedcolor_r + GPalette.BaseColors[*spot].r * ialpha) >> 8; + uint32_t g = (dimmedcolor_g + GPalette.BaseColors[*spot].g * ialpha) >> 8; + uint32_t b = (dimmedcolor_b + GPalette.BaseColors[*spot].b * ialpha) >> 8; + *spot = (BYTE)RGB256k.RGB[r][g][b]; + spot++; + } + spot += gap; } - spot += gap; } } @@ -658,12 +691,47 @@ static void BuildTransTable (const PalEntry *palette) { int r, g, b; + // create the RGB555 lookup table + for (r = 0; r < 32; r++) + for (g = 0; g < 32; g++) + for (b = 0; b < 32; b++) + RGB32k.RGB[r][g][b] = ColorMatcher.Pick ((r<<3)|(r>>2), (g<<3)|(g>>2), (b<<3)|(b>>2)); // create the RGB666 lookup table for (r = 0; r < 64; r++) for (g = 0; g < 64; g++) for (b = 0; b < 64; b++) RGB256k.RGB[r][g][b] = ColorMatcher.Pick ((r<<2)|(r>>4), (g<<2)|(g>>4), (b<<2)|(b>>4)); + int x, y; + + // create the swizzled palette + for (x = 0; x < 65; x++) + for (y = 0; y < 256; y++) + Col2RGB8[x][y] = (((palette[y].r*x)>>4)<<20) | + ((palette[y].g*x)>>4) | + (((palette[y].b*x)>>4)<<10); + + // create the swizzled palette with the lsb of red and blue forced to 0 + // (for green, a 1 is okay since it never gets added into) + for (x = 1; x < 64; x++) + { + Col2RGB8_LessPrecision[x] = Col2RGB8_2[x-1]; + for (y = 0; y < 256; y++) + { + Col2RGB8_2[x-1][y] = Col2RGB8[x][y] & 0x3feffbff; + } + } + Col2RGB8_LessPrecision[0] = Col2RGB8[0]; + Col2RGB8_LessPrecision[64] = Col2RGB8[64]; + + // create the inverse swizzled palette + for (x = 0; x < 65; x++) + for (y = 0; y < 256; y++) + { + Col2RGB8_Inverse[x][y] = (((((255-palette[y].r)*x)>>4)<<20) | + (((255-palette[y].g)*x)>>4) | + ((((255-palette[y].b)*x)>>4)<<10)) & 0x3feffbff; + } } //========================================================================== diff --git a/src/v_video.h b/src/v_video.h index 0da6b9b500..57d15869cd 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -448,6 +448,20 @@ EXTERN_CVAR (Float, Gamma) // Translucency tables +// RGB32k is a normal R5G5B5 -> palette lookup table. + +// Use a union so we can "overflow" without warnings. +// Otherwise, we get stuff like this from Clang (when compiled +// with -fsanitize=bounds) while running: +// src/v_video.cpp:390:12: runtime error: index 1068 out of bounds for type 'BYTE [32]' +// src/r_draw.cpp:273:11: runtime error: index 1057 out of bounds for type 'BYTE [32]' +union ColorTable32k +{ + BYTE RGB[32][32][32]; + BYTE All[32 *32 *32]; +}; +extern "C" ColorTable32k RGB32k; + // [SP] RGB666 support union ColorTable256k { @@ -456,6 +470,31 @@ union ColorTable256k }; extern "C" ColorTable256k RGB256k; +// Col2RGB8 is a pre-multiplied palette for color lookup. It is stored in a +// special R10B10G10 format for efficient blending computation. +// --RRRRRrrr--BBBBBbbb--GGGGGggg-- at level 64 +// --------rrrr------bbbb------gggg at level 1 +extern "C" DWORD Col2RGB8[65][256]; + +// Col2RGB8_LessPrecision is the same as Col2RGB8, but the LSB for red +// and blue are forced to zero, so if the blend overflows, it won't spill +// over into the next component's value. +// --RRRRRrrr-#BBBBBbbb-#GGGGGggg-- at level 64 +// --------rrr#------bbb#------gggg at level 1 +extern "C" DWORD *Col2RGB8_LessPrecision[65]; + +// Col2RGB8_Inverse is the same as Col2RGB8_LessPrecision, except the source +// palette has been inverted. +extern "C" DWORD Col2RGB8_Inverse[65][256]; + +// "Magic" numbers used during the blending: +// --000001111100000111110000011111 = 0x01f07c1f +// -0111111111011111111101111111111 = 0x3FEFFBFF +// -1000000000100000000010000000000 = 0x40100400 +// ------10000000001000000000100000 = 0x40100400 >> 5 +// --11111-----11111-----11111----- = 0x40100400 - (0x40100400 >> 5) aka "white" +// --111111111111111111111111111111 = 0x3FFFFFFF + // Allocates buffer screens, call before R_Init. void V_Init (bool restart);