diff --git a/src/g_level.cpp b/src/g_level.cpp index 141932c22..d27747ccb 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -1307,7 +1307,7 @@ void G_InitLevelLocals () level_info_t *info; BaseBlendA = 0.0f; // Remove underwater blend effect, if any - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; // [BB] Instead of just setting the color, we also have to reset Desaturate and build the lights. NormalLight.ChangeColor (PalEntry (255, 255, 255), 0); diff --git a/src/g_shared/a_artifacts.cpp b/src/g_shared/a_artifacts.cpp index 777d6824a..305260ebf 100644 --- a/src/g_shared/a_artifacts.cpp +++ b/src/g_shared/a_artifacts.cpp @@ -737,7 +737,7 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->BaseColormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; vis->ColormapNum = 0; } return -1; // This item is valid so another one shouldn't reset the translucency diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index b46342463..ffaaa38ac 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -71,7 +71,7 @@ struct FakeCmap }; TArray fakecmaps; -BYTE *realcolormaps; +FColormap realcolormaps; size_t numfakecmaps; @@ -408,7 +408,7 @@ void R_SetDefaultColormap (const char *name) foo.Color = 0xFFFFFF; foo.Fade = 0; - foo.Maps = realcolormaps; + foo.Maps = realcolormaps.Maps; foo.Desaturate = 0; foo.Next = NULL; foo.BuildLights (); @@ -430,7 +430,7 @@ void R_SetDefaultColormap (const char *name) remap[0] = 0; for (i = 0; i < NUMCOLORMAPS; ++i) { - BYTE *map2 = &realcolormaps[i*256]; + BYTE *map2 = &realcolormaps.Maps[i*256]; lumpr.Read (map, 256); for (j = 0; j < 256; ++j) { @@ -454,11 +454,7 @@ void R_DeinitColormaps () { SpecialColormaps.Clear(); fakecmaps.Clear(); - if (realcolormaps != NULL) - { - delete[] realcolormaps; 
- realcolormaps = NULL; - } + delete[] realcolormaps.Maps; FreeSpecialLights(); } @@ -501,7 +497,7 @@ void R_InitColormaps () } } } - realcolormaps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; + realcolormaps.Maps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; R_SetDefaultColormap ("COLORMAP"); if (fakecmaps.Size() > 1) @@ -523,7 +519,7 @@ void R_InitColormaps () { int k, r, g, b; FWadLump lump = Wads.OpenLumpNum (fakecmaps[j].lump); - BYTE *const map = realcolormaps + NUMCOLORMAPS*256*j; + BYTE *const map = realcolormaps.Maps + NUMCOLORMAPS*256*j; for (k = 0; k < NUMCOLORMAPS; ++k) { @@ -550,8 +546,8 @@ void R_InitColormaps () } NormalLight.Color = PalEntry (255, 255, 255); NormalLight.Fade = 0; - NormalLight.Maps = realcolormaps; - NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps); + NormalLight.Maps = realcolormaps.Maps; + NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps.Maps); numfakecmaps = fakecmaps.Size(); // build default special maps (e.g. invulnerability) diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index 0764191a3..bda6a5ea4 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -1,18 +1,26 @@ #ifndef __RES_CMAP_H #define __RES_CMAP_H +struct FColormap; + void R_InitColormaps (); void R_DeinitColormaps (); DWORD R_ColormapNumForName(const char *name); // killough 4/4/98 void R_SetDefaultColormap (const char *name); // [RH] change normal fadetable DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap -extern BYTE *realcolormaps; // [RH] make the colormaps externally visible +extern FColormap realcolormaps; // [RH] make the colormaps externally visible extern size_t numfakecmaps; +struct FColormap +{ + BYTE *Maps = nullptr; + PalEntry Color = 0xffffffff; + PalEntry Fade = 0xff000000; + int Desaturate = 0; +}; - -struct FDynamicColormap +struct FDynamicColormap : FColormap { void ChangeFade (PalEntry fadecolor); void ChangeColor (PalEntry lightcolor, int desaturate); 
@@ -20,10 +28,6 @@ struct FDynamicColormap void BuildLights (); static void RebuildAllLights(); - BYTE *Maps; - PalEntry Color; - PalEntry Fade; - int Desaturate; FDynamicColormap *Next; }; @@ -43,8 +47,13 @@ enum }; -struct FSpecialColormap +struct FSpecialColormap : FColormap { + FSpecialColormap() + { + Maps = Colormap; + } + float ColorizeStart[3]; float ColorizeEnd[3]; BYTE Colormap[256]; diff --git a/src/r_defs.h b/src/r_defs.h index 8a247a5c0..c0f878664 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1397,12 +1397,13 @@ struct FMiniBSP // typedef BYTE lighttable_t; // This could be wider for >8 bit display. +struct FColormap; // This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { int ColormapNum; // Which colormap is rendered - lighttable_t *BaseColormap; // Base colormap used together with ColormapNum + FColormap *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec0645fd2..2e21c7038 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -145,6 +145,8 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; +FColormap *dc_fcolormap; +ShadeConstants dc_shade_constants; fixed_t dc_light; int dc_x; int dc_yl; @@ -179,6 +181,7 @@ BYTE *dc_translation; BYTE shadetables[NUMCOLORMAPS*16*256]; FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; +FDynamicColormap identitycolormap; EXTERN_CVAR (Int, r_columnmethod) @@ -219,6 +222,10 @@ void R_InitShadeMaps() { identitymap[i] = i; } + identitycolormap.Color = ~0u; + identitycolormap.Desaturate = 0; + identitycolormap.Next = NULL; + identitycolormap.Maps = identitymap; } /************************************/ @@ -297,6 +304,7 @@ void R_DrawColumnP_RGBA_C() dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; // Determine scaling, // 
which is the only mapping to be done. @@ -315,9 +323,7 @@ void R_DrawColumnP_RGBA_C() // This is as fast as it gets. do { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. - *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); dest += pitch; frac += fracstep; @@ -371,7 +377,7 @@ void R_FillColumnP_RGBA() do { - *dest = shade_pal_index(color, light); + *dest = shade_pal_index_simple(color, light); dest += pitch; } while (--count); } @@ -416,7 +422,7 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -481,7 +487,7 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,7 +551,7 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -609,7 +615,7 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 
16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -953,13 +959,14 @@ void R_DrawAddColumnP_RGBA_C() BYTE *colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1032,6 +1039,7 @@ void R_DrawTranslatedColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1040,14 +1048,13 @@ void R_DrawTranslatedColumnP_RGBA_C() { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *colormap = dc_colormap; BYTE *translation = dc_translation; const BYTE *source = dc_source; int pitch = dc_pitch; do { - *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); dest += pitch; frac += fracstep; } while (--count); @@ -1106,6 +1113,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1114,7 +1122,6 @@ void R_DrawTlatedAddColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; @@ -1123,7 +1130,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1197,7 +1204,7 @@ void 
R_DrawShadedColumnP_RGBA_C() fracstep = dc_iscale; frac = dc_texturefrac; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1286,16 +1293,16 @@ void R_DrawAddClampColumnP_RGBA_C() { const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1375,17 +1382,17 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1462,17 +1469,17 @@ void R_DrawSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t 
bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1551,17 +1558,17 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1638,16 +1645,16 @@ void R_DrawRevSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1726,17 +1733,17 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> 
(FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1784,8 +1791,10 @@ int ds_y; int ds_x1; int ds_x2; +FColormap* ds_fcolormap; lighttable_t* ds_colormap; -//dsfixed_t ds_light; +ShadeConstants ds_shade_constants; +dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -1835,9 +1844,9 @@ void R_SetSpanSource(const BYTE *pixels) // //========================================================================== -void R_SetSpanColormap(BYTE *colormap) +void R_SetSpanColormap(FDynamicColormap *colormap, int shade) { - R_SetDSColorMapLight(colormap, 0, 0); + R_SetDSColorMapLight(colormap, 0, shade); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -1956,7 +1965,6 @@ void R_DrawSpanP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -1980,6 +1988,7 @@ void R_DrawSpanP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { @@ -1990,9 +1999,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2010,9 +2018,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. 
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2030,7 +2037,6 @@ void R_DrawSpanP_RGBA_SSE() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2054,54 +2060,92 @@ void R_DrawSpanP_RGBA_SSE() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; int sse_count = count / 4; count -= sse_count * 4; - while (sse_count--) + + if (shade_constants.simple_shade) { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + SSE_SHADE_SIMPLE_INIT(light); - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Next step in u,v. - dest += 4; + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + if (count == 0) return; @@ -2110,9 +2154,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2130,9 +2173,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -2221,6 +2263,7 @@ void R_DrawSpanMaskedP_RGBA_C() int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2243,7 +2286,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2263,7 +2306,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2343,7 +2386,6 @@ void R_DrawSpanTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2358,6 +2400,7 @@ void R_DrawSpanTranslucentP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2369,7 +2412,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2397,7 +2440,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2502,11 +2545,11 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const 
BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2532,7 +2575,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2565,7 +2608,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2665,11 +2708,11 @@ void R_DrawSpanAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2691,7 +2734,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2719,7 +2762,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = 
(fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2830,11 +2873,11 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2860,7 +2903,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2893,7 +2936,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2926,7 +2969,7 @@ void R_FillSpan_RGBA() uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -3147,7 +3190,6 @@ DWORD vlinec1_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3155,10 +3197,11 @@ DWORD vlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { - *dest = shade_pal_index(colormap[source[frac >> bits]], light); + *dest = 
shade_pal_index(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -3197,12 +3240,14 @@ void vlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { - dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3219,40 +3264,64 @@ void vlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if 
(shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE p0 = palookupoffse[0][bufplce[0][place0 >> bits]]; - BYTE p1 = palookupoffse[1][bufplce[1][place1 >> bits]]; - BYTE p2 = palookupoffse[2][bufplce[2][place2 >> bits]]; - BYTE p3 = palookupoffse[3][bufplce[3][place3 >> bits]]; + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = 
local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3323,13 +3392,14 @@ DWORD mvlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { BYTE pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(colormap[pix], light); + *dest = shade_pal_index(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -3370,13 +3440,15 @@ void mvlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = 
vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3393,48 +3465,70 @@ void mvlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + // movemask = !(pix == 0) + __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - BYTE p0 = palookupoffse[0][pix0]; - BYTE p1 = palookupoffse[1][pix1]; - BYTE p2 = palookupoffse[2][pix2]; - BYTE p3 = palookupoffse[3][pix3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + 
local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3503,7 +3597,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3528,7 +3622,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); } else { @@ -3578,6 +3672,7 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; for (; y < y2; ++y) { @@ -3585,7 +3680,37 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) int x = x1; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * 
inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); dest += dc_pitch; } @@ -3598,10 +3723,41 @@ static void R_DrawFogBoundaryLine_RGBA(int y, int x) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); } @@ -3627,7 +3783,9 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + 
R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); for (--x; x >= x1; --x) { @@ -3652,11 +3810,12 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else { - if (dc_colormap != basecolormapdata) + if (fake_dc_colormap != basecolormapdata) { stop = MIN(t1, b2); while (t2 < stop) @@ -3741,7 +3900,6 @@ fixed_t tmvline1_add_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3749,6 +3907,7 @@ fixed_t tmvline1_add_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3758,7 +3917,7 @@ fixed_t tmvline1_add_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3825,6 +3984,8 @@ void tmvline4_add_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3835,7 +3996,7 @@ void tmvline4_add_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], 
light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3898,7 +4059,6 @@ fixed_t tmvline1_addclamp_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3906,6 +4066,7 @@ fixed_t tmvline1_addclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3915,7 +4076,7 @@ fixed_t tmvline1_addclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3981,6 +4142,8 @@ void tmvline4_addclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3991,7 +4154,7 @@ void tmvline4_addclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4059,6 +4222,7 @@ fixed_t tmvline1_subclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4068,7 +4232,7 @@ 
fixed_t tmvline1_subclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4133,6 +4297,8 @@ void tmvline4_subclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4143,7 +4309,7 @@ void tmvline4_subclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4211,6 +4377,7 @@ fixed_t tmvline1_revsubclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4220,7 +4387,7 @@ fixed_t tmvline1_revsubclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4285,6 +4452,8 @@ void tmvline4_revsubclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4295,7 +4464,7 @@ void tmvline4_revsubclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if 
(pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4793,15 +4962,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - lighttable_t *colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; + dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16-alpha]; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - R_SetColorMapLight(colormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); } else { - R_SetColorMapLight(colormap, 0, 0); + R_SetColorMapLight(basecolormap, 0, 0); } return r_columnmethod ? DoDraw1 : DoDraw0; } @@ -4827,7 +4996,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - R_SetColorMapLight(identitymap, 0, 0); + R_SetColorMapLight(&identitycolormap, 0, 0); } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) @@ -4871,30 +5040,77 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) return false; } -void R_SetColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetTranslationMap(lighttable_t *translation) { + dc_fcolormap = nullptr; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; if (r_swtruecolor) { - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, shade); + dc_colormap = translation; + dc_light = 0; } else { - dc_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_colormap = translation; dc_light = 0; } } -void R_SetDSColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) { + dc_fcolormap = base_colormap; + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(std::abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + 
dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { - ds_colormap = basecolormapdata; + dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_light = 0; + } +} + +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) +{ + ds_fcolormap = base_colormap; + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(std::abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + if (r_swtruecolor) + { + ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } else { - ds_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); ds_light = 0; } } diff --git a/src/r_draw.h b/src/r_draw.h index 2eefff9bd..cc3b10935 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,11 +25,16 @@ #include "r_defs.h" +struct FColormap; +struct ShadeConstants; + extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; +extern "C" FColormap *dc_fcolormap; +extern "C" ShadeConstants dc_shade_constants; extern "C" fixed_t dc_light; extern "C" int dc_x; 
extern "C" int dc_yl; @@ -93,7 +98,7 @@ extern void (*R_DrawTranslatedColumn)(void); // Span drawing for rows, floor/ceiling. No Spectre effect needed. extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(BYTE *colormap); +void R_SetSpanColormap(FDynamicColormap *colormap, int shade); void R_SetSpanSource(const BYTE *pixels); // Span drawing for masked textures. @@ -321,9 +326,10 @@ extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; +extern "C" FColormap* ds_fcolormap; extern "C" lighttable_t* ds_colormap; -//extern "C" dsfixed_t ds_light; -#define ds_light dc_light +extern "C" ShadeConstants ds_shade_constants; +extern "C" dsfixed_t ds_light; extern "C" dsfixed_t ds_xfrac; extern "C" dsfixed_t ds_yfrac; @@ -341,6 +347,7 @@ extern "C" int ds_color; // [RH] For flat color (no texturing) extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/]; extern FDynamicColormap ShadeFakeColormap[16]; extern BYTE identitymap[256]; +extern FDynamicColormap identitycolormap; extern BYTE *dc_translation; // [RH] Added for muliresolution support @@ -389,9 +396,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) -void R_SetColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); // Same as R_SetColorMapLight, but for ds_colormap and ds_light -void R_SetDSColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); + +void R_SetTranslationMap(lighttable_t *translation); #endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d390fc54d..ff5c0d82f 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -108,7 +108,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -120,14 +119,14 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { - *dest = shade_pal_index(colormap[*source], light); + *dest = shade_pal_index(*source, light, shade_constants); source += 4; dest += pitch; } @@ -135,8 +134,8 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -145,7 +144,6 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -157,17 +155,17 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; if (count & 1) { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); source += 4; dest += pitch; } @@ -175,14 +173,14 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); - dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); - dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); - dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + 
dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -191,7 +189,6 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -202,82 +199,114 @@ void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) return; count++; + ShadeConstants shade_constants = dc_shade_constants; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], 
palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); + SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[4]]; - uint32_t p1 = colormap[source[5]]; - uint32_t p2 = colormap[source[6]]; - uint32_t p3 = colormap[source[7]]; + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); - fg = _mm_packus_epi16(fg_lo, fg_hi); - 
_mm_storeu_si128((__m128i*)(dest + pitch), fg); + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - source += 8; - dest += pitch * 2; - } while (--count); + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) @@ -385,7 +414,6 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -399,15 +427,15 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -430,7 +458,6 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -444,9 +471,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -454,7 +481,7 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -479,7 +506,6 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -493,7 +519,6 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -501,40 +526,80 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - 
uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -571,7 +636,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -613,7 +678,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -659,7 +724,7 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -694,7 +759,6 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -708,15 +772,15 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -738,7 +802,6 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -752,9 +815,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -762,7 +825,7 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -786,7 +849,6 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int 
count; @@ -800,7 +862,6 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -808,40 +869,80 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * 
fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, 
_mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -862,7 +963,6 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -876,15 +976,15 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -906,7 +1006,6 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -920,9 +1019,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -930,7 +1029,7 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -968,7 +1067,6 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -982,15 +1080,15 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1012,7 +1110,6 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -1026,9 +1123,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -1036,7 +1133,7 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_main.cpp b/src/r_main.cpp index aaf8fc532..a795f8016 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -119,7 +119,7 @@ double FocalLengthX; double FocalLengthY; FDynamicColormap*basecolormap; // [RH] colormap currently drawing with int fixedlightlev; -lighttable_t *fixedcolormap; +FColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; double WallTMapScale2; @@ -464,11 +464,11 @@ void R_SetupColormap(player_t *player) // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the // palette. 
- fixedcolormap = realcolormaps; + fixedcolormap = &realcolormaps; } else { - fixedcolormap = SpecialColormaps[player->fixedcolormap].Colormap; + fixedcolormap = &SpecialColormaps[player->fixedcolormap]; } } else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS) @@ -479,7 +479,7 @@ void R_SetupColormap(player_t *player) // [RH] Inverse light for shooting the Sigil if (fixedcolormap == NULL && extralight == INT_MIN) { - fixedcolormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + fixedcolormap = &SpecialColormaps[INVERSECOLORMAP]; extralight = 0; } } diff --git a/src/r_main.h b/src/r_main.h index e8be3c1a3..0db704df1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,25 +90,162 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; + // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { return 256 - (light >> (FRACBITS - 8)); } +// Give the compiler a strong hint we want these functions inlined: +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) +#else +#define FORCEINLINE inline +#endif +#endif + // Calculates a ARGB8 color for the given palette index and light multiplier -inline uint32_t shade_pal_index(uint32_t index, uint32_t light) +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) { const PalEntry &color = GPalette.BaseColors[index]; uint32_t red = color.r; uint32_t green = color.g; uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * light 
/ 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; } +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = 
_mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((intensity_hi.m128i_u16[2] + intensity_hi.m128i_u16[1] + intensity_hi.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((intensity_hi.m128i_u16[6] + intensity_hi.m128i_u16[5] + intensity_hi.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((intensity_lo.m128i_u16[2] + intensity_lo.m128i_u16[1] + intensity_lo.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((intensity_lo.m128i_u16[6] + intensity_lo.m128i_u16[5] + intensity_lo.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ 
+ \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + extern bool r_swtruecolor; extern double GlobVis; @@ -125,7 +262,7 @@ extern double r_SpriteVisibility; extern int r_actualextralight; extern bool foggy; extern int fixedlightlev; -extern lighttable_t* fixedcolormap; +extern FColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 9805ab200..26d579d6d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,7 +227,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap->Maps, GlobVis * fabs(CenterY - y), planeshade); + R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } #ifdef X86_ASM @@ -616,7 +616,7 @@ void R_MapColoredPlane_RGBA(int y, int x1) uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -1598,7 +1598,7 @@ void R_DrawSkyPlane (visplane_t *pl) else { fakefixed = true; - fixedcolormap = NormalLight.Maps; + fixedcolormap = &NormalLight; R_SetColorMapLight(fixedcolormap, 0, 0); } @@ -1683,7 +1683,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1860,7 +1860,7 @@ void 
R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1870,7 +1870,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a } else { - R_SetDSColorMapLight(basecolormap->Maps, 0, 0); + R_SetDSColorMapLight(basecolormap, 0, 0); plane_shade = true; } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 43590247e..bd2c7d22b 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -177,7 +177,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(basecolormap, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -313,7 +313,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -630,7 +630,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -1126,6 +1126,11 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (x & 3); ++x) { 
light += rw_lightstep; @@ -1137,7 +1142,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1241,7 +1246,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1496,6 +1501,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1505,7 +1515,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1605,7 +1615,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1690,6 +1700,11 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1699,7 +1714,7 
@@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1801,7 +1816,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1839,7 +1854,7 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -3238,11 +3253,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -3293,7 +3308,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3303,7 +3318,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - 
R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -3318,7 +3333,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index ee6ac5fed..645741a2a 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -323,7 +323,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. - unsigned char *savecolormap = fixedcolormap; + FColormap *savecolormap = fixedcolormap; FSpecialColormap *savecm = realfixedcolormap; DAngle savedfov = FieldOfView; diff --git a/src/r_things.cpp b/src/r_things.cpp index 6f8038148..c132cc2fd 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(dc_colormap, 0, vis->Style.ColormapNum << FRACBITS); + R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -538,11 +538,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -593,7 +593,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -604,7 +604,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -620,7 +620,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -680,7 +680,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. 
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap->Maps + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. @@ -1121,19 +1121,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = 0; } else { // diminished light vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; } } } @@ -1208,7 +1208,7 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p vis->bWallSprite = true; vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->wallc = wallc; } @@ -1428,7 +1428,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double if (realfixedcolormap != NULL) { // fixed color - vis->Style.BaseColormap = realfixedcolormap->Colormap; + vis->Style.BaseColormap = realfixedcolormap; vis->Style.ColormapNum = 0; } else @@ -1439,39 +1439,38 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; 
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && psp->state->GetFullbright()) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.BaseColormap = mybasecolormap; // [RH] use basecolormap vis->Style.ColormapNum = 0; } else { // local light - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != NULL) { BYTE oldcolormapnum = vis->Style.ColormapNum; - lighttable_t *oldcolormap = vis->Style.BaseColormap; + FColormap *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite (&vis->Style); if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { // The colormap has changed. Is it one we can easily identify? // If not, then don't bother trying to identify it for // hardware accelerated drawing. - if (vis->Style.BaseColormap < SpecialColormaps[0].Colormap || - vis->Style.BaseColormap > SpecialColormaps.Last().Colormap) + if (vis->Style.BaseColormap < &SpecialColormaps[0] || + vis->Style.BaseColormap > &SpecialColormaps.Last()) { noaccel = true; } // Has the basecolormap changed? If so, we can't hardware accelerate it, // since we don't know what it is anymore. - else if (vis->Style.BaseColormap < mybasecolormap->Maps || - vis->Style.BaseColormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) + else if (vis->Style.BaseColormap != mybasecolormap) { noaccel = true; } @@ -1479,13 +1478,13 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap <= SpecialColormaps.Last().Colormap)) + if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap <= &SpecialColormaps.Last())) { noaccel = true; } // If drawing with a BOOM colormap, disable acceleration. - if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps) + if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) { noaccel = true; } @@ -1502,7 +1501,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double else { colormap_to_use = basecolormap; - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->Style.ColormapNum = 0; vis->Style.RenderStyle = STYLE_Normal; } @@ -1649,12 +1648,10 @@ void R_DrawRemainingPlayerSprites() FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) + if (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) { - // Yuck! There needs to be a better way to store colormaps in the vissprite... 
:( - ptrdiff_t specialmap = (vis->Style.BaseColormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap) + vis->Style.ColormapNum; - special = &SpecialColormaps[specialmap]; + special = static_cast<FSpecialColormap*>(vis->Style.BaseColormap); } else if (colormap->Color == PalEntry(255,255,255) && colormap->Desaturate == 0) @@ -1912,7 +1909,7 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - lighttable_t *colormap = spr->Style.BaseColormap; + FColormap *colormap = spr->Style.BaseColormap; int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2010,18 +2007,18 @@ void R_DrawSprite (vissprite_t *spr) } if (fixedlightlev >= 0) { - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = 0; } else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } @@ -2438,7 +2435,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, int x1, x2, y1, y2; vissprite_t* vis; sector_t* heightsec = NULL; - BYTE* map; + FColormap* map; // [ZZ] Particle not visible through the portal plane if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) @@ -2511,7 +2508,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->ceilingplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::ceiling); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else if
(fakeside == FAKED_BelowFloor) { @@ -2519,7 +2516,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &sector->floorplane; toppic = heightsec->GetTexture(sector_t::floor); botpic = sector->GetTexture(sector_t::floor); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else { @@ -2527,7 +2524,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->floorplane; toppic = heightsec->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } } else @@ -2536,7 +2533,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &sector->floorplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = sector->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint (particle->Pos)) @@ -2619,7 +2616,7 @@ void R_DrawParticle_C (vissprite_t *vis) { int spacing; BYTE *dest; - BYTE color = vis->Style.BaseColormap[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2685,7 +2682,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; uint32_t *dest; - BYTE color = vis->Style.BaseColormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2693,7 +2690,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - uint32_t fg = shade_pal_index(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t
fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_utility.cpp b/src/r_utility.cpp index 71d3f2376..efc901ca5 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -889,11 +889,11 @@ void R_SetupFrame (AActor *actor) BaseBlendG = GPART(newblend); BaseBlendB = BPART(newblend); BaseBlendA = APART(newblend) / 255.f; - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; } else { - NormalLight.Maps = realcolormaps + NUMCOLORMAPS*256*newblend; + NormalLight.Maps = realcolormaps.Maps + NUMCOLORMAPS*256*newblend; BaseBlendR = BaseBlendG = BaseBlendB = 0; BaseBlendA = 0.f; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index ff0427b34..c2dbf31c5 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -171,14 +171,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - R_SetColorMapLight((lighttable_t *)translation, 0, 0); + R_SetTranslationMap((lighttable_t *)translation); } else { - R_SetColorMapLight(identitymap, 0, 0); + R_SetTranslationMap(identitymap); } - fixedcolormap = dc_colormap; + fixedcolormap = dc_fcolormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); BYTE *destorgsave = dc_destorg; @@ -1025,7 +1025,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1394,7 +1394,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. R_SetupSpanBits(tex); - R_SetSpanColormap(colormap != NULL ? 
&colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap); + if (colormap) + R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + else + R_SetSpanColormap(&identitycolormap, 0); R_SetSpanSource(tex->GetPixels()); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley;