diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 984a74f3f..2710b9992 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2265,9 +2265,12 @@ const BYTE *R_GetColumn (FTexture *tex, int col) { col = width + (col % width); } - return tex->GetColumn (col, NULL); -} + if (r_swtruecolor) + return (const BYTE *)tex->GetColumnBgra(col, NULL); + else + return tex->GetColumn(col, NULL); +} // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () diff --git a/src/r_draw.h b/src/r_draw.h index bf73c9dfb..3f97a7a65 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -520,6 +520,9 @@ class DrawerCommandQueue std::condition_variable end_condition; int finished_threads = 0; + bool no_threading = false; + DrawerThread single_core_thread; + void StartThreads(); void StopThreads(); @@ -535,11 +538,20 @@ public: template static void QueueCommand(Types &&... args) { - void *ptr = AllocMemory(sizeof(T)); - T *command = new (ptr)T(std::forward(args)...); - if (!command) - return; - Instance()->commands.push_back(command); + auto queue = Instance(); + if (queue->no_threading) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } } // Wait until all worker threads finished executing commands diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 489716e1f..528c3c986 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -85,6 +85,8 @@ void* DrawerCommandQueue::AllocMemory(size_t size) void DrawerCommandQueue::Finish() { auto queue = Instance(); + if (queue->commands.empty()) + return; // Give worker threads something to do: @@ -190,8 +192,8 @@ class DrawColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_texturefrac; - fixed_t dc_iscale; + DWORD dc_texturefrac; + DWORD dc_iscale; fixed_t dc_light; const BYTE *dc_source; int dc_pitch; @@ -628,8 +630,8 @@ class DrawAddColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -708,8 +710,8 @@ class DrawTranslatedColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -769,8 +771,8 @@ class DrawTlatedAddColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -845,8 +847,8 @@ class DrawShadedColumnRGBACommand : public DrawerCommand private: int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; fixed_t dc_light; const BYTE *dc_source; lighttable_t *dc_colormap; @@ -918,8 +920,8 @@ class DrawAddClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -994,8 +996,8 @@ class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -1073,8 +1075,8 @@ class DrawSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1149,8 +1151,8 @@ class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1228,8 +1230,8 @@ class DrawRevSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1303,8 +1305,8 @@ class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1380,7 +1382,7 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_xfrac; fixed_t ds_yfrac; fixed_t ds_xstep; @@ -1397,7 +1399,7 @@ class DrawSpanRGBACommand : public DrawerCommand public: DrawSpanRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_xfrac = ::ds_xfrac; ds_yfrac = ::ds_yfrac; ds_xstep = ::ds_xstep; @@ -1423,7 +1425,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1450,7 +1452,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1469,7 +1471,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1488,7 +1490,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1598,7 +1600,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1617,7 +1619,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1630,7 +1632,7 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1647,7 +1649,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand public: DrawSpanMaskedRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1672,7 +1674,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1694,13 +1696,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1714,13 +1716,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1732,7 +1734,7 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1749,7 +1751,7 @@ class DrawSpanTranslucentRGBACommand : public DrawerCommand public: DrawSpanTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t *)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1774,7 +1776,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1801,7 +1803,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1829,7 +1831,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1853,7 +1855,7 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1870,7 +1872,7 @@ class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1895,7 +1897,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1920,13 +1922,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1953,13 +1955,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1984,7 +1986,7 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2001,7 +2003,7 @@ class DrawSpanAddClampRGBACommand : public DrawerCommand public: DrawSpanAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2026,7 +2028,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2053,7 +2055,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2081,7 +2083,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2105,7 +2107,7 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2122,7 +2124,7 @@ class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2147,7 +2149,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2172,13 +2174,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2205,13 +2207,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2270,8 +2272,8 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2302,7 +2304,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2312,7 +2314,7 @@ public: do { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + *dest = shade_bgra(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -2329,7 +2331,7 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Vlinec4RGBACommand() @@ -2344,7 +2346,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2378,10 +2380,10 @@ public: do { - dest[0] = shade_pal_index(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2403,7 +2405,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2423,17 +2424,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2449,17 +2450,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2471,8 +2472,8 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2503,7 +2504,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = mvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2513,10 +2514,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(pix, light, shade_constants); + *dest = shade_bgra(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -2534,7 +2535,7 @@ class Mvlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Mvlinec4RGBACommand() @@ -2549,7 +2550,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2583,11 +2584,11 @@ public: do { - BYTE pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + uint32_t pix; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_bgra(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_bgra(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_bgra(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_bgra(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2609,7 +2610,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2629,10 +2629,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2642,7 +2642,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE_SIMPLE(fg); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2658,10 +2658,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2671,7 +2671,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE(fg, shade_constants); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2683,8 +2683,8 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2719,7 +2719,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2732,10 +2732,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2768,7 +2768,7 @@ class Tmvline4AddRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddRGBACommand() @@ -2785,7 +2785,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2823,10 +2823,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2850,8 +2850,8 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2886,7 +2886,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2899,10 +2899,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2935,7 +2935,7 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddClampRGBACommand() @@ -2952,7 +2952,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2990,10 +2990,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3017,8 +3017,8 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3053,7 +3053,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3066,10 +3066,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3102,7 +3102,7 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4SubClampRGBACommand() @@ -3119,7 +3119,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3157,10 +3157,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3184,8 +3184,8 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3220,7 +3220,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3233,10 +3233,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3269,7 +3269,7 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4RevSubClampRGBACommand() @@ -3286,7 +3286,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3324,10 +3324,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3549,8 +3549,17 @@ void R_FillSpan_RGBA() DrawerCommandQueue::QueueCommand(); } +extern FTexture *rw_pic; // For the asserts below + DWORD vlinec1_RGBA() { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + DWORD height = rw_pic->GetHeight(); + assert((frac >> vlinebits) < height); + frac += dc_count * fracstep; + assert((frac >> vlinebits) <= height); + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -3558,6 +3567,8 @@ DWORD vlinec1_RGBA() void vlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_RGBA() @@ -3569,6 +3580,8 @@ DWORD mvlinec1_RGBA() void mvlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_RGBA() @@ -3580,6 +3593,8 @@ fixed_t tmvline1_add_RGBA() void tmvline4_add_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_RGBA() @@ -3591,6 +3606,8 @@ fixed_t tmvline1_addclamp_RGBA() void tmvline4_addclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_RGBA() @@ -3602,6 +3619,8 @@ fixed_t tmvline1_subclamp_RGBA() void tmvline4_subclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_RGBA() @@ -3613,6 +3632,8 @@ fixed_t tmvline1_revsubclamp_RGBA() void tmvline4_revsubclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) diff --git a/src/r_main.h b/src/r_main.h index 5d4ff1174..6d0e2a21f 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -136,6 +136,19 @@ FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { @@ -171,6 +184,39 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 26d579d6d..05fce79a6 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -973,15 +973,22 @@ extern FTexture *rw_pic; // Allow for layer skies up to 512 pixels tall. This is overkill, // since the most anyone can ever see of the sky is 500 pixels. // We need 4 skybufs because wallscan can draw up to 4 columns at a time. +// Need two versions - one for true color and one for palette static BYTE skybuf[4][512]; +static uint32_t skybuf_bgra[4][512]; static DWORD lastskycol[4]; +static DWORD lastskycol_bgra[4]; static int skycolplace; +static int skycolplace_bgra; // Get a column of sky when there is only one sky texture. static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + if (!r_swtruecolor) + return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + else + return (const BYTE *)fronttex->GetColumnBgra((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); } // Get a column of sky when there are two overlapping sky textures @@ -996,38 +1003,77 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) DWORD skycol = (angle1 << 16) | angle2; int i; - for (i = 0; i < 4; ++i) + if (!r_swtruecolor) { - if (lastskycol[i] == skycol) + for (i = 0; i < 4; ++i) { - return skybuf[i]; + if (lastskycol[i] == skycol) + { + return skybuf[i]; + } } + + lastskycol[skycolplace] = skycol; + BYTE *composite = skybuf[skycolplace]; + skycolplace = (skycolplace + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const BYTE *front = fronttex->GetColumn(angle1, NULL); + const BYTE *back = backskytex->GetColumn(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do + { + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return composite; } - - lastskycol[skycolplace] = skycol; - BYTE *composite = skybuf[skycolplace]; - skycolplace = (skycolplace + 1) & 3; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. - const BYTE *front = fronttex->GetColumn (angle1, NULL); - const BYTE *back = backskytex->GetColumn (angle2, NULL); - - int count = MIN (512, MIN (backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do + else { - if (front[i]) + return R_GetOneSkyColumn(fronttex, x); + for (i = 0; i < 4; ++i) { - composite[i] = front[i]; + if (lastskycol_bgra[i] == skycol) + { + return (BYTE*)(skybuf_bgra[i]); + } } - else + + lastskycol_bgra[skycolplace_bgra] = skycol; + uint32_t *composite = skybuf_bgra[skycolplace_bgra]; + skycolplace_bgra = (skycolplace_bgra + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); + const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do { - composite[i] = back[i]; - } - } while (++i, --count); - return composite; + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return (BYTE*)composite; + } } static void R_DrawSky (visplane_t *pl) @@ -1062,6 +1108,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } rw_pic = frontskytex; @@ -1075,6 +1122,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1112,6 +1160,7 @@ static void R_DrawSkyStriped (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1230,7 +1279,10 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - ds_source = tex->GetPixels (); + if (r_swtruecolor) + ds_source = (const BYTE*)tex->GetPixelsBgra(); + else + ds_source = tex->GetPixels(); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5aa7c29a2..87ce48ec4 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1092,6 +1092,8 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv { int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; int count = MIN(left, next_uv_wrap); + if (count <= 0) + break; // This should never happen, but it does.. if (count > 0) { @@ -1146,6 +1148,8 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; count = MIN(next_uv_wrap, count); } + if (count <= 0) + break; // This should never happen, but it does.. // Draw until that column wraps if (count > 0) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index e47fa62c0..95f7aca75 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -41,6 +41,7 @@ #include "bitmap.h" #include "v_palette.h" #include "textures/textures.h" +#include //========================================================================== // @@ -56,6 +57,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -65,6 +67,7 @@ protected: FString SourceFile; BYTE *Pixels; + std::vector PixelsBgra; Span **Spans; BYTE BitDepth; @@ -73,11 +76,13 @@ protected: bool HaveTrans; WORD NonPaletteTrans[3]; + std::vector PngPalette; BYTE *PaletteMap; int PaletteSize; DWORD StartOfIDAT; void MakeTexture (); + void MakeTextureBgra (); friend class FTexture; }; @@ -266,6 +271,12 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename { lump.Seek (len - PaletteSize * 3, SEEK_CUR); } + for (i = 0; i < PaletteSize; i++) + { + PngPalette.push_back(p.pngpal[i][0]); + PngPalette.push_back(p.pngpal[i][1]); + PngPalette.push_back(p.pngpal[i][2]); + } for (i = PaletteSize - 1; i >= 0; --i) { p.palette[i] = MAKERGB(p.pngpal[i][0], p.pngpal[i][1], p.pngpal[i][2]); @@ -369,11 +380,9 @@ FPNGTexture::~FPNGTexture () void FPNGTexture::Unload () { - if (Pixels != NULL) - { - delete[] Pixels; - Pixels = NULL; - } + delete[] Pixels; + Pixels = NULL; + PixelsBgra.clear(); } //========================================================================== @@ -446,6 +455,16 @@ const BYTE *FPNGTexture::GetPixels () return Pixels; } +const uint32_t *FPNGTexture::GetPixelsBgra() +{ + if (PixelsBgra.empty()) + { + MakeTextureBgra(); + } + return PixelsBgra.data(); +} + + //========================================================================== // // @@ -602,6 +621,139 @@ void FPNGTexture::MakeTexture () delete lump; } +void FPNGTexture::MakeTextureBgra () +{ + FileReader *lump; + + if (SourceLump >= 0) + { + lump = new FWadLump(Wads.OpenLumpNum(SourceLump)); + } + else + { + lump = new FileReader(SourceFile.GetChars()); + } + + PixelsBgra.resize(Width * Height, 0xffff0000); + if (StartOfIDAT != 0) + { + DWORD len, id; + lump->Seek (StartOfIDAT, SEEK_SET); + lump->Read(&len, 4); + lump->Read(&id, 4); + + if (ColorType == 0 || ColorType == 3) /* Grayscale and paletted */ + { + std::vector src(Width*Height); + M_ReadIDAT (lump, src.data(), Width, Height, Width, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + + if (!PngPalette.empty()) + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t r = PngPalette[src[x + y * Width] * 3 + 0]; + uint32_t g = PngPalette[src[x + y * Width] * 3 + 1]; + uint32_t b = PngPalette[src[x + y * Width] * 3 + 2]; + PixelsBgra[x * Height + y] = 0xff000000 | (r << 16) | (g << 8) | b; + } + } + } + else + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t gray = src[x + y * Width]; + PixelsBgra[x * Height + y] = 0xff000000 | (gray << 16) | (gray << 8) | gray; + } + } + } + } + else /* RGB and/or Alpha present */ + { + int bytesPerPixel = ColorType == 2 ? 3 : ColorType == 4 ? 2 : 4; + BYTE *tempix = new BYTE[Width * Height * bytesPerPixel]; + BYTE *in; + uint32_t *out; + int x, y, pitch, backstep; + + M_ReadIDAT (lump, tempix, Width, Height, Width*bytesPerPixel, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + in = tempix; + out = PixelsBgra.data(); + + // Convert from source format to paletted, column-major. + // Formats with alpha maps are reduced to only 1 bit of alpha. + switch (ColorType) + { + case 2: // RGB + pitch = Width * 3; + backstep = Height * pitch - 3; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + if (!HaveTrans) + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + else + { + if (in[0] == NonPaletteTrans[0] && + in[1] == NonPaletteTrans[1] && + in[2] == NonPaletteTrans[2]) + { + *out++ = 0; + } + else + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + } + in += pitch; + } + in -= backstep; + } + break; + + case 4: // Grayscale + Alpha + pitch = Width * 2; + backstep = Height * pitch - 2; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + uint32_t alpha = in[1]; + uint32_t gray = in[0]; + *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; + in += pitch; + } + in -= backstep; + } + break; + + case 6: // RGB + Alpha + pitch = Width * 4; + backstep = Height * pitch - 4; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + *out++ = (((uint32_t)in[3]) << 24) | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + in += pitch; + } + in -= backstep; + } + break; + } + delete[] tempix; + } + } + delete lump; +} + //=========================================================================== // // FPNGTexture::CopyTrueColorPixels diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7b90c295f..1869491b1 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,6 +45,7 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" +#include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); typedef FTexture * (*CreateFunc)(FileReader & file, int lumpnum); @@ -175,6 +176,33 @@ FTexture::~FTexture () KillNative(); } +const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out) +{ + const uint32_t *pixels = GetPixelsBgra(); + + column %= Width; + if (column < 0) + column += Width; + + if (spans_out != nullptr) + GetColumn(column, spans_out); + return pixels + column * Height; +} + +const uint32_t *FTexture::GetPixelsBgra() +{ + if (BgraPixels.empty()) + { + const BYTE *indices = GetPixels(); + BgraPixels.resize(Width * Height); + for (int i = 0; i < Width * Height; i++) + { + BgraPixels[i] = GPalette.BaseColors[indices[i]].d; + } + } + return BgraPixels.data(); +} + bool FTexture::CheckModified () { return false; diff --git a/src/textures/textures.h b/src/textures/textures.h index 14667093c..0d066eff5 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -3,6 +3,7 @@ #include "doomtype.h" #include "vectors.h" +#include class FBitmap; struct FRemapTable; @@ -175,9 +176,15 @@ public: // Returns a single column of the texture virtual const BYTE *GetColumn (unsigned int column, const Span **spans_out) = 0; + // Returns a single column of the texture, in BGRA8 format + virtual const uint32_t *GetColumnBgra(unsigned int column, const Span **spans_out); + // Returns the whole texture, stored in column-major order virtual const BYTE *GetPixels () = 0; - + + // Returns the whole texture, stored in column-major order, in BGRA8 format + virtual const uint32_t *GetPixelsBgra(); + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -262,6 +269,9 @@ protected: Rotations = other->Rotations; } +private: + std::vector BgraPixels; + public: static void FlipSquareBlock (BYTE *block, int x, int y); static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap);