diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c90756b5d..75cf27cad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -882,6 +882,7 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw.cpp + r_draw_rgba.cpp r_drawt.cpp r_drawt_rgba.cpp r_main.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index e809342e9..984a74f3f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -43,9 +43,6 @@ #include "gi.h" #include "stats.h" #include "x86.h" -#ifndef NO_SSE -#include <emmintrin.h> -#endif #undef RANGECHECK @@ -135,6 +132,7 @@ void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); void (*rt_initcols)(BYTE *buffer); +void (*rt_span_coverage)(int x, int start, int stop); // // R_DrawColumn @@ -287,51 +285,6 @@ void R_DrawColumnP_C (void) } #endif -void R_DrawColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do - { - *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - dest += pitch; - frac += fracstep; - - } while (--count); - } -} - // [RH] Just fills a column with a color void R_FillColumnP_C (void) { @@ -357,32 +310,6 @@ void R_FillColumnP_C (void) } } -void R_FillColumnP_RGBA() -{ - int count; - uint32_t* dest; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - - { - int pitch = dc_pitch; - BYTE color = dc_color; - - do - { - *dest = shade_pal_index_simple(color, light); - dest += pitch; - } while (--count); - } -} - void R_FillAddColumn_C (void) { int count; @@ -410,38 +337,6 @@ void R_FillAddColumn_C (void) } while (--count); } -void R_FillAddColumn_RGBA_C() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillAddClampColumn_C (void) { int count; @@ -475,38 +370,6 @@ void R_FillAddClampColumn_C (void) } while (--count); } -void R_FillAddClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - 
do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillSubClampColumn_C (void) { int count; @@ -539,38 +402,6 @@ void R_FillSubClampColumn_C (void) } while (--count); } -void R_FillSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillRevSubClampColumn_C (void) { int count; @@ -603,42 +434,9 @@ void R_FillRevSubClampColumn_C (void) } while (--count); } -void R_FillRevSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = 
clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - // // Spectre/Invisibility. // -#define FUZZTABLE 50 extern "C" { @@ -754,105 +552,6 @@ void R_DrawFuzzColumnP_C (void) } #endif -void R_DrawFuzzColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - - // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; - - // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - count = dc_yh - dc_yl; - - // Zero length. - if (count < 0) - return; - - count++; - - dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; - - // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) - // I'm not sure if this is really always the case or not. - - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. 
- if (fuzz) - { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -} - // // R_DrawTranlucentColumn // @@ -937,56 +636,6 @@ void R_DrawAddColumnP_C (void) } } -void R_DrawAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - BYTE *colormap = dc_colormap; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> 
(FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // // R_DrawTranslatedColumn // Used to draw player sprites with the green colorramp mapped to others. @@ -1027,40 +676,6 @@ void R_DrawTranslatedColumnP_C (void) } } -void R_DrawTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column that is both translated and translucent void R_DrawTlatedAddColumnP_C() { @@ -1101,56 +716,6 @@ void R_DrawTlatedAddColumnP_C() } } -void R_DrawTlatedAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - 
- dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column whose "color" values are actually translucency // levels for a base color stored in dc_color. 
void R_DrawShadedColumnP_C (void) @@ -1188,52 +753,6 @@ void R_DrawShadedColumnP_C (void) } } -void R_DrawShadedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac, fracstep; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - - do - { - DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); - DWORD inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () { @@ -1275,53 +794,6 @@ void R_DrawAddClampColumnP_C () } } -void R_DrawAddClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 
0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add translated source to destination, clamping it to white void R_DrawAddClampTranslatedColumnP_C () { @@ -1364,54 +836,6 @@ void R_DrawAddClampTranslatedColumnP_C () } } -void R_DrawAddClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // 
Subtract destination from source, clamping it to black void R_DrawSubClampColumnP_C () { @@ -1452,53 +876,6 @@ void R_DrawSubClampColumnP_C () } } -void R_DrawSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract destination from source, clamping it to black void R_DrawSubClampTranslatedColumnP_C () { @@ -1540,54 +917,6 @@ void R_DrawSubClampTranslatedColumnP_C () } } -void R_DrawSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = 
calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampColumnP_C () { @@ -1628,52 +957,6 @@ void R_DrawRevSubClampColumnP_C () } } -void R_DrawRevSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - 
uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampTranslatedColumnP_C () { @@ -1715,55 +998,6 @@ void R_DrawRevSubClampTranslatedColumnP_C () } } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - - // // R_DrawSpan // 
With DOOM style restrictions on view orientation, @@ -1957,233 +1191,6 @@ void R_DrawSpanP_C (void) } #endif -void R_DrawSpanP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -#ifndef NO_SSE -void R_DrawSpanP_RGBA_SSE() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. 
- dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} -#endif - #ifndef X86_ASM // [RH] Draw a span with holes @@ -2250,72 +1257,6 @@ void R_DrawSpanMaskedP_C (void) } #endif -void R_DrawSpanMaskedP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - - void R_DrawSpanTranslucentP_C (void) { dsfixed_t xfrac; @@ -2378,89 +1319,6 @@ void R_DrawSpanTranslucentP_C (void) } } -void R_DrawSpanTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanMaskedTranslucentP_C (void) { dsfixed_t xfrac; @@ -2537,99 +1395,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void 
R_DrawSpanMaskedTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 
0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanAddClampP_C (void) { dsfixed_t xfrac; @@ -2700,88 +1465,6 @@ void R_DrawSpanAddClampP_C (void) } } -void R_DrawSpanAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} void R_DrawSpanMaskedAddClampP_C (void) { @@ -2865,114 +1548,12 @@ void R_DrawSpanMaskedAddClampP_C (void) } } -void R_DrawSpanMaskedAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * 
fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - // [RH] Just fill a span with a color void R_FillSpan_C (void) { memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); } -void R_FillSpan_RGBA() -{ - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; -} // Draw a voxel slab // @@ -3070,8 +1651,8 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v // wallscan stuff, in C -static int vlinebits; -static int mvlinebits; +int vlinebits; +int mvlinebits; #ifndef X86_ASM static DWORD vlinec1 (); @@ -3186,29 +1767,6 @@ DWORD vlinec1 () } #endif -DWORD vlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = vlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void vlinec4 () { @@ -3228,113 +1786,6 @@ void vlinec4 () } #endif -void vlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - dest[0] = 
shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void vlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, 
shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. - vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - void setupmvline (int fracbits) { if (!r_swtruecolor) @@ -3380,34 +1831,6 @@ DWORD mvlinec1 () } #endif -DWORD mvlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = mvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - *dest = shade_pal_index(pix, light, shade_constants); - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void mvlinec4 () { @@ -3428,121 +1851,6 @@ void mvlinec4 () } #endif -void mvlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = mvlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = 
calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void mvlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. 
- vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; @@ -3666,196 +1974,6 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } } -static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) -{ - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - for (; y < y2; ++y) - { - int x2 = spanend[y]; - int x = x1; - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); - dest += dc_pitch; - } -} - -static void R_DrawFogBoundaryLine_RGBA(int y, int x) -{ - int x2 = spanend[y]; - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t 
light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); -} - -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) -{ - // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis - - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. 
- - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection_RGBA(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - R_DrawFogBoundaryLine_RGBA(t2++, xr); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - R_DrawFogBoundaryLine_RGBA(--b2, xr); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection_RGBA(t2, b2, x1); - } -} - - int tmvlinebits; void setuptmvline (int bits) @@ -3896,49 +2014,6 @@ fixed_t tmvline1_add_C () return frac; } -fixed_t tmvline1_add_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = 
dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_add_C () { BYTE *dest = dc_dest; @@ -3972,51 +2047,6 @@ void tmvline4_add_C () } while (--count); } -void tmvline4_add_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - 
uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_addclamp_C () { DWORD fracstep = dc_iscale; @@ -4055,49 +2085,6 @@ fixed_t tmvline1_addclamp_C () return frac; } -fixed_t tmvline1_addclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_addclamp_C () { BYTE *dest = dc_dest; @@ -4130,51 +2117,6 @@ void tmvline4_addclamp_C () } 
while (--count); } -void tmvline4_addclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_subclamp_C () { DWORD fracstep = dc_iscale; @@ -4210,50 +2152,6 @@ fixed_t tmvline1_subclamp_C () return frac; } -fixed_t tmvline1_subclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = 
source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_subclamp_C () { BYTE *dest = dc_dest; @@ -4285,51 +2183,6 @@ void tmvline4_subclamp_C () } while (--count); } -void tmvline4_subclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = 
clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_revsubclamp_C () { DWORD fracstep = dc_iscale; @@ -4365,50 +2218,6 @@ fixed_t tmvline1_revsubclamp_C () return frac; } -fixed_t tmvline1_revsubclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_revsubclamp_C () { BYTE *dest = dc_dest; @@ -4440,52 +2249,6 @@ void tmvline4_revsubclamp_C () } while (--count); } -void tmvline4_revsubclamp_RGBA() -{ - uint32_t *dest = 
(uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - - //========================================================================== // // R_GetColumn @@ -4535,11 +2298,7 @@ void R_InitColumnDrawers () R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; -#ifndef NO_SSE - R_DrawSpan = R_DrawSpanP_RGBA_SSE; -#else - R_DrawSpan = R_DrawSpanP_RGBA_C; -#endif + R_DrawSpan = R_DrawSpanP_RGBA_C; R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; @@ -4579,9 +2338,13 @@ void R_InitColumnDrawers () rt_copy1col = rt_copy1col_RGBA_c; 
rt_copy4cols = rt_copy4cols_RGBA_c; rt_map1col = rt_map1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; rt_shaded1col = rt_shaded1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; rt_add1col = rt_add1col_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; rt_subclamp1col = rt_subclamp1col_RGBA_c; rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; rt_tlate1col = rt_tlate1col_RGBA_c; @@ -4597,31 +2360,14 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; - -#ifndef NO_SSE - rt_map4cols = rt_map4cols_RGBA_SSE; - rt_add4cols = rt_add4cols_RGBA_SSE; - rt_addclamp4cols = rt_addclamp4cols_RGBA_SSE; - rt_shaded4cols = rt_shaded4cols_RGBA_SSE; -#else - rt_map4cols = rt_map4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; -#endif + rt_span_coverage = rt_span_coverage_rgba; dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; domvline1 = mvlinec1_RGBA; -#ifndef NO_SSE - dovline4 = vlinec4_RGBA_SSE; - domvline4 = mvlinec4_RGBA_SSE; -#else dovline4 = vlinec4_RGBA; domvline4 = mvlinec4_RGBA; -#endif - } else { @@ -4719,6 +2465,7 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; rt_initcols = rt_initcols_pal; + rt_span_coverage = rt_span_coverage_pal; if (pointers_saved) { diff --git a/src/r_draw.h b/src/r_draw.h index cc3b10935..98be57c51 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,6 +24,13 @@ #define __R_DRAW__ #include "r_defs.h" +#include + +// Spectre/Invisibility. 
+#define FUZZTABLE 50 +extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine +extern "C" int fuzzpos; +extern "C" int fuzzviewheight; struct FColormap; struct ShadeConstants; @@ -173,7 +180,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh); void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); -void rt_shaded4cols_RGBA_SSE (int sx, int yl, int yh); void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); @@ -188,11 +194,8 @@ void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_map4cols_RGBA_SSE (int sx, int yl, int yh); void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_SSE (int sx, int yl, int yh); void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_SSE (int sx, int yl, int yh); void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); @@ -235,6 +238,7 @@ extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); extern void (*rt_initcols)(BYTE *buffer); +extern void (*rt_span_coverage)(int x, int start, int stop); void rt_draw4cols (int sx); @@ -242,6 +246,8 @@ void rt_draw4cols (int sx); void rt_initcols_pal (BYTE *buffer); void rt_initcols_rgba (BYTE *buffer); +void rt_span_coverage_pal(int x, int start, int stop); +void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -277,9 +283,40 @@ void R_DrawFuzzColumnP_RGBA_C (void); void R_DrawTranslatedColumnP_RGBA_C (void); void R_DrawShadedColumnP_RGBA_C (void); void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); +void 
R_DrawSpanTranslucentP_RGBA_C();
+void R_DrawSpanMaskedTranslucentP_RGBA_C();
+void R_DrawSpanAddClampP_RGBA_C();
+void R_DrawSpanMaskedAddClampP_RGBA_C();
+void R_FillColumnP_RGBA();
+void R_FillAddColumn_RGBA_C();
+void R_FillAddClampColumn_RGBA();
+void R_FillSubClampColumn_RGBA();
+void R_FillRevSubClampColumn_RGBA();
+void R_DrawAddColumnP_RGBA_C();
+void R_DrawTlatedAddColumnP_RGBA_C();
+void R_DrawAddClampColumnP_RGBA_C();
+void R_DrawAddClampTranslatedColumnP_RGBA_C();
+void R_DrawSubClampColumnP_RGBA_C();
+void R_DrawSubClampTranslatedColumnP_RGBA_C();
+void R_DrawRevSubClampColumnP_RGBA_C();
+void R_DrawRevSubClampTranslatedColumnP_RGBA_C();
+void R_FillSpan_RGBA();
+void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip);
+fixed_t tmvline1_add_RGBA();
+void tmvline4_add_RGBA();
+fixed_t tmvline1_addclamp_RGBA();
+void tmvline4_addclamp_RGBA();
+fixed_t tmvline1_subclamp_RGBA();
+void tmvline4_subclamp_RGBA();
+fixed_t tmvline1_revsubclamp_RGBA();
+void tmvline4_revsubclamp_RGBA();
+DWORD vlinec1_RGBA();
+void vlinec4_RGBA();
+DWORD mvlinec1_RGBA();
+void mvlinec4_RGBA();
+
 void R_DrawSpanTranslucentP_C (void);
 void R_DrawSpanMaskedTranslucentP_C (void);
 
@@ -403,4 +440,68 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade);
 
 void R_SetTranslationMap(lighttable_t *translation);
 
+// Wait until all drawers finished executing
+void R_FinishDrawerCommands();
+
+// State handed to DrawerCommand::Execute() by whoever runs the queue.
+class DrawerThread
+{
+public:
+	int core = 0;
+	int num_cores = 1;
+
+	uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
+	uint32_t *dc_temp_rgba = nullptr;
+};
+
+// A drawing operation recorded now and run later by DrawerCommandQueue::Finish().
+class DrawerCommand
+{
+public:
+	// DrawerCommandQueue::Finish() destroys commands through a DrawerCommand
+	// pointer, so the destructor has to be virtual for derived classes to be
+	// torn down correctly.
+	virtual ~DrawerCommand() { }
+
+	// Perform the drawing operation captured by this command.
+	virtual void Execute(DrawerThread *thread) = 0;
+};
+
+// Singleton queue of pending DrawerCommands plus the fixed-size pool they live in.
+class DrawerCommandQueue
+{
+	enum { memorypool_size = 4 * 1024 * 1024 };
+	// AllocMemory() only rounds sizes up to 16 bytes, so the pool itself must
+	// start on a 16-byte boundary for the returned pointers to be aligned.
+	alignas(16) char memorypool[memorypool_size];
+	size_t memorypool_pos = 0;
+
+	std::vector<DrawerCommand *> commands;
+
+	static DrawerCommandQueue *Instance();
+
+public:
+	// Allocate memory valid for the duration of a command execution.
+	// Returns nullptr when the pool is exhausted.
+	static void* AllocMemory(size_t size);
+
+	// Queue command to be executed by drawer worker threads.
+	// T must derive from DrawerCommand; args are forwarded to its constructor.
+	// The command is silently dropped if the memory pool is full.
+	template<typename T, typename... Types>
+	static void QueueCommand(Types &&... args)
+	{
+		// Check the allocation before constructing: placement new must not
+		// be handed a null pointer.
+		void *ptr = AllocMemory(sizeof(T));
+		if (!ptr)
+			return;
+		T *command = new (ptr)T(std::forward<Types>(args)...);
+		Instance()->commands.push_back(command);
+	}
+
+	// Wait until all worker threads finished executing commands
+	static void Finish();
+};
+
 #endif
diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp
new file mode 100644
index 000000000..9e61bb427
--- /dev/null
+++ b/src/r_draw_rgba.cpp
@@ -0,0 +1,3492 @@
+// Emacs style mode select -*- C++ -*-
+//-----------------------------------------------------------------------------
+//
+// $Id:$
+//
+// Copyright (C) 1993-1996 by id Software, Inc.
+//
+// This source is available for distribution and/or modification
+// only under the terms of the DOOM Source Code License as
+// published by id Software. All rights reserved.
+//
+// The source is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License
+// for more details.
+//
+// $Log:$
+//
+// DESCRIPTION:
+//		True color span/column drawing functions.
+//
+//-----------------------------------------------------------------------------
+
+// NOTE(review): the three system header names below were blank in this copy
+// of the patch; stddef.h / emmintrin.h / vector are the presumed originals -- confirm.
+#include <stddef.h>
+
+#include "templates.h"
+#include "doomdef.h"
+#include "i_system.h"
+#include "w_wad.h"
+#include "r_local.h"
+#include "v_video.h"
+#include "doomstat.h"
+#include "st_stuff.h"
+#include "g_game.h"
+#include "g_level.h"
+#include "r_data/r_translate.h"
+#include "v_palette.h"
+#include "r_data/colormaps.h"
+#include "r_plane.h"
+
+#include "gi.h"
+#include "stats.h"
+#include "x86.h"
+#ifndef NO_SSE
+#include <emmintrin.h>
+#endif
+#include <vector>
+
+extern int vlinebits;
+extern int mvlinebits;
+extern int tmvlinebits;
+
+extern "C" short spanend[MAXHEIGHT];
+extern float rw_light;
+extern float rw_lightstep;
+extern int wallshade;
+
+/////////////////////////////////////////////////////////////////////////////
+
+// Returns the single queue instance, created on first use.
+DrawerCommandQueue *DrawerCommandQueue::Instance()
+{
+	static DrawerCommandQueue queue;
+	return &queue;
+}
+
+// Hands out the next chunk of the queue's memory pool.
+// Returns nullptr if the request does not fit in what is left of the pool.
+// The memory is reclaimed in bulk when Finish() resets the pool position.
+void* DrawerCommandQueue::AllocMemory(size_t size)
+{
+	auto queue = Instance();
+	size_t remaining = memorypool_size - queue->memorypool_pos;
+
+	// Reject oversized requests before rounding so that neither the
+	// round-up nor the bounds check below can wrap around.
+	if (size > remaining)
+		return nullptr;
+
+	// Make sure allocations remain 16-byte aligned
+	size = (size + 15) / 16 * 16;
+	if (size > remaining)
+		return nullptr;
+
+	void *data = queue->memorypool + queue->memorypool_pos;
+	queue->memorypool_pos += size;
+	return data;
+}
+
+// Runs every queued command in submission order on the calling thread,
+// then destroys the commands and resets the memory pool.
+void DrawerCommandQueue::Finish()
+{
+	auto queue = Instance();
+
+	DrawerThread thread;
+
+	// The count is taken once up front: anything queued while commands are
+	// executing is not run by this loop (it is still destroyed below).
+	size_t size = queue->commands.size();
+	for (size_t i = 0; i < size; i++)
+	{
+		auto &command = queue->commands[i];
+		command->Execute(&thread);
+	}
+
+	// Commands were placement-constructed in the pool, so they are destroyed
+	// explicitly rather than deleted.
+	for (auto &command : queue->commands)
+		command->~DrawerCommand();
+	queue->commands.clear();
+	queue->memorypool_pos = 0;
+}
+
+/////////////////////////////////////////////////////////////////////////////
+
+// Queued form of the true-color column drawer: maps a run of palette-indexed
+// texels through shade_pal_index() into a 32-bit destination column.
+// Every dc_* global it needs is copied at construction time.
+class DrawColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_texturefrac;
+	fixed_t dc_iscale;
+	fixed_t dc_light;
+	const BYTE *dc_source;
+	int dc_pitch;
+	ShadeConstants dc_shade_constants;
+
+public:
+	DrawColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_iscale = ::dc_iscale;
+		dc_light = ::dc_light;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_shade_constants = ::dc_shade_constants;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+
+		// Zero length, column does not exceed a pixel.
+		if (count <= 0)
+			return;
+
+		// Framebuffer destination address.
+		uint32_t *dest = (uint32_t*)dc_dest;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		// Fixed-point texture position and the amount it advances per pixel.
+		fixed_t frac = dc_texturefrac;
+		fixed_t fracstep = dc_iscale;
+
+		const BYTE *source = dc_source;
+		int pitch = dc_pitch;
+
+		// One texel lookup per destination pixel, stepping dest by pitch.
+		do
+		{
+			*dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+};
+
+// Fills a column with one light-shaded palette color.
+class FillColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_light;
+	int dc_pitch;
+	int dc_color;
+
+public:
+	FillColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_light = ::dc_light;
+		dc_pitch = ::dc_pitch;
+		dc_color = ::dc_color;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+		uint32_t light = calc_light_multiplier(dc_light);
+		int pitch = dc_pitch;
+		BYTE color = dc_color;
+
+		do
+		{
+			*dest = shade_pal_index_simple(color, light);
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+// Averages one light-shaded palette color with every pixel of a column.
+class FillAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	fixed_t dc_light;
+	int dc_color;
+
+public:
+	FillAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_color = ::dc_color;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		// Foreground channels use the same layout the result is packed with
+		// below (red << 16, green << 8, blue); bits 24-31 are not a color
+		// channel and must not be read as red.
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// Rounded average of foreground and background.
+			uint32_t red = (fg_red + bg_red + 1) / 2;
+			uint32_t green = (fg_green + bg_green + 1) / 2;
+			uint32_t blue = (fg_blue + bg_blue + 1) / 2;
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+class FillAddClampColumnRGBACommand : public DrawerCommand
+{
+	// Adds one light-shaded palette color to every pixel of a column,
+	// saturating each channel at 255.
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	fixed_t dc_light;
+	int dc_color;
+
+public:
+	FillAddClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_color = ::dc_color;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		// Same channel layout as the packed result below: red in bits 16-23,
+		// green in bits 8-15, blue in bits 0-7.
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// Template argument spelled out because the operands mix
+			// uint32_t values with int literals.
+			uint32_t red = clamp<uint32_t>(fg_red + bg_red, 0, 255);
+			uint32_t green = clamp<uint32_t>(fg_green + bg_green, 0, 255);
+			uint32_t blue = clamp<uint32_t>(fg_blue + bg_blue, 0, 255);
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+// Subtracts one light-shaded palette color from every pixel of a column
+// (background minus foreground), flooring each channel at 0.
+class FillSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	int dc_color;
+	fixed_t dc_light;
+
+public:
+	FillSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_color = ::dc_color;
+		dc_light = ::dc_light;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// The difference is biased by 256 to keep the unsigned math
+			// non-negative, clamped to [256, 511], then the same 256 is
+			// removed again so the result lands in [0, 255].
+			uint32_t red = clamp<uint32_t>(256 - fg_red + bg_red, 256, 256 + 255) - 256;
+			uint32_t green = clamp<uint32_t>(256 - fg_green + bg_green, 256, 256 + 255) - 256;
+			uint32_t blue = clamp<uint32_t>(256 - fg_blue + bg_blue, 256, 256 + 255) - 256;
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+// Reverse subtract fill: foreground color minus the existing pixel,
+// flooring each channel at 0.
+class FillRevSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	int dc_color;
+	fixed_t dc_light;
+
+public:
+	FillRevSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_color = ::dc_color;
+		dc_light = ::dc_light;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// Biased by 256, clamped to [256, 511], bias removed again.
+			uint32_t red = clamp<uint32_t>(256 + fg_red - bg_red, 256, 256 + 255) - 256;
+			uint32_t green = clamp<uint32_t>(256 + fg_green - bg_green, 256, 256 + 255) - 256;
+			uint32_t blue = clamp<uint32_t>(256 + fg_blue - bg_blue, 256, 256 + 255) - 256;
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+// Queued fuzz column: each pixel is replaced by a darkened copy of a
+// neighbouring pixel chosen through fuzzoffset[].
+class DrawFuzzColumnRGBACommand : public DrawerCommand
+{
+	int dc_x;
+	int dc_yl;
+	int dc_yh;
+	BYTE *dc_destorg;
+	int dc_pitch;
+	int fuzzpos;
+	int fuzzviewheight;
+
+public:
+	DrawFuzzColumnRGBACommand()
+	{
+		dc_x = ::dc_x;
+		dc_yl = ::dc_yl;
+		dc_yh = ::dc_yh;
+		dc_destorg = ::dc_destorg;
+		dc_pitch = ::dc_pitch;
+		fuzzpos = ::fuzzpos;
+		fuzzviewheight = ::fuzzviewheight;
+
+		// Execute() only updates this command's own copy of fuzzpos, so move
+		// the global on here by the number of pixels this column will draw;
+		// otherwise every fuzz column would start at the same table offset.
+		// NOTE(review): assumes no caller outside this chunk already advances
+		// ::fuzzpos after queueing -- confirm.
+		int yl = (dc_yl == 0) ? 1 : dc_yl;
+		int yh = (dc_yh > fuzzviewheight) ? fuzzviewheight : dc_yh;
+		if (yl <= yh)
+			::fuzzpos = (fuzzpos + (yh - yl + 1)) % FUZZTABLE;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+
+		// Adjust borders. Low...
+		if (dc_yl == 0)
+			dc_yl = 1;
+
+		// .. and high.
+		if (dc_yh > fuzzviewheight)
+			dc_yh = fuzzviewheight;
+
+		count = dc_yh - dc_yl;
+
+		// Zero length.
+		if (count < 0)
+			return;
+
+		count++;
+
+		dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg;
+
+		// Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black)
+		// I'm not sure if this is really always the case or not.
+
+		// Scales each color channel of a pixel to 3/4 and forces the top
+		// byte to 0xff. Shared by the three loops below.
+		auto darken = [](uint32_t bg) -> uint32_t
+		{
+			uint32_t red = ((bg >> 16) & 0xff) * 3 / 4;
+			uint32_t green = ((bg >> 8) & 0xff) * 3 / 4;
+			uint32_t blue = (bg & 0xff) * 3 / 4;
+			return 0xff000000 | (red << 16) | (green << 8) | blue;
+		};
+
+		{
+			// [RH] Make local copies of global vars to try and improve
+			// the optimizations made by the compiler.
+			int pitch = dc_pitch;
+			int fuzz = fuzzpos;
+			int cnt;
+
+			// [RH] Split this into three separate loops to minimize
+			// the number of times fuzzpos needs to be clamped.
+
+			// 1) Run from the starting offset up to the end of the table.
+			if (fuzz)
+			{
+				cnt = MIN(FUZZTABLE - fuzz, count);
+				count -= cnt;
+				do
+				{
+					*dest = darken(dest[fuzzoffset[fuzz++]]);
+					dest += pitch;
+				} while (--cnt);
+			}
+			if (fuzz == FUZZTABLE || count > 0)
+			{
+				// 2) Whole passes over the table.
+				while (count >= FUZZTABLE)
+				{
+					fuzz = 0;
+					cnt = FUZZTABLE;
+					count -= FUZZTABLE;
+					do
+					{
+						*dest = darken(dest[fuzzoffset[fuzz++]]);
+						dest += pitch;
+					} while (--cnt);
+				}
+				// 3) Whatever is left, starting again from the table's first entry.
+				fuzz = 0;
+				if (count > 0)
+				{
+					do
+					{
+						*dest = darken(dest[fuzzoffset[fuzz++]]);
+						dest += pitch;
+					} while (--count);
+				}
+			}
+			// This assigns the member copied at construction, not the
+			// ::fuzzpos global.
+			fuzzpos = fuzz;
+		}
+	}
+};
+
+// Queued translucent column: blends shaded texels over the destination
+// using the source/destination alpha captured at construction.
+class DrawAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+
+		fixed_t fracstep = dc_iscale;
+		fixed_t frac = dc_texturefrac;
+
+		const BYTE *source = dc_source;
+		int pitch = dc_pitch;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		// Alphas are reduced from FRACBITS to 8 fractional bits, matching the
+		// divide by 256 in the blend below.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		do
+		{
+			uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// Template argument spelled out because the operands mix
+			// uint32_t values with int literals.
+			uint32_t red = clamp<uint32_t>((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+			uint32_t green = clamp<uint32_t>((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+			uint32_t blue = clamp<uint32_t>((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+};
+
+class DrawTranslatedColumnRGBACommand : public
DrawerCommand
+{
+	// Queued translated column: every texel goes through dc_translation
+	// before being shaded to true color.
+	int dc_count;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+
+public:
+	DrawTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+
+		fixed_t fracstep = dc_iscale;
+		fixed_t frac = dc_texturefrac;
+
+		// [RH] Local copies of global vars to improve compiler optimizations
+		BYTE *translation = dc_translation;
+		const BYTE *source = dc_source;
+		int pitch = dc_pitch;
+
+		do
+		{
+			*dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+};
+
+// Queued translated + translucent column: texels are translated, shaded,
+// then blended over the destination with the captured alphas.
+class DrawTlatedAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+	// Captured like every other input: without these members Execute() would
+	// read whatever the dc_srcalpha/dc_destalpha globals hold when the queue
+	// is finally run, not what they held when the column was queued.
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawTlatedAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		uint32_t *dest = (uint32_t*)dc_dest;
+
+		fixed_t fracstep = dc_iscale;
+		fixed_t frac = dc_texturefrac;
+
+		BYTE *translation = dc_translation;
+		const BYTE *source = dc_source;
+		int pitch = dc_pitch;
+
+		// Alphas are reduced from FRACBITS to 8 fractional bits, matching the
+		// divide by 256 in the blend below.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		do
+		{
+			uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// Template argument spelled out because the operands mix
+			// uint32_t values with int literals.
+			uint32_t red = clamp<uint32_t>((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+			uint32_t green = clamp<uint32_t>((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+			uint32_t blue = clamp<uint32_t>((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+			frac += fracstep;
+		} while (--count);
+	}
+};
+
+// Queued shaded column: blends one light-shaded color over the destination,
+// with per-texel coverage looked up through dc_colormap.
+class DrawShadedColumnRGBACommand : public DrawerCommand
+{
+private:
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	fixed_t dc_light;
+	const BYTE *dc_source;
+	lighttable_t *dc_colormap;
+	int dc_color;
+	int dc_pitch;
+
+public:
+	DrawShadedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_light = ::dc_light;
+		dc_source = ::dc_source;
+		dc_colormap = ::dc_colormap;
+		dc_color = ::dc_color;
+		dc_pitch = ::dc_pitch;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac, fracstep;
+
+		count = dc_count;
+
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		uint32_t fg = shade_pal_index_simple(dc_color,
calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + + do + { + DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); + DWORD inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawAddClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawAddClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = 
shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + BYTE *dc_translation; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawAddClampTranslatedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_translation = ::dc_translation; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = 
shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + 
uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + BYTE *dc_translation; + +public: + DrawSubClampTranslatedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_translation = ::dc_translation; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = 
shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawRevSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + DrawRevSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(source[frac >> 
FRACBITS], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + BYTE *dc_translation; + +public: + DrawRevSubClampTranslatedColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_translation = ::dc_translation; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha 
>> (FRACBITS - 8); + + do + { + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } + } +}; + +class DrawSpanRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_x1; + int ds_x2; + int ds_y; + int ds_xbits; + int ds_ybits; + BYTE *dc_destorg; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + +public: + DrawSpanRGBACommand() + { + ds_source = ::ds_source; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + 
if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +#else + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. 
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+#endif
+};
+
+// Draws one horizontal span of a flat into the 32-bit frame buffer, leaving the
+// destination untouched wherever the source texel is palette index 0.
+//
+// The constructor snapshots every ds_* / dc_* global that Execute() needs, so
+// Execute() depends only on the captured copies (presumably because it can run
+// after the globals have been changed for a later span - confirm against the
+// DrawerCommand queue).
+class DrawSpanMaskedRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Must be named ds_x2: without a member of this name the constructor's
+	// "ds_x2 = ::ds_x2" and Execute()'s "ds_x2" both bind to the global.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+
+public:
+	DrawSpanMaskedRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+	}
+
+	// Writes pixels ds_x1..ds_x2 (inclusive) of row ds_y. The do/while loops run
+	// at least once, so the span is expected to be non-empty (ds_x2 >= ds_x1).
+	void Execute(DrawerThread *thread) override
+	{
+		const BYTE* source = ds_source;
+		int spot;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		dsfixed_t xfrac = ds_xfrac;
+		dsfixed_t yfrac = ds_yfrac;
+		dsfixed_t xstep = ds_xstep;
+		dsfixed_t ystep = ds_ystep;
+
+		uint32_t* dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+		int count = ds_x2 - ds_x1 + 1;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+				BYTE texdata = source[spot];
+				if (texdata != 0)
+				{
+					*dest = shade_pal_index(texdata, light, shade_constants);
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: shifts and mask are derived from the texture's bit sizes.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+				BYTE texdata = source[spot];
+				if (texdata != 0)
+				{
+					*dest = shade_pal_index(texdata, light, shade_constants);
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws one horizontal span of a flat, blending every shaded texel with the
+// existing frame-buffer pixel as (fg * srcalpha + bg * destalpha) / 256 per
+// channel, with no clamping.
+//
+// All inputs, including the two blend factors, are captured in the constructor
+// so Execute() never reads the drawer globals.
+class DrawSpanTranslucentRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Must be named ds_x2 so the constructor assignment fills this member
+	// instead of self-assigning the global.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+	// Blend factors, captured like the column commands do.
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSpanTranslucentRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	// Writes pixels ds_x1..ds_x2 (inclusive) of row ds_y. The do/while loops run
+	// at least once, so the span is expected to be non-empty (ds_x2 >= ds_x1).
+	void Execute(DrawerThread *thread) override
+	{
+		const BYTE* source = ds_source;
+		int spot;
+
+		dsfixed_t xfrac = ds_xfrac;
+		dsfixed_t yfrac = ds_yfrac;
+		dsfixed_t xstep = ds_xstep;
+		dsfixed_t ystep = ds_ystep;
+
+		uint32_t* dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+		int count = ds_x2 - ds_x1 + 1;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		// Reduce the fixed-point factors to 8 fractional bits; the "/ 256" in the
+		// blend below removes that scale again.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+				uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+				uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: shifts and mask are derived from the texture's bit sizes.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+				uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+				uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+// Same blend as DrawSpanTranslucentRGBACommand, (fg * srcalpha + bg * destalpha)
+// / 256 per channel without clamping, but texels with palette index 0 leave the
+// destination pixel untouched.
+//
+// All inputs, including the two blend factors, are captured in the constructor
+// so Execute() never reads the drawer globals.
+class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Must be named ds_x2 so the constructor assignment fills this member
+	// instead of self-assigning the global.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+	// Blend factors, captured like the column commands do.
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSpanMaskedTranslucentRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	// Writes pixels ds_x1..ds_x2 (inclusive) of row ds_y. The do/while loops run
+	// at least once, so the span is expected to be non-empty (ds_x2 >= ds_x1).
+	void Execute(DrawerThread *thread) override
+	{
+		const BYTE* source = ds_source;
+		int spot;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		// Reduce the fixed-point factors to 8 fractional bits; the "/ 256" in the
+		// blend below removes that scale again.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		dsfixed_t xfrac = ds_xfrac;
+		dsfixed_t yfrac = ds_yfrac;
+		dsfixed_t xstep = ds_xstep;
+		dsfixed_t ystep = ds_ystep;
+
+		uint32_t* dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+		int count = ds_x2 - ds_x1 + 1;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+				BYTE texdata = source[spot];
+				if (texdata != 0)
+				{
+					uint32_t fg = shade_pal_index(texdata, light, shade_constants);
+					uint32_t fg_red = (fg >> 16) & 0xff;
+					uint32_t fg_green = (fg >> 8) & 0xff;
+					uint32_t fg_blue = (fg) & 0xff;
+
+					uint32_t bg_red = (*dest >> 16) & 0xff;
+					uint32_t bg_green = (*dest >> 8) & 0xff;
+					uint32_t bg_blue = (*dest) & 0xff;
+
+					uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+					uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+					uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+					*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: shifts and mask are derived from the texture's bit sizes.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+				BYTE texdata = source[spot];
+				if (texdata != 0)
+				{
+					uint32_t fg = shade_pal_index(texdata, light, shade_constants);
+					uint32_t fg_red = (fg >> 16) & 0xff;
+					uint32_t fg_green = (fg >> 8) & 0xff;
+					uint32_t fg_blue = (fg) & 0xff;
+
+					uint32_t bg_red = (*dest >> 16) & 0xff;
+					uint32_t bg_green = (*dest >> 8) & 0xff;
+					uint32_t bg_blue = (*dest) & 0xff;
+
+					uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+					uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+					uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+					*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws one horizontal span of a flat, blending every shaded texel with the
+// existing frame-buffer pixel as (fg * srcalpha + bg * destalpha) / 256 per
+// channel and clamping each channel to 0..255.
+//
+// All inputs, including the two blend factors, are captured in the constructor
+// so Execute() never reads the drawer globals.
+class DrawSpanAddClampRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Must be named ds_x2 so the constructor assignment fills this member
+	// instead of self-assigning the global.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+	// Blend factors, captured like the column commands do.
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSpanAddClampRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	// Writes pixels ds_x1..ds_x2 (inclusive) of row ds_y. The do/while loops run
+	// at least once, so the span is expected to be non-empty (ds_x2 >= ds_x1).
+	void Execute(DrawerThread *thread) override
+	{
+		const BYTE* source = ds_source;
+		int spot;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		// Reduce the fixed-point factors to 8 fractional bits; the "/ 256" in the
+		// blend below removes that scale again.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		dsfixed_t xfrac = ds_xfrac;
+		dsfixed_t yfrac = ds_yfrac;
+		dsfixed_t xstep = ds_xstep;
+		dsfixed_t ystep = ds_ystep;
+
+		uint32_t* dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+		int count = ds_x2 - ds_x1 + 1;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: shifts and mask are derived from the texture's bit sizes.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+// Same clamped additive blend as DrawSpanAddClampRGBACommand, but texels with
+// palette index 0 leave the destination pixel untouched.
+//
+// All inputs, including the two blend factors, are captured in the constructor
+// so Execute() never reads the drawer globals.
+class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Must be named ds_x2 so the constructor assignment fills this member
+	// instead of self-assigning the global.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+	// Blend factors, captured like the column commands do.
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSpanMaskedAddClampRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	// Writes pixels ds_x1..ds_x2 (inclusive) of row ds_y. The do/while loops run
+	// at least once, so the span is expected to be non-empty (ds_x2 >= ds_x1).
+	void Execute(DrawerThread *thread) override
+	{
+		const BYTE* source = ds_source;
+		int spot;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		// Reduce the fixed-point factors to 8 fractional bits; the "/ 256" in the
+		// blend below removes that scale again.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		dsfixed_t xfrac = ds_xfrac;
+		dsfixed_t yfrac = ds_yfrac;
+		dsfixed_t xstep = ds_xstep;
+		dsfixed_t ystep = ds_ystep;
+
+		uint32_t* dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+		int count = ds_x2 - ds_x1 + 1;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+				BYTE texdata = source[spot];
+				if (texdata != 0)
+				{
+					uint32_t fg = shade_pal_index(texdata, light, shade_constants);
+					uint32_t fg_red = (fg >> 16) & 0xff;
+					uint32_t fg_green = (fg >> 8) & 0xff;
+					uint32_t fg_blue = (fg) & 0xff;
+
+					uint32_t bg_red = (*dest >> 16) & 0xff;
+					uint32_t bg_green = (*dest >> 8) & 0xff;
+					uint32_t bg_blue = (*dest) & 0xff;
+
+					// Clamp as the unmasked variant does: an unclamped sum above
+					// 255 would spill into the neighbouring channel when shifted.
+					uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+					uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+					uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+					*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: shifts and mask are derived from the texture's bit sizes.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+				BYTE texdata = source[spot];
+				if (texdata != 0)
+				{
+					uint32_t fg = shade_pal_index(texdata, light, shade_constants);
+					uint32_t fg_red = (fg >> 16) & 0xff;
+					uint32_t fg_green = (fg >> 8) & 0xff;
+					uint32_t fg_blue = (fg) & 0xff;
+
+					uint32_t bg_red = (*dest >> 16) & 0xff;
+					uint32_t bg_green = (*dest >> 8) & 0xff;
+					uint32_t bg_blue = (*dest) & 0xff;
+
+					// Clamp as the unmasked variant does: an unclamped sum above
+					// 255 would spill into the neighbouring channel when shifted.
+					uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+					uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+					uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+					*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+class FillSpanRGBACommand : public DrawerCommand
+{
+	int ds_x1;
+	int ds_x2;
+	int ds_y;
+	BYTE *dc_destorg;
+	fixed_t ds_light;
+	int ds_color;
+
+public:
+	FillSpanRGBACommand()
+	{
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		dc_destorg = ::dc_destorg;
+		ds_light = ::ds_light;
+
ds_color = ::ds_color; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + int count = (ds_x2 - ds_x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index_simple(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class Vlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int vlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Vlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + vlinebits = ::vlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = vlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Vlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int vlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Vlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + vlinebits = ::vlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = 
::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> 
bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Mvlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int mvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Mvlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + mvlinebits = ::mvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = 
(uint32_t*)dc_dest; + int bits = mvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + *dest = shade_pal_index(pix, light, shade_constants); + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Mvlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int mvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Mvlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + mvlinebits = ::mvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, 
shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; 
+ BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Tmvline1AddRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = 
shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4AddRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4AddRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i]
>> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; // blend against column i's own pixel (the one written below), not column 0 + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1AddClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light,
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4AddClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4AddClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if 
(pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1SubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1SubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, 
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4SubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4SubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i 
= 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1RevSubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1RevSubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = 
dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4RevSubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4RevSubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = 
dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class DrawFogBoundaryLineRGBACommand : public DrawerCommand +{ + int _y; + int _x; + int _x2; + BYTE *dc_destorg; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + DrawFogBoundaryLineRGBACommand(int y, int x, int x2) + { + _y = y; + _x = x; + _x2 = x2; + + dc_destorg = ::dc_destorg; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + int y = _y; + int x = _x; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; + + do + { + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - 
constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; + } while (++x <= x2); + } +}; + +///////////////////////////////////////////////////////////////////////////// + +void R_FinishDrawerCommands() +{ + DrawerCommandQueue::Finish(); +} + +void R_DrawColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillColumnP_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddColumn_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillRevSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFuzzColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); + fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; +} + +void R_DrawAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTlatedAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawShadedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampColumnP_RGBA_C() 
+{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSpan_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD vlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void vlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD mvlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void mvlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void 
tmvline4_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +{ + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } +} + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +{ + // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis + + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + BYTE *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection_RGBA(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection_RGBA(t2, b2, x1); + } +} diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index ca6862ed6..c829c2dc4 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -1019,6 +1019,14 @@ void rt_initcols_pal (BYTE *buff) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; } +void rt_span_coverage_pal(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; +} + // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. 
void R_DrawColumnHorizP_C (void) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index ff5c0d82f..bbf68a795 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -46,53 +46,1560 @@ #include #endif -uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; -uint32_t *dc_temp_rgba; - -// Defined in r_draw_t.cpp: extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; +///////////////////////////////////////////////////////////////////////////// + +class RtCopy1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch * 2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch * 2] = source[8]; + dest[pitch * 3] = source[12]; + source += 16; + dest += pitch * 4; + } while (--count); + } +}; + +class RtMap1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + 
} + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = shade_pal_index(*source, light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +}; + +class RtMap4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (count & 1) { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], light, 
shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + 
SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + } +#endif +}; + +class RtTranslate1colRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int hx; + int yl; + int yh; + +public: + RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh) + { + this->translation = translation; + this->hx = hx; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx]; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. 
Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + BYTE b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } + } +}; + +class RtTranslate4colsRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int yl; + int yh; + +public: + RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh) + { + this->translation = translation; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4]; + int c0, c1; + BYTE b0, b1; + + // Do 2 rows at a time. 
+ for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } + } +}; + +class RtAdd1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + 
uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtAdd4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + RtAdd4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t 
green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, 
mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtShaded1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + BYTE *dc_destorg; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void 
Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtShaded4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + int dc_color; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + +public: + RtShaded4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_color = ::dc_color; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 
8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, 
alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } +#endif +}; + +class RtAddClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + 
+class RtAddClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtAddClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + 
(uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t 
p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + 
uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], 
light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + 
+ uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = 
clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtInitColsRGBACommand : public DrawerCommand +{ + BYTE *buff; + +public: + RtInitColsRGBACommand(BYTE *buff) + { + this->buff = buff; + } + + void Execute(DrawerThread *thread) override + { + thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; + } +}; + +class DrawColumnHorizRGBACommand : public DrawerCommand +{ + int dc_count; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_x; + int dc_yl; + int dc_yh; + +public: + DrawColumnHorizRGBACommand() + { + dc_count = ::dc_count; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_x = ::dc_x; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + uint32_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + 
+ do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + } +}; + +class FillColumnHorizRGBACommand : public DrawerCommand +{ + int dc_x; + int dc_yl; + int dc_yh; + int dc_count; + int dc_color; + +public: + FillColumnHorizRGBACommand() + { + dc_x = ::dc_x; + dc_count = ::dc_count; + dc_color = ::dc_color; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + int color = dc_color; + uint32_t *dest; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } +}; + +///////////////////////////////////////////////////////////////////////////// + // Copies one span at hx to the screen at sx. 
void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. @@ -108,293 +1615,23 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = shade_pal_index(*source, light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - if (count & 1) { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); - dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); -} - -// Maps all four spans to the screen starting at sx. 
-void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do 
{ - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } + DrawerCommandQueue::QueueCommand(sx, yl, yh); } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. 
- for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } + DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } + DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. 
@@ -414,195 +1651,15 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx without clamping. 
-#ifndef NO_SSE -void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -620,332 +1677,27 @@ void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. 
void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Shades all four spans to the screen starting at sx. 
-#ifndef NO_SSE -void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) -{ - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); -} -#endif - // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. 
void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx with clamping. 
-#ifndef NO_SSE -void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -963,91 +1715,13 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. 
void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -1067,91 +1741,13 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. 
void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. @@ -1172,102 +1768,41 @@ void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) // call this function to set up the span pointers. void rt_initcols_rgba (BYTE *buff) { - int y; - - dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : (uint32_t*)buff; - for (y = 3; y >= 0; y--) + for (int y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buff); +} + +void rt_span_coverage_rgba(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; } // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. void R_DrawColumnHorizP_RGBA_C (void) { - int count = dc_count; - uint32_t *dest; - fixed_t fracstep; - fixed_t frac; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span; - - span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - - if (count & 1) { - *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest[16]= source[frac>>FRACBITS]; frac += fracstep; - dest[20]= source[frac>>FRACBITS]; frac += fracstep; - dest[24]= source[frac>>FRACBITS]; frac += fracstep; - dest[28]= source[frac>>FRACBITS]; frac += fracstep; - dest += 32; - } while 
(--count); - } + DrawerCommandQueue::QueueCommand(); } // [RH] Just fills a column with a given color void R_FillColumnHorizP_RGBA_C (void) { - int count = dc_count; - BYTE color = dc_color; - uint32_t *dest; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); + DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_main.cpp b/src/r_main.cpp index a795f8016..348c70120 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -979,6 +979,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); + R_FinishDrawerCommands(); + viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 645741a2a..62190b606 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -165,6 +165,7 @@ void FSoftwareRenderer::RenderView(player_t *player) R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. FCanvasTextureInfo::UpdateAll (); + R_FinishDrawerCommands(); } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index c132cc2fd..b3a2daefe 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -706,10 +706,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop } else { - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = span->Start; - (*tspan)[1] = span->Stop - 1; - *tspan += 2; + rt_span_coverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3)