Minor code cleanup

This commit is contained in:
Magnus Norddahl 2016-06-14 23:05:20 +02:00
parent 9c8c1e0ea5
commit 77c4786b9d
10 changed files with 277 additions and 308 deletions

View file

@ -78,7 +78,7 @@ bool wipe_initMelt (int ticks)
int i, r;
// copy start screen to main screen
screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start);
screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start);
// makes this wipe faster (in theory)
// to have stuff in column-major format
@ -271,8 +271,7 @@ bool wipe_doBurn (int ticks)
// Draw the screen
int xstep, ystep, firex, firey;
int x, y;
BYTE *to;
BYTE *fromold, *fromnew;
BYTE *to, *fromold, *fromnew;
const int SHIFT = 16;
xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH;

View file

@ -63,7 +63,7 @@ extern int ST_Y;
BYTE* viewimage;
extern "C" {
int ylookup[MAXHEIGHT];
BYTE* dc_destorg;
BYTE *dc_destorg;
}
int scaledviewwidth;
@ -276,7 +276,7 @@ void R_DrawColumnP_C (void)
{
// Re-map color indices from wall texture column
// using a lighting/special effects LUT.
*dest = colormap[source[frac >> FRACBITS]];
*dest = colormap[source[frac>>FRACBITS]];
dest += pitch;
frac += fracstep;
@ -321,13 +321,12 @@ void R_FillAddColumn_C (void)
return;
dest = dc_dest;
int pitch = dc_pitch;
DWORD *bg2rgb;
DWORD fg;
bg2rgb = dc_destblend;
fg = dc_srccolor;
int pitch = dc_pitch;
do
{
@ -348,13 +347,12 @@ void R_FillAddClampColumn_C (void)
return;
dest = dc_dest;
int pitch = dc_pitch;
DWORD *bg2rgb;
DWORD fg;
bg2rgb = dc_destblend;
fg = dc_srccolor;
int pitch = dc_pitch;
do
{
@ -381,13 +379,12 @@ void R_FillSubClampColumn_C (void)
return;
dest = dc_dest;
int pitch = dc_pitch;
DWORD *bg2rgb;
DWORD fg;
bg2rgb = dc_destblend;
fg = dc_srccolor | 0x40100400;
int pitch = dc_pitch;
do
{
@ -413,13 +410,12 @@ void R_FillRevSubClampColumn_C (void)
return;
dest = dc_dest;
int pitch = dc_pitch;
DWORD *bg2rgb;
DWORD fg;
bg2rgb = dc_destblend;
fg = dc_srccolor;
int pitch = dc_pitch;
do
{
@ -672,13 +668,14 @@ void R_DrawTranslatedColumnP_C (void)
{
*dest = colormap[translation[source[frac>>FRACBITS]]];
dest += pitch;
frac += fracstep;
} while (--count);
}
}
// Draw a column that is both translated and translucent
void R_DrawTlatedAddColumnP_C()
void R_DrawTlatedAddColumnP_C (void)
{
int count;
BYTE *dest;
@ -772,15 +769,15 @@ void R_DrawAddClampColumnP_C ()
frac = dc_texturefrac;
{
const BYTE *source = dc_source;
BYTE *colormap = dc_colormap;
const BYTE *source = dc_source;
int pitch = dc_pitch;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do
{
DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest];
DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest];
DWORD b = a;
a |= 0x01f07c1f;
@ -788,7 +785,7 @@ void R_DrawAddClampColumnP_C ()
a &= 0x3fffffff;
b = b - (b >> 5);
a |= b;
*dest = RGB32k.All[a & (a >> 15)];
*dest = RGB32k.All[a & (a>>15)];
dest += pitch;
frac += fracstep;
} while (--count);
@ -1190,9 +1187,6 @@ void R_DrawSpanP_C (void)
} while (--count);
}
}
#endif
#ifndef X86_ASM
// [RH] Draw a span with holes
void R_DrawSpanMaskedP_C (void)
@ -1282,8 +1276,6 @@ void R_DrawSpanTranslucentP_C (void)
xstep = ds_xstep;
ystep = ds_ystep;
uint32_t light = calc_light_multiplier(ds_light);
if (ds_xbits == 6 && ds_ybits == 6)
{
// 64x64 is the most common case by far, so special case it.
@ -1334,8 +1326,6 @@ void R_DrawSpanMaskedTranslucentP_C (void)
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
uint32_t light = calc_light_multiplier(ds_light);
xfrac = ds_xfrac;
yfrac = ds_yfrac;
@ -1426,7 +1416,6 @@ void R_DrawSpanAddClampP_C (void)
do
{
spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6));
DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest];
DWORD b = a;
@ -1436,7 +1425,6 @@ void R_DrawSpanAddClampP_C (void)
b = b - (b >> 5);
a |= b;
*dest++ = RGB32k.All[a & (a>>15)];
xfrac += xstep;
yfrac += ystep;
} while (--count);
@ -1449,7 +1437,6 @@ void R_DrawSpanAddClampP_C (void)
do
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest];
DWORD b = a;
@ -1459,14 +1446,12 @@ void R_DrawSpanAddClampP_C (void)
b = b - (b >> 5);
a |= b;
*dest++ = RGB32k.All[a & (a>>15)];
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
}
void R_DrawSpanMaskedAddClampP_C (void)
{
dsfixed_t xfrac;
@ -1481,8 +1466,6 @@ void R_DrawSpanMaskedAddClampP_C (void)
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
uint32_t light = calc_light_multiplier(ds_light);
xfrac = ds_xfrac;
yfrac = ds_yfrac;
@ -1552,7 +1535,7 @@ void R_DrawSpanMaskedAddClampP_C (void)
// [RH] Just fill a span with a color
void R_FillSpan_C (void)
{
memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1));
memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1);
}
@ -1759,7 +1742,7 @@ DWORD vlinec1 ()
do
{
*dest = colormap[source[frac >> bits]];
*dest = colormap[source[frac>>bits]];
frac += fracstep;
dest += pitch;
} while (--count);
@ -1830,9 +1813,7 @@ DWORD mvlinec1 ()
return frac;
}
#endif
#if !defined(X86_ASM)
void mvlinec4 ()
{
BYTE *dest = dc_dest;
@ -1843,6 +1824,7 @@ void mvlinec4 ()
do
{
BYTE pix;
pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0];
pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1];
pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2];
@ -1879,7 +1861,6 @@ static void R_DrawFogBoundaryLine (int y, int x)
int x2 = spanend[y];
BYTE *colormap = dc_colormap;
BYTE *dest = ylookup[y] + dc_destorg;
do
{
dest[x] = colormap[dest[x]];
@ -1996,8 +1977,6 @@ fixed_t tmvline1_add_C ()
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
uint32_t light = calc_light_multiplier(dc_light);
do
{
BYTE pix = source[frac>>bits];
@ -2024,12 +2003,6 @@ void tmvline4_add_C ()
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
uint32_t light[4];
light[0] = calc_light_multiplier(palookuplight[0]);
light[1] = calc_light_multiplier(palookuplight[1]);
light[2] = calc_light_multiplier(palookuplight[2]);
light[3] = calc_light_multiplier(palookuplight[3]);
do
{
for (int i = 0; i < 4; ++i)
@ -2062,8 +2035,6 @@ fixed_t tmvline1_addclamp_C ()
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
uint32_t light = calc_light_multiplier(dc_light);
do
{
BYTE pix = source[frac>>bits];

View file

@ -32,7 +32,20 @@ extern "C" int fuzzpos;
extern "C" int fuzzviewheight;
struct FColormap;
struct ShadeConstants;
// Shading parameters read by shade_pal_index(), shade_bgra() and the
// SSE_SHADE* macros. Member order is part of the contract: aggregate
// initialisation relies on it.
struct ShadeConstants
{
	// Per-channel multipliers applied as the last shading step (value / 256).
	uint16_t light_alpha, light_red, light_green, light_blue;

	// Colour the pixel is mixed towards, weighted by (256 - light).
	uint16_t fade_alpha, fade_red, fade_green, fade_blue;

	// Desaturation weight; the shading code pairs it with (256 - desaturate).
	uint16_t desaturate;

	// When true the shaders only scale r/g/b by light / 256 and ignore the
	// fade, tint and desaturate members above.
	bool simple_shade;
};
extern "C" int ylookup[MAXHEIGHT];
@ -58,7 +71,7 @@ extern "C" fixed_t dc_destalpha;
// first pixel in a column
extern "C" const BYTE* dc_source;
extern "C" BYTE* dc_dest, *dc_destorg;
extern "C" BYTE *dc_dest, *dc_destorg;
extern "C" int dc_count;
extern "C" DWORD vplce[4];

View file

@ -24,6 +24,7 @@
#define __R_DRAW_RGBA__
#include "r_draw.h"
#include "v_palette.h"
#include <vector>
#include <memory>
#include <thread>
@ -273,4 +274,216 @@ public:
void Execute(DrawerThread *thread) override;
};
/////////////////////////////////////////////////////////////////////////////
// Pixel shading macros and inline functions:
// FORCEINLINE: the strongest "inline this" spelling the compiler offers.
// A definition made before this point is kept as-is.
#if !defined(FORCEINLINE)
#  ifdef _MSC_VER
#    define FORCEINLINE __forceinline
#  elif defined(__GNUC__)
#    define FORCEINLINE __attribute__((always_inline)) inline
#  else
#    define FORCEINLINE inline
#  endif
#endif
// Turns a fixed-point light value into the multiplier taken by the shade_*
// helpers: 256 minus the light value shifted right by (FRACBITS - 8).
FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light)
{
	const auto shifted = light >> (FRACBITS - 8);
	return 256 - shifted;
}
// Calculates an ARGB8 color for the given palette index and light multiplier.
// The entry is read from GPalette.BaseColors, each of r/g/b is scaled by
// light / 256, and alpha is forced to 0xff.
FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light)
{
	const PalEntry &entry = GPalette.BaseColors[index];

	const uint32_t r = static_cast<uint32_t>(entry.r) * light / 256;
	const uint32_t g = static_cast<uint32_t>(entry.g) * light / 256;
	const uint32_t b = static_cast<uint32_t>(entry.b) * light / 256;

	return 0xff000000 | (r << 16) | (g << 8) | b;
}
// Scales the red (bits 16-23), green (bits 8-15) and blue (bits 0-7) channels
// of `color` by light / 256. The incoming alpha byte is discarded and the
// result always carries alpha 0xff.
FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light)
{
	const uint32_t r = ((color >> 16) & 0xff) * light / 256;
	const uint32_t g = ((color >> 8) & 0xff) * light / 256;
	const uint32_t b = (color & 0xff) * light / 256;

	return 0xff000000 | (r << 16) | (g << 8) | b;
}
// Calculates an ARGB8 color for the given palette index, light multiplier and
// dynamic colormap. Alpha of the result is always 0xff.
//
// With constants.simple_shade set, each channel is only scaled by light / 256.
// Otherwise every channel goes through three steps, each ending in / 256:
//   1. desaturate - mix with a weighted intensity (77/143/37) of the colour
//   2. fade       - mix with constants.fade_* weighted by (256 - light)
//   3. tint       - multiply by constants.light_*
FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants)
{
	const PalEntry &entry = GPalette.BaseColors[index];
	uint32_t r = entry.r;
	uint32_t g = entry.g;
	uint32_t b = entry.b;

	if (!constants.simple_shade)
	{
		const uint32_t fade_weight = 256 - light;
		const uint32_t keep = 256 - constants.desaturate;
		// Taken from the unmodified channels, so it must be computed first.
		const uint32_t grey = ((r * 77 + g * 143 + b * 37) >> 8) * constants.desaturate;

		r = (r * keep + grey) / 256;
		r = (constants.fade_red * fade_weight + r * light) / 256;
		r = (r * constants.light_red) / 256;

		g = (g * keep + grey) / 256;
		g = (constants.fade_green * fade_weight + g * light) / 256;
		g = (g * constants.light_green) / 256;

		b = (b * keep + grey) / 256;
		b = (constants.fade_blue * fade_weight + b * light) / 256;
		b = (b * constants.light_blue) / 256;
	}
	else
	{
		r = r * light / 256;
		g = g * light / 256;
		b = b * light / 256;
	}

	return 0xff000000 | (r << 16) | (g << 8) | b;
}
// Shades a packed colour (red in bits 16-23, green in bits 8-15, blue in bits
// 0-7) with the given light multiplier and shade constants. The incoming
// alpha byte is ignored; the result always has alpha 0xff.
//
// With constants.simple_shade set, each channel is only scaled by light / 256.
// Otherwise every channel is desaturated, faded towards constants.fade_* by
// (256 - light), then multiplied by constants.light_*; each step ends in / 256.
FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants)
{
	uint32_t r = (color >> 16) & 0xff;
	uint32_t g = (color >> 8) & 0xff;
	uint32_t b = color & 0xff;

	if (!constants.simple_shade)
	{
		const uint32_t fade_weight = 256 - light;
		const uint32_t keep = 256 - constants.desaturate;
		// Taken from the unmodified channels, so it must be computed first.
		const uint32_t grey = ((r * 77 + g * 143 + b * 37) >> 8) * constants.desaturate;

		r = (r * keep + grey) / 256;
		r = (constants.fade_red * fade_weight + r * light) / 256;
		r = (r * constants.light_red) / 256;

		g = (g * keep + grey) / 256;
		g = (constants.fade_green * fade_weight + g * light) / 256;
		g = (g * constants.light_green) / 256;

		b = (b * keep + grey) / 256;
		b = (constants.fade_blue * fade_weight + b * light) / 256;
		b = (b * constants.light_blue) / 256;
	}
	else
	{
		r = r * light / 256;
		g = g * light / 256;
		b = b * light / 256;
	}

	return 0xff000000 | (r << 16) | (g << 8) | b;
}
// Blends `fg` over `bg` using the alpha byte of `fg` (bits 24-31). The alpha
// byte of `bg` is not read, and the result always has alpha 0xff.
FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
{
	const uint32_t a = (fg >> 24) & 0xff;
	// 255 -> 256, so a fully opaque foreground gets weight 256 and the
	// background weight 0.
	const uint32_t fg_weight = a + (a >> 7);
	const uint32_t bg_weight = 256 - fg_weight;

	const uint32_t r = ((((fg >> 16) & 0xff) * fg_weight) + (((bg >> 16) & 0xff) * bg_weight)) / 256;
	const uint32_t g = ((((fg >> 8) & 0xff) * fg_weight) + (((bg >> 8) & 0xff) * bg_weight)) / 256;
	const uint32_t b = (((fg & 0xff) * fg_weight) + ((bg & 0xff) * bg_weight)) / 256;

	return 0xff000000 | (r << 16) | (g << 8) | b;
}
// Calculate constants for a simple shade.
// Declares mlight_hi and mlight_lo in the invoking scope. Both hold `light`
// in three 16-bit lanes and 256 in the fourth lane of each group of four.
#define SSE_SHADE_SIMPLE_INIT(light) \
	__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light), \
	        mlight_lo = mlight_hi;
// Calculate constants for a simple shade with different light levels for each pixel.
// Declares mlight_hi (built from light1 and light0) and mlight_lo (built from
// light3 and light2) in the invoking scope; each group of four lanes holds
// 256 followed by three copies of its light value.
// NOTE(review): SSE_SHADE_SIMPLE multiplies the _mm_unpackhi_epi8 half of the
// pixels by mlight_hi, so light0/light1 land on the upper half of the
// register - confirm this matches the argument order used by callers.
#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \
	__m128i mlight_hi = _mm_set_epi16( \
		256, light1, light1, light1, \
		256, light0, light0, light0); \
	__m128i mlight_lo = _mm_set_epi16( \
		256, light3, light3, light3, \
		256, light2, light2, light2);
// Simple shade 4 pixels.
// Expects mlight_hi and mlight_lo in scope (see the SSE_SHADE_SIMPLE_INIT*
// macros). `fg` is read more than once and then assigned, so it must be a
// side-effect-free __m128i lvalue. Each byte is widened to 16 bits,
// multiplied by its light lane, shifted right by 8 and packed back to bytes
// with unsigned saturation.
#define SSE_SHADE_SIMPLE(fg) { \
	__m128i fg_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), mlight_hi); \
	__m128i fg_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), mlight_lo); \
	fg = _mm_packus_epi16(_mm_srli_epi16(fg_lo, 8), _mm_srli_epi16(fg_hi, 8)); \
}
// Calculate constants for a complex shade (one light level for all pixels).
//
// Declares, in the invoking scope: mlight_hi, mlight_lo, color, fade,
// fade_amount_hi, fade_amount_lo and inv_desaturate. SSE_SHADE reads all of
// these except the intermediate `fade`.
//   light           - light multiplier placed in three lanes of each group of
//                     four; the fourth lane is 256.
//   shade_constants - expression exposing the ShadeConstants members; it is
//                     parenthesised on use, so `*ptr` style arguments work.
//
// The last line deliberately carries no trailing backslash: the macro ends
// there and cannot absorb whatever line happens to follow it.
#define SSE_SHADE_INIT(light, shade_constants) \
	__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
	__m128i mlight_lo = mlight_hi; \
	__m128i color = _mm_set_epi16( \
		(shade_constants).light_alpha, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue, \
		(shade_constants).light_alpha, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue); \
	__m128i fade = _mm_set_epi16( \
		(shade_constants).fade_alpha, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue, \
		(shade_constants).fade_alpha, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue); \
	__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
	__m128i fade_amount_lo = fade_amount_hi; \
	__m128i inv_desaturate = _mm_set1_epi16(256 - (shade_constants).desaturate);
// Calculate constants for a complex shade with different light levels for each pixel.
//
// Declares, in the invoking scope: mlight_hi (from light1/light0), mlight_lo
// (from light3/light2), color, fade, fade_amount_hi, fade_amount_lo and
// inv_desaturate. SSE_SHADE reads all of these except the intermediate `fade`.
//   shade_constants - expression exposing the ShadeConstants members; it is
//                     parenthesised on use, so `*ptr` style arguments work.
// NOTE(review): SSE_SHADE applies mlight_hi to the _mm_unpackhi_epi8 half of
// the pixels, so light0/light1 land on the upper half of the register -
// confirm this matches the argument order used by callers.
//
// The last line deliberately carries no trailing backslash: the macro ends
// there and cannot absorb whatever line happens to follow it.
#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \
	__m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
	__m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \
	__m128i color = _mm_set_epi16( \
		(shade_constants).light_alpha, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue, \
		(shade_constants).light_alpha, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue); \
	__m128i fade = _mm_set_epi16( \
		(shade_constants).fade_alpha, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue, \
		(shade_constants).fade_alpha, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue); \
	__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
	__m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \
	__m128i inv_desaturate = _mm_set1_epi16(256 - (shade_constants).desaturate);
// Complex shade 4 pixels
//
// Shades the pixels packed in `fg` in place. The macro reads mlight_hi,
// mlight_lo, fade_amount_hi, fade_amount_lo, color and inv_desaturate from
// the invoking scope (the names declared by SSE_SHADE_INIT / SSE_SHADE_INIT4)
// and shade_constants.desaturate from its second argument.
//
// `fg` is evaluated several times and assigned at the end, so it must be a
// side-effect-free __m128i lvalue.
//
// The upper (unpackhi) and lower (unpacklo) halves are handled the same way:
//   1. widen the bytes to 16-bit lanes;
//   2. per pixel, sum the lanes weighted 37/143/77 (lanes 0/1/2 of the pixel),
//      shift right by 8 and multiply by shade_constants.desaturate; the
//      result is broadcast to the pixel's four lanes;
//   3. (fg * inv_desaturate + intensity) >> 8, using a saturating add;
//   4. (fg * mlight + fade_amount) >> 8, using a saturating add;
//   5. (fg * color) >> 8.
// Both halves are then packed back to bytes with unsigned saturation.
//
// Keep `//` comments out of the macro body: a `//` comment on a line that
// ends in a backslash would also swallow the next line.
#define SSE_SHADE(fg, shade_constants) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
\
__m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \
intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \
\
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \
\
__m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \
intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \
\
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \
\
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
#endif

View file

@ -340,13 +340,13 @@ void rt_add1col_c (int hx, int sx, int yl, int yh)
return;
count++;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4 + hx];
pitch = dc_pitch;
colormap = dc_colormap;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do {
DWORD fg = colormap[*source];
DWORD bg = *dest;
@ -374,14 +374,13 @@ void rt_add4cols_c (int sx, int yl, int yh)
return;
count++;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4];
pitch = dc_pitch;
colormap = dc_colormap;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do {
DWORD fg = colormap[source[0]];
DWORD bg = dest[0];
@ -434,6 +433,7 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh)
// Shades one span at hx to the screen at sx.
void rt_shaded1col_c (int hx, int sx, int yl, int yh)
{
DWORD *fgstart;
BYTE *colormap;
BYTE *source;
BYTE *dest;
@ -445,14 +445,12 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh)
return;
count++;
fgstart = &Col2RGB8[0][dc_color];
colormap = dc_colormap;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4 + hx];
pitch = dc_pitch;
DWORD *fgstart;
fgstart = &Col2RGB8[0][dc_color];
do {
DWORD val = colormap[*source];
DWORD fg = fgstart[val<<8];
@ -466,6 +464,7 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh)
// Shades all four spans to the screen starting at sx.
void rt_shaded4cols_c (int sx, int yl, int yh)
{
DWORD *fgstart;
BYTE *colormap;
BYTE *source;
BYTE *dest;
@ -477,14 +476,12 @@ void rt_shaded4cols_c (int sx, int yl, int yh)
return;
count++;
fgstart = &Col2RGB8[0][dc_color];
colormap = dc_colormap;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4];
pitch = dc_pitch;
DWORD *fgstart;
fgstart = &Col2RGB8[0][dc_color];
do {
DWORD val;
@ -523,14 +520,13 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh)
return;
count++;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4 + hx];
pitch = dc_pitch;
colormap = dc_colormap;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do {
DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest];
DWORD b = a;
@ -639,13 +635,13 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh)
return;
count++;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4 + hx];
pitch = dc_pitch;
colormap = dc_colormap;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do {
DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest];
DWORD b = a;
@ -674,13 +670,13 @@ void rt_subclamp4cols_c (int sx, int yl, int yh)
return;
count++;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4];
pitch = dc_pitch;
colormap = dc_colormap;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do {
DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]];
DWORD b = a;

View file

@ -90,229 +90,6 @@ extern bool r_dontmaplines;
// Converts fixedlightlev into a shade value
#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS)
struct ShadeConstants
{
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
uint16_t desaturate;
bool simple_shade;
};
// calculates the light constant passed to the shade_pal_index function
inline uint32_t calc_light_multiplier(dsfixed_t light)
{
return 256 - (light >> (FRACBITS - 8));
}
// Give the compiler a strong hint we want these functions inlined:
#ifndef FORCEINLINE
#if defined(_MSC_VER)
#define FORCEINLINE __forceinline
#elif defined(__GNUC__)
#define FORCEINLINE __attribute__((always_inline)) inline
#else
#define FORCEINLINE inline
#endif
#endif
// Calculates a ARGB8 color for the given palette index and light multiplier
FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light)
{
const PalEntry &color = GPalette.BaseColors[index];
uint32_t red = color.r;
uint32_t green = color.g;
uint32_t blue = color.b;
red = red * light / 256;
green = green * light / 256;
blue = blue * light / 256;
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light)
{
uint32_t red = (color >> 16) & 0xff;
uint32_t green = (color >> 8) & 0xff;
uint32_t blue = color & 0xff;
red = red * light / 256;
green = green * light / 256;
blue = blue * light / 256;
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap
FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants)
{
const PalEntry &color = GPalette.BaseColors[index];
uint32_t red = color.r;
uint32_t green = color.g;
uint32_t blue = color.b;
if (constants.simple_shade)
{
red = red * light / 256;
green = green * light / 256;
blue = blue * light / 256;
}
else
{
uint32_t inv_light = 256 - light;
uint32_t inv_desaturate = 256 - constants.desaturate;
uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate;
red = (red * inv_desaturate + intensity) / 256;
green = (green * inv_desaturate + intensity) / 256;
blue = (blue * inv_desaturate + intensity) / 256;
red = (constants.fade_red * inv_light + red * light) / 256;
green = (constants.fade_green * inv_light + green * light) / 256;
blue = (constants.fade_blue * inv_light + blue * light) / 256;
red = (red * constants.light_red) / 256;
green = (green * constants.light_green) / 256;
blue = (blue * constants.light_blue) / 256;
}
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants)
{
uint32_t red = (color >> 16) & 0xff;
uint32_t green = (color >> 8) & 0xff;
uint32_t blue = color & 0xff;
if (constants.simple_shade)
{
red = red * light / 256;
green = green * light / 256;
blue = blue * light / 256;
}
else
{
uint32_t inv_light = 256 - light;
uint32_t inv_desaturate = 256 - constants.desaturate;
uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate;
red = (red * inv_desaturate + intensity) / 256;
green = (green * inv_desaturate + intensity) / 256;
blue = (blue * inv_desaturate + intensity) / 256;
red = (constants.fade_red * inv_light + red * light) / 256;
green = (constants.fade_green * inv_light + green * light) / 256;
blue = (constants.fade_blue * inv_light + blue * light) / 256;
red = (red * constants.light_red) / 256;
green = (green * constants.light_green) / 256;
blue = (blue * constants.light_blue) / 256;
}
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
{
uint32_t fg_alpha = (fg >> 24) & 0xff;
uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff;
uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256
uint32_t inv_alpha = 256 - alpha;
uint32_t bg_red = (bg >> 16) & 0xff;
uint32_t bg_green = (bg >> 8) & 0xff;
uint32_t bg_blue = bg & 0xff;
uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256;
uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256;
uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256;
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
// Calculate constants for a simple shade
#define SSE_SHADE_SIMPLE_INIT(light) \
__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
__m128i mlight_lo = mlight_hi;
// Calculate constants for a simple shade with different light levels for each pixel
#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \
__m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
__m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2);
// Simple shade 4 pixels
#define SSE_SHADE_SIMPLE(fg) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \
fg_hi = _mm_srli_epi16(fg_hi, 8); \
fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \
fg_lo = _mm_srli_epi16(fg_lo, 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
// Calculate constants for a complex shade
#define SSE_SHADE_INIT(light, shade_constants) \
__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
__m128i mlight_lo = mlight_hi; \
__m128i color = _mm_set_epi16( \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
__m128i fade = _mm_set_epi16( \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
__m128i fade_amount_lo = fade_amount_hi; \
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
// Calculate constants for a complex shade with different light levels for each pixel
#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \
__m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
__m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \
__m128i color = _mm_set_epi16( \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
__m128i fade = _mm_set_epi16( \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
__m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
// Complex shade 4 pixels
#define SSE_SHADE(fg, shade_constants) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
\
__m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \
intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \
\
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \
\
__m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \
intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \
\
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \
\
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
extern bool r_swtruecolor;
extern double GlobVis;

View file

@ -58,6 +58,7 @@
#include "r_3dfloors.h"
#include "v_palette.h"
#include "r_data/colormaps.h"
#include "r_draw_rgba.h"
#ifdef _MSC_VER
#pragma warning(disable:4244)
@ -506,7 +507,7 @@ void R_MapTiltedPlane_rgba (int y, int x1)
void R_MapColoredPlane_C (int y, int x1)
{
memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1));
memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1);
}
void R_MapColoredPlane_rgba(int y, int x1)
@ -1710,7 +1711,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
//
//==========================================================================
void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked)
void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked)
{
static const float ifloatpow2[16] =
{
@ -1745,7 +1746,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a
// p is the texture origin in view space
// Don't add in the offsets at this stage, because doing so can result in
// errors if the flat is rotated.
ang = M_PI * 3 / 2 - ViewAngle.Radians();
ang = M_PI*3/2 - ViewAngle.Radians();
cosine = cos(ang), sine = sin(ang);
p[0] = ViewPos.X * cosine - ViewPos.Y * sine;
p[2] = ViewPos.X * sine + ViewPos.Y * cosine;
@ -1756,21 +1757,21 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a
cosine = cos(ang), sine = sin(ang);
m[0] = yscale * cosine;
m[2] = yscale * sine;
// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0));
// VectorScale2 (m, 64.f/VectorLength(m));
// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0));
// VectorScale2 (m, 64.f/VectorLength(m));
// n is the u direction vector in view space
#if 0
//let's use the sin/cosine we already know instead of computing new ones
ang += M_PI / 2
ang += M_PI/2
n[0] = -xscale * cos(ang);
n[2] = -xscale * sin(ang);
#else
n[0] = xscale * sine;
n[2] = -xscale * cosine;
#endif
// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0));
// VectorScale2 (n, 64.f/VectorLength(n));
// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0));
// VectorScale2 (n, 64.f/VectorLength(n));
// This code keeps the texture coordinates constant across the x,y plane no matter
// how much you slope the surface. Use the commented-out code above instead to keep
@ -1807,7 +1808,6 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a
if (pl->height.fC() > 0)
planelightfloat = -planelightfloat;
ds_light = 0;
if (fixedlightlev >= 0)
{
R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));

View file

@ -96,7 +96,7 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache)
else if (cache != 0)
{
if (r_swtruecolor)
tex->GetPixels();
tex->GetPixelsBgra();
else
tex->GetPixels ();
}

View file

@ -2612,8 +2612,10 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis)
void R_DrawParticle_C (vissprite_t *vis)
{
DWORD *bg2rgb;
int spacing;
BYTE *dest;
DWORD fg;
BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac];
int yl = vis->y1;
int ycount = vis->y2 - yl + 1;
@ -2622,9 +2624,6 @@ void R_DrawParticle_C (vissprite_t *vis)
R_DrawMaskedSegsBehindParticle (vis);
DWORD *bg2rgb;
DWORD fg;
// vis->renderflags holds translucency level (0-255)
{
fixed_t fglevel, bglevel;

View file

@ -44,6 +44,7 @@
#include "r_utility.h"
#ifndef NO_SWRENDER
#include "r_draw.h"
#include "r_draw_rgba.h"
#include "r_main.h"
#include "r_things.h"
#endif