# Conflicts:
#	src/v_video.cpp
This commit is contained in:
raa-eruanna 2016-09-08 03:19:08 -04:00
commit 32f758de41
60 changed files with 8525 additions and 1045 deletions

View file

@ -919,7 +919,9 @@ set( FASTMATH_PCH_SOURCES
r_3dfloors.cpp
r_bsp.cpp
r_draw.cpp
r_draw_rgba.cpp
r_drawt.cpp
r_drawt_rgba.cpp
r_main.cpp
r_plane.cpp
r_segs.cpp

View file

@ -99,6 +99,11 @@ typedef TMap<int, PClassActor *> FClassMap;
#endif
// Only use SSE intrinsics on Intel architecture
#if !defined(_M_IX86) && !defined(__i386__) && !defined(_M_X64) && !defined(__amd64__)
#define NO_SSE
#endif
#if defined(_MSC_VER)
#define NOVTABLE __declspec(novtable)

View file

@ -382,6 +382,9 @@ static bool (*wipes[])(int) =
// Returns true if the wipe should be performed.
bool wipe_StartScreen (int type)
{
if (screen->IsBgra())
return false;
CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1);
if (CurrentWipeType)
@ -395,11 +398,15 @@ bool wipe_StartScreen (int type)
void wipe_EndScreen (void)
{
if (screen->IsBgra())
return;
if (CurrentWipeType)
{
wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2];
screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end);
screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr.
// Initialize the wipe
(*wipes[(CurrentWipeType-1)*3])(0);
}
@ -410,6 +417,9 @@ bool wipe_ScreenWipe (int ticks)
{
bool rc;
if (screen->IsBgra())
return true;
if (CurrentWipeType == wipe_None)
return true;
@ -423,6 +433,9 @@ bool wipe_ScreenWipe (int ticks)
// Final things for the wipe
void wipe_Cleanup()
{
if (screen->IsBgra())
return;
if (wipe_scr_start != NULL)
{
delete[] wipe_scr_start;

View file

@ -1312,7 +1312,7 @@ void G_InitLevelLocals ()
level_info_t *info;
BaseBlendA = 0.0f; // Remove underwater blend effect, if any
NormalLight.Maps = realcolormaps;
NormalLight.Maps = realcolormaps.Maps;
// [BB] Instead of just setting the color, we also have to reset Desaturate and build the lights.
NormalLight.ChangeColor (PalEntry (255, 255, 255), 0);

View file

@ -737,7 +737,8 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis)
if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0))
{
vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f);
vis->colormap = SpecialColormaps[INVERSECOLORMAP].Colormap;
vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP];
vis->ColormapNum = 0;
}
return -1; // This item is valid so another one shouldn't reset the translucency
}

View file

@ -35,7 +35,6 @@ public:
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
bool CheckModified ();
void Unload ();
void SetVial (int level);
@ -90,10 +89,6 @@ bool FHealthBar::CheckModified ()
return NeedRefresh;
}
void FHealthBar::Unload ()
{
}
const BYTE *FHealthBar::GetColumn (unsigned int column, const Span **spans_out)
{
if (NeedRefresh)

View file

@ -78,7 +78,6 @@ public:
const BYTE *GetColumn(unsigned int column, const Span **spans_out);
const BYTE *GetPixels();
void Unload();
bool CheckModified();
protected:
@ -212,10 +211,6 @@ bool FBackdropTexture::CheckModified()
return LastRenderTic != gametic;
}
void FBackdropTexture::Unload()
{
}
//=============================================================================
//
//

View file

@ -96,6 +96,17 @@ EXTERN_CVAR(Bool, ticker )
EXTERN_CVAR(Bool, vid_vsync)
EXTERN_CVAR(Bool, vid_hidpi)
// swtruecolor: boolean cvar (default false) whose value is passed as the
// `bgra` argument of Video->CreateFrameBuffer() by I_SetMode().
// The block below is the cvar's custom handler (presumably run when the value
// changes; CVAR_NOINITCALL suggests it is skipped during startup -- TODO confirm).
CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
{
// Strictly speaking this doesn't require a mode switch, but it is the easiest
// way to force a CreateFramebuffer call without a lot of refactoring.
extern int NewWidth, NewHeight, NewBits, DisplayBits;
// Request the same resolution and bit depth that are currently active, so that
// only the framebuffer's pixel format differs after the mode set.
NewWidth = screen->GetWidth();
NewHeight = screen->GetHeight();
NewBits = DisplayBits;
// Flag the pending mode change (assumed to be picked up by the main loop,
// which is outside this view -- verify against the caller).
setmodeneeded = true;
}
CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
{
extern int NewWidth, NewHeight, NewBits, DisplayBits;
@ -199,7 +210,7 @@ public:
virtual EDisplayType GetDisplayType() { return DISPLAY_Both; }
virtual void SetWindowedScale(float scale);
virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer* old);
virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer* old);
virtual void StartModeIterator(int bits, bool fullscreen);
virtual bool NextMode(int* width, int* height, bool* letterbox);
@ -238,7 +249,7 @@ private:
class CocoaFrameBuffer : public DFrameBuffer
{
public:
CocoaFrameBuffer(int width, int height, bool fullscreen);
CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen);
~CocoaFrameBuffer();
virtual bool Lock(bool buffer);
@ -518,14 +529,14 @@ bool CocoaVideo::NextMode(int* const width, int* const height, bool* const lette
return false;
}
DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool fullscreen, DFrameBuffer* const old)
DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool bgra, const bool fullscreen, DFrameBuffer* const old)
{
PalEntry flashColor = 0;
int flashAmount = 0;
if (NULL != old)
{
if (width == m_width && height == m_height)
if (width == m_width && height == m_height && bgra == old->IsBgra())
{
SetMode(width, height, fullscreen, vid_hidpi);
return old;
@ -542,7 +553,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c
delete old;
}
CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, fullscreen);
CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, bgra, fullscreen);
fb->SetFlash(flashColor, flashAmount);
SetMode(width, height, fullscreen, vid_hidpi);
@ -761,8 +772,8 @@ CocoaVideo* CocoaVideo::GetInstance()
}
CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen)
: DFrameBuffer(width, height)
CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen)
: DFrameBuffer(width, height, bgra)
, m_needPaletteUpdate(false)
, m_gamma(0.0f)
, m_needGammaUpdate(false)
@ -856,8 +867,15 @@ void CocoaFrameBuffer::Update()
FlipCycles.Reset();
BlitCycles.Clock();
GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width * BYTES_PER_PIXEL,
Width, Height, FRACUNIT, FRACUNIT, 0, 0);
if (IsBgra())
{
CopyWithGammaBgra(m_pixelBuffer, Width * BYTES_PER_PIXEL, m_gammaTable[0], m_gammaTable[1], m_gammaTable[2], m_flashColor, m_flashAmount);
}
else
{
GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width * BYTES_PER_PIXEL,
Width, Height, FRACUNIT, FRACUNIT, 0, 0);
}
FlipCycles.Clock();
Flip();
@ -989,8 +1007,10 @@ void CocoaFrameBuffer::Flip()
static const GLenum format = GL_ABGR_EXT;
#endif // __LITTLE_ENDIAN__
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8,
Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer);
if (IsBgra())
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, GL_BGRA_EXT, GL_UNSIGNED_BYTE, m_pixelBuffer);
else
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer);
glBegin(GL_QUADS);
glColor4f(1.0f, 1.0f, 1.0f, 1.0f);
@ -1064,7 +1084,7 @@ void I_CreateRenderer()
DFrameBuffer* I_SetMode(int &width, int &height, DFrameBuffer* old)
{
return Video->CreateFrameBuffer(width, height, fullscreen, old);
return Video->CreateFrameBuffer(width, height, swtruecolor, fullscreen, old);
}
bool I_CheckResolution(const int width, const int height, const int bits)

View file

@ -74,7 +74,7 @@ class IVideo
virtual EDisplayType GetDisplayType () = 0;
virtual void SetWindowedScale (float scale) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0;
virtual void StartModeIterator (int bits, bool fs) = 0;
virtual bool NextMode (int *width, int *height, bool *letterbox) = 0;

View file

@ -51,6 +51,7 @@
EXTERN_CVAR (Bool, ticker)
EXTERN_CVAR (Bool, fullscreen)
EXTERN_CVAR (Bool, swtruecolor)
EXTERN_CVAR (Float, vid_winscale)
IVideo *Video;
@ -128,7 +129,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old)
fs = fullscreen;
break;
}
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old);
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old);
/* Right now, CreateFrameBuffer cannot return NULL
if (res == NULL)
@ -282,6 +283,16 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
extern int NewWidth, NewHeight, NewBits, DisplayBits;
// swtruecolor: boolean cvar (default false) whose value is passed as the
// `bgra` argument of Video->CreateFrameBuffer() by I_SetMode().
// The block below is the cvar's custom handler (presumably run when the value
// changes; CVAR_NOINITCALL suggests it is skipped during startup -- TODO confirm).
CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL)
{
// Strictly speaking this doesn't require a mode switch, but it is the easiest
// way to force a CreateFramebuffer call without a lot of refactoring.
// Request the same resolution and bit depth that are currently active, so that
// only the framebuffer's pixel format differs after the mode set.
NewWidth = screen->GetWidth();
NewHeight = screen->GetHeight();
NewBits = DisplayBits;
// Flag the pending mode change (assumed to be picked up by the main loop,
// which is outside this view -- verify against the caller).
setmodeneeded = true;
}
CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
{
NewWidth = screen->GetWidth();

View file

@ -28,7 +28,7 @@ class SDLFB : public DFrameBuffer
{
DECLARE_CLASS(SDLFB, DFrameBuffer)
public:
SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin);
SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin);
~SDLFB ();
bool Lock (bool buffer);
@ -257,7 +257,7 @@ bool SDLVideo::NextMode (int *width, int *height, bool *letterbox)
return false;
}
DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old)
DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old)
{
static int retry = 0;
static int owidth, oheight;
@ -271,7 +271,8 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree
{ // Reuse the old framebuffer if its attributes are the same
SDLFB *fb = static_cast<SDLFB *> (old);
if (fb->Width == width &&
fb->Height == height)
fb->Height == height &&
fb->Bgra == bgra)
{
bool fsnow = (SDL_GetWindowFlags (fb->Screen) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0;
@ -296,7 +297,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree
flashAmount = 0;
}
SDLFB *fb = new SDLFB (width, height, fullscreen, oldwin);
SDLFB *fb = new SDLFB (width, height, bgra, fullscreen, oldwin);
// If we could not create the framebuffer, try again with slightly
// different parameters in this order:
@ -335,7 +336,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree
}
++retry;
fb = static_cast<SDLFB *>(CreateFrameBuffer (width, height, fullscreen, NULL));
fb = static_cast<SDLFB *>(CreateFrameBuffer (width, height, bgra, fullscreen, NULL));
}
retry = 0;
@ -350,8 +351,8 @@ void SDLVideo::SetWindowedScale (float scale)
// FrameBuffer implementation -----------------------------------------------
SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin)
: DFrameBuffer (width, height)
SDLFB::SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin)
: DFrameBuffer (width, height, bgra)
{
int i;
@ -494,7 +495,11 @@ void SDLFB::Update ()
pitch = Surface->pitch;
}
if (NotPaletted)
if (Bgra)
{
CopyWithGammaBgra(pixels, pitch, GammaTable[0], GammaTable[1], GammaTable[2], Flash, FlashAmount);
}
else if (NotPaletted)
{
GPfx.Convert (MemBuffer, Pitch,
pixels, pitch, Width, Height,
@ -674,13 +679,20 @@ void SDLFB::ResetSDLRenderer ()
SDL_SetRenderDrawColor(Renderer, 0, 0, 0, 255);
Uint32 fmt;
switch(vid_displaybits)
if (Bgra)
{
default: fmt = SDL_PIXELFORMAT_ARGB8888; break;
case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break;
case 24: fmt = SDL_PIXELFORMAT_RGB888; break;
case 16: fmt = SDL_PIXELFORMAT_RGB565; break;
case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break;
fmt = SDL_PIXELFORMAT_ARGB8888;
}
else
{
switch (vid_displaybits)
{
default: fmt = SDL_PIXELFORMAT_ARGB8888; break;
case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break;
case 24: fmt = SDL_PIXELFORMAT_RGB888; break;
case 16: fmt = SDL_PIXELFORMAT_RGB565; break;
case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break;
}
}
Texture = SDL_CreateTexture (Renderer, fmt, SDL_TEXTUREACCESS_STREAMING, Width, Height);

View file

@ -10,7 +10,7 @@ class SDLVideo : public IVideo
EDisplayType GetDisplayType () { return DISPLAY_Both; }
void SetWindowedScale (float scale);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old);
void StartModeIterator (int bits, bool fs);
bool NextMode (int *width, int *height, bool *letterbox);

View file

@ -71,7 +71,7 @@ struct FakeCmap
};
TArray<FakeCmap> fakecmaps;
BYTE *realcolormaps;
FColormap realcolormaps;
size_t numfakecmaps;
@ -408,7 +408,7 @@ void R_SetDefaultColormap (const char *name)
foo.Color = 0xFFFFFF;
foo.Fade = 0;
foo.Maps = realcolormaps;
foo.Maps = realcolormaps.Maps;
foo.Desaturate = 0;
foo.Next = NULL;
foo.BuildLights ();
@ -430,7 +430,7 @@ void R_SetDefaultColormap (const char *name)
remap[0] = 0;
for (i = 0; i < NUMCOLORMAPS; ++i)
{
BYTE *map2 = &realcolormaps[i*256];
BYTE *map2 = &realcolormaps.Maps[i*256];
lumpr.Read (map, 256);
for (j = 0; j < 256; ++j)
{
@ -454,11 +454,7 @@ void R_DeinitColormaps ()
{
SpecialColormaps.Clear();
fakecmaps.Clear();
if (realcolormaps != NULL)
{
delete[] realcolormaps;
realcolormaps = NULL;
}
delete[] realcolormaps.Maps;
FreeSpecialLights();
}
@ -501,7 +497,7 @@ void R_InitColormaps ()
}
}
}
realcolormaps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()];
realcolormaps.Maps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()];
R_SetDefaultColormap ("COLORMAP");
if (fakecmaps.Size() > 1)
@ -523,7 +519,7 @@ void R_InitColormaps ()
{
int k, r, g, b;
FWadLump lump = Wads.OpenLumpNum (fakecmaps[j].lump);
BYTE *const map = realcolormaps + NUMCOLORMAPS*256*j;
BYTE *const map = realcolormaps.Maps + NUMCOLORMAPS*256*j;
for (k = 0; k < NUMCOLORMAPS; ++k)
{
@ -550,8 +546,8 @@ void R_InitColormaps ()
}
NormalLight.Color = PalEntry (255, 255, 255);
NormalLight.Fade = 0;
NormalLight.Maps = realcolormaps;
NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps);
NormalLight.Maps = realcolormaps.Maps;
NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps.Maps);
numfakecmaps = fakecmaps.Size();
// build default special maps (e.g. invulnerability)

View file

@ -1,18 +1,26 @@
#ifndef __RES_CMAP_H
#define __RES_CMAP_H
struct FColormap;
void R_InitColormaps ();
void R_DeinitColormaps ();
DWORD R_ColormapNumForName(const char *name); // killough 4/4/98
void R_SetDefaultColormap (const char *name); // [RH] change normal fadetable
DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap
extern BYTE *realcolormaps; // [RH] make the colormaps externally visible
extern FColormap realcolormaps; // [RH] make the colormaps externally visible
extern size_t numfakecmaps;
// Base colormap description. FDynamicColormap and FSpecialColormap both derive
// from this struct, and the global `realcolormaps` is an instance of it.
// No destructor is declared here, so the struct itself never frees Maps;
// whoever allocates the table is responsible for releasing it.
struct FColormap
{
// Pointer to the colormap table bytes. Null until assigned; FSpecialColormap's
// constructor points it at its own Colormap array.
BYTE *Maps = nullptr;
// Defaults to 0xffffffff. Presumably the light color, going by
// FDynamicColormap::ChangeColor(lightcolor, ...) -- TODO confirm.
PalEntry Color = 0xffffffff;
// Defaults to 0xff000000. Presumably the fade color, going by
// FDynamicColormap::ChangeFade(fadecolor) -- TODO confirm.
PalEntry Fade = 0xff000000;
// Desaturation amount; 0 by default (units/range not visible here).
int Desaturate = 0;
};
struct FDynamicColormap
struct FDynamicColormap : FColormap
{
void ChangeFade (PalEntry fadecolor);
void ChangeColor (PalEntry lightcolor, int desaturate);
@ -20,10 +28,6 @@ struct FDynamicColormap
void BuildLights ();
static void RebuildAllLights();
BYTE *Maps;
PalEntry Color;
PalEntry Fade;
int Desaturate;
FDynamicColormap *Next;
};
@ -43,8 +47,13 @@ enum
};
struct FSpecialColormap
struct FSpecialColormap : FColormap
{
FSpecialColormap()
{
Maps = Colormap;
}
float ColorizeStart[3];
float ColorizeEnd[3];
BYTE Colormap[256];

View file

@ -1397,11 +1397,13 @@ struct FMiniBSP
//
typedef BYTE lighttable_t; // This could be wider for >8 bit display.
struct FColormap;
// This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite
struct visstyle_t
{
lighttable_t *colormap;
int ColormapNum; // Which colormap is rendered
FColormap *BaseColormap; // Base colormap used together with ColormapNum
float Alpha;
FRenderStyle RenderStyle;
};

View file

@ -38,6 +38,8 @@
#include "r_data/r_translate.h"
#include "v_palette.h"
#include "r_data/colormaps.h"
#include "r_plane.h"
#include "r_draw_rgba.h"
#include "gi.h"
#include "stats.h"
@ -70,6 +72,19 @@ int scaledviewwidth;
// screen depth and asm/no asm.
void (*R_DrawColumnHoriz)(void);
void (*R_DrawColumn)(void);
// Column fill/blend drawer entry points, exposed as function pointers so the
// implementation can be chosen at runtime. R_InitColumnDrawers installs the
// *_rgba versions when r_swtruecolor is set; its other branch installs the
// palette versions (*_C / *P_C).
void (*R_FillColumn)(void);
void (*R_FillAddColumn)(void);
void (*R_FillAddClampColumn)(void);
void (*R_FillSubClampColumn)(void);
void (*R_FillRevSubClampColumn)(void);
void (*R_DrawAddColumn)(void);
void (*R_DrawTlatedAddColumn)(void);
void (*R_DrawAddClampColumn)(void);
void (*R_DrawAddClampTranslatedColumn)(void);
void (*R_DrawSubClampColumn)(void);
void (*R_DrawSubClampTranslatedColumn)(void);
void (*R_DrawRevSubClampColumn)(void);
void (*R_DrawRevSubClampTranslatedColumn)(void);
void (*R_DrawFuzzColumn)(void);
void (*R_DrawTranslatedColumn)(void);
void (*R_DrawShadedColumn)(void);
@ -79,7 +94,48 @@ void (*R_DrawSpanTranslucent)(void);
void (*R_DrawSpanMaskedTranslucent)(void);
void (*R_DrawSpanAddClamp)(void);
void (*R_DrawSpanMaskedAddClamp)(void);
void (*rt_map4cols)(int,int,int);
// Further drawer entry points held as function pointers. In its r_swtruecolor
// branch R_InitColumnDrawers points every one of these at the matching *_rgba
// implementation.

// Span, fog boundary, plane, particle and voxel-slab drawers.
void (*R_FillSpan)(void);
void (*R_FillColumnHoriz)(void);
void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip);
void (*R_MapTiltedPlane)(int y, int x1);
void (*R_MapColoredPlane)(int y, int x1);
void (*R_DrawParticle)(vissprite_t *);
void (*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade);
void (*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
// tmvline family: the "1" variants return a fixed_t, the "4" variants return
// nothing.
fixed_t (*tmvline1_add)();
void (*tmvline4_add)();
fixed_t (*tmvline1_addclamp)();
void (*tmvline4_addclamp)();
fixed_t (*tmvline1_subclamp)();
void (*tmvline4_subclamp)();
fixed_t (*tmvline1_revsubclamp)();
void (*tmvline4_revsubclamp)();
// rt_* family: the *1col variants take (hx, sx, yl, yh), the *4cols variants
// take (sx, yl, yh).
void (*rt_copy1col)(int hx, int sx, int yl, int yh);
void (*rt_copy4cols)(int sx, int yl, int yh);
void (*rt_shaded1col)(int hx, int sx, int yl, int yh);
void (*rt_shaded4cols)(int sx, int yl, int yh);
void (*rt_map1col)(int hx, int sx, int yl, int yh);
void (*rt_add1col)(int hx, int sx, int yl, int yh);
void (*rt_addclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_subclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_tlate1col)(int hx, int sx, int yl, int yh);
void (*rt_tlateadd1col)(int hx, int sx, int yl, int yh);
void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_map4cols)(int sx, int yl, int yh);
void (*rt_add4cols)(int sx, int yl, int yh);
void (*rt_addclamp4cols)(int sx, int yl, int yh);
void (*rt_subclamp4cols)(int sx, int yl, int yh);
void (*rt_revsubclamp4cols)(int sx, int yl, int yh);
void (*rt_tlate4cols)(int sx, int yl, int yh);
void (*rt_tlateadd4cols)(int sx, int yl, int yh);
void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh);
void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh);
void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh);
void (*rt_initcols)(BYTE *buffer);
void (*rt_span_coverage)(int x, int start, int stop);
//
// R_DrawColumn
@ -90,18 +146,27 @@ extern "C" {
int dc_pitch=0xABadCafe; // [RH] Distance between rows
lighttable_t* dc_colormap;
FColormap *dc_fcolormap;
ShadeConstants dc_shade_constants;
fixed_t dc_light;
int dc_x;
int dc_yl;
int dc_yh;
fixed_t dc_iscale;
fixed_t dc_texturefrac;
uint32_t dc_textureheight;
int dc_color; // [RH] Color for column filler
DWORD dc_srccolor;
uint32_t dc_srccolor_bgra;
DWORD *dc_srcblend; // [RH] Source and destination
DWORD *dc_destblend; // blending lookups
fixed_t dc_srcalpha; // Alpha value used by dc_srcblend
fixed_t dc_destalpha; // Alpha value used by dc_destblend
// first pixel in a column (possibly virtual)
const BYTE* dc_source;
const BYTE* dc_source2;
uint32_t dc_texturefracx;
BYTE* dc_dest;
int dc_count;
@ -109,7 +174,11 @@ int dc_count;
DWORD vplce[4];
DWORD vince[4];
BYTE* palookupoffse[4];
fixed_t palookuplight[4];
const BYTE* bufplce[4];
const BYTE* bufplce2[4];
uint32_t buftexturefracx[4];
uint32_t bufheight[4];
// just for profiling
int dccount;
@ -120,10 +189,10 @@ BYTE *dc_translation;
BYTE shadetables[NUMCOLORMAPS*16*256];
FDynamicColormap ShadeFakeColormap[16];
BYTE identitymap[256];
FDynamicColormap identitycolormap;
EXTERN_CVAR (Int, r_columnmethod)
void R_InitShadeMaps()
{
int i,j;
@ -161,6 +230,10 @@ void R_InitShadeMaps()
{
identitymap[i] = i;
}
identitycolormap.Color = ~0u;
identitycolormap.Desaturate = 0;
identitycolormap.Next = NULL;
identitycolormap.Maps = identitymap;
}
/************************************/
@ -223,7 +296,7 @@ void R_DrawColumnP_C (void)
#endif
// [RH] Just fills a column with a color
void R_FillColumnP (void)
void R_FillColumnP_C (void)
{
int count;
BYTE* dest;
@ -247,7 +320,7 @@ void R_FillColumnP (void)
}
}
void R_FillAddColumn (void)
void R_FillAddColumn_C (void)
{
int count;
BYTE *dest;
@ -271,10 +344,9 @@ void R_FillAddColumn (void)
*dest = RGB32k.All[bg & (bg>>15)];
dest += pitch;
} while (--count);
}
void R_FillAddClampColumn (void)
void R_FillAddClampColumn_C (void)
{
int count;
BYTE *dest;
@ -304,10 +376,9 @@ void R_FillAddClampColumn (void)
*dest = RGB32k.All[a & (a>>15)];
dest += pitch;
} while (--count);
}
void R_FillSubClampColumn (void)
void R_FillSubClampColumn_C (void)
{
int count;
BYTE *dest;
@ -336,10 +407,9 @@ void R_FillSubClampColumn (void)
*dest = RGB32k.All[a & (a>>15)];
dest += pitch;
} while (--count);
}
void R_FillRevSubClampColumn (void)
void R_FillRevSubClampColumn_C (void)
{
int count;
BYTE *dest;
@ -368,13 +438,11 @@ void R_FillRevSubClampColumn (void)
*dest = RGB32k.All[a & (a>>15)];
dest += pitch;
} while (--count);
}
//
// Spectre/Invisibility.
//
#define FUZZTABLE 50
extern "C"
{
@ -647,8 +715,8 @@ void R_DrawTlatedAddColumnP_C (void)
fg = fg2rgb[fg];
bg = bg2rgb[bg];
fg = (fg+bg) | 0x1f07c1f;
*dest = RGB32k.All[fg & (fg>>15)];
fg = (fg + bg) | 0x1f07c1f;
*dest = RGB32k.All[fg & (fg >> 15)];
dest += pitch;
frac += fracstep;
} while (--count);
@ -937,8 +1005,6 @@ void R_DrawRevSubClampTranslatedColumnP_C ()
}
}
//
// R_DrawSpan
// With DOOM style restrictions on view orientation,
@ -966,7 +1032,10 @@ int ds_y;
int ds_x1;
int ds_x2;
FColormap* ds_fcolormap;
lighttable_t* ds_colormap;
ShadeConstants ds_shade_constants;
dsfixed_t ds_light;
dsfixed_t ds_xfrac;
dsfixed_t ds_yfrac;
@ -977,6 +1046,7 @@ int ds_ybits;
// start of a floor/ceiling tile image
const BYTE* ds_source;
bool ds_source_mipmapped;
// just for profiling
int dscount;
@ -997,13 +1067,14 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource;
//
//==========================================================================
void R_SetSpanSource(const BYTE *pixels)
void R_SetSpanSource(FTexture *tex)
{
ds_source = pixels;
ds_source = r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels();
ds_source_mipmapped = tex->Mipmapped();
#ifdef X86_ASM
if (ds_cursource != ds_source)
if (!r_swtruecolor && ds_cursource != ds_source)
{
R_SetSpanSource_ASM(pixels);
R_SetSpanSource_ASM(ds_source);
}
#endif
}
@ -1016,11 +1087,11 @@ void R_SetSpanSource(const BYTE *pixels)
//
//==========================================================================
void R_SetSpanColormap(BYTE *colormap)
void R_SetSpanColormap(FDynamicColormap *colormap, int shade)
{
ds_colormap = colormap;
R_SetDSColorMapLight(colormap, 0, shade);
#ifdef X86_ASM
if (ds_colormap != ds_curcolormap)
if (!r_swtruecolor && ds_colormap != ds_curcolormap)
{
R_SetSpanColormap_ASM (ds_colormap);
}
@ -1049,7 +1120,8 @@ void R_SetupSpanBits(FTexture *tex)
ds_ybits--;
}
#ifdef X86_ASM
R_SetSpanSize_ASM (ds_xbits, ds_ybits);
if (!r_swtruecolor)
R_SetSpanSize_ASM (ds_xbits, ds_ybits);
#endif
}
@ -1090,6 +1162,7 @@ void R_DrawSpanP_C (void)
if (ds_xbits == 6 && ds_ybits == 6)
{
// 64x64 is the most common case by far, so special case it.
do
{
// Current texture index in u,v.
@ -1471,11 +1544,12 @@ void R_DrawSpanMaskedAddClampP_C (void)
}
// [RH] Just fill a span with a color
void R_FillSpan (void)
void R_FillSpan_C (void)
{
memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1);
}
// Draw a voxel slab
//
// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
@ -1572,17 +1646,19 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v
// wallscan stuff, in C
int vlinebits;
int mvlinebits;
#ifndef X86_ASM
static DWORD vlinec1 ();
static int vlinebits;
DWORD (*dovline1)() = vlinec1;
DWORD (*doprevline1)() = vlinec1;
#ifdef X64_ASM
extern "C" void vlinetallasm4();
#define dovline4 vlinetallasm4
extern "C" void setupvlinetallasm (int);
void (*dovline4)() = vlinetallasm4;
#else
static void vlinec4 ();
void (*dovline4)() = vlinec4;
@ -1590,7 +1666,6 @@ void (*dovline4)() = vlinec4;
static DWORD mvlinec1();
static void mvlinec4();
static int mvlinebits;
DWORD (*domvline1)() = mvlinec1;
void (*domvline4)() = mvlinec4;
@ -1624,6 +1699,12 @@ void (*domvline4)() = mvlineasm4;
void setupvline (int fracbits)
{
if (r_swtruecolor)
{
vlinebits = fracbits;
return;
}
#ifdef X86_ASM
if (CPU.Family <= 5)
{
@ -1679,7 +1760,9 @@ DWORD vlinec1 ()
return frac;
}
#endif
#if !defined(X86_ASM)
void vlinec4 ()
{
BYTE *dest = dc_dest;
@ -1700,13 +1783,20 @@ void vlinec4 ()
void setupmvline (int fracbits)
{
if (!r_swtruecolor)
{
#if defined(X86_ASM)
setupmvlineasm (fracbits);
domvline1 = mvlineasm1;
domvline4 = mvlineasm4;
setupmvlineasm(fracbits);
domvline1 = mvlineasm1;
domvline4 = mvlineasm4;
#else
mvlinebits = fracbits;
mvlinebits = fracbits;
#endif
}
else
{
mvlinebits = fracbits;
}
}
#if !defined(X86_ASM)
@ -1788,7 +1878,7 @@ static void R_DrawFogBoundaryLine (int y, int x)
} while (++x <= x2);
}
void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip)
void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip)
{
// This is essentially the same as R_MapVisPlane but with an extra step
// to create new horizontal spans whenever the light changes enough that
@ -1808,7 +1898,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip)
clearbufshort (spanend+t2, b2-t2, x);
}
dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT);
R_SetColorMapLight(basecolormap, (float)light, wallshade);
for (--x; x >= x1; --x)
{
@ -1833,7 +1923,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip)
clearbufshort (spanend+t2, b2-t2, x);
}
rcolormap = lcolormap;
dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT);
R_SetColorMapLight(basecolormap, (float)light, wallshade);
}
else
{
@ -1884,7 +1974,7 @@ void setuptmvline (int bits)
tmvlinebits = bits;
}
fixed_t tmvline1_add ()
fixed_t tmvline1_add_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -1915,7 +2005,7 @@ fixed_t tmvline1_add ()
return frac;
}
void tmvline4_add ()
void tmvline4_add_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -1942,7 +2032,7 @@ void tmvline4_add ()
} while (--count);
}
fixed_t tmvline1_addclamp ()
fixed_t tmvline1_addclamp_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -1978,7 +2068,7 @@ fixed_t tmvline1_addclamp ()
return frac;
}
void tmvline4_addclamp ()
void tmvline4_addclamp_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -2010,7 +2100,7 @@ void tmvline4_addclamp ()
} while (--count);
}
fixed_t tmvline1_subclamp ()
fixed_t tmvline1_subclamp_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -2045,7 +2135,7 @@ fixed_t tmvline1_subclamp ()
return frac;
}
void tmvline4_subclamp ()
void tmvline4_subclamp_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -2076,7 +2166,7 @@ void tmvline4_subclamp ()
} while (--count);
}
fixed_t tmvline1_revsubclamp ()
fixed_t tmvline1_revsubclamp_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -2111,7 +2201,7 @@ fixed_t tmvline1_revsubclamp ()
return frac;
}
void tmvline4_revsubclamp ()
void tmvline4_revsubclamp_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -2142,7 +2232,6 @@ void tmvline4_revsubclamp ()
} while (--count);
}
//==========================================================================
//
// R_GetColumn
@ -2159,43 +2248,242 @@ const BYTE *R_GetColumn (FTexture *tex, int col)
{
col = width + (col % width);
}
return tex->GetColumn (col, NULL);
}
if (r_swtruecolor)
return (const BYTE *)tex->GetColumnBgra(col, NULL);
else
return tex->GetColumn(col, NULL);
}
// [RH] Initialize the column drawer pointers
void R_InitColumnDrawers ()
{
#ifdef X86_ASM
R_DrawColumn = R_DrawColumnP_ASM;
R_DrawColumnHoriz = R_DrawColumnHorizP_ASM;
R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_ASM;
R_DrawSpanMasked = R_DrawSpanMaskedP_ASM;
if (CPU.Family <= 5)
// Save a copy when switching to true color mode as the assembly palette drawers might change them
static bool pointers_saved = false;
static DWORD(*dovline1_saved)();
static DWORD(*doprevline1_saved)();
static DWORD(*domvline1_saved)();
static void(*dovline4_saved)();
static void(*domvline4_saved)();
if (r_swtruecolor)
{
rt_map4cols = rt_map4cols_asm2;
if (!pointers_saved)
{
pointers_saved = true;
dovline1_saved = dovline1;
doprevline1_saved = doprevline1;
domvline1_saved = domvline1;
dovline4_saved = dovline4;
domvline4_saved = domvline4;
}
R_DrawColumnHoriz = R_DrawColumnHoriz_rgba;
R_DrawColumn = R_DrawColumn_rgba;
R_DrawFuzzColumn = R_DrawFuzzColumn_rgba;
R_DrawTranslatedColumn = R_DrawTranslatedColumn_rgba;
R_DrawShadedColumn = R_DrawShadedColumn_rgba;
R_DrawSpanMasked = R_DrawSpanMasked_rgba;
R_DrawSpan = R_DrawSpan_rgba;
R_DrawSpanTranslucent = R_DrawSpanTranslucent_rgba;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucent_rgba;
R_DrawSpanAddClamp = R_DrawSpanAddClamp_rgba;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClamp_rgba;
R_FillColumn = R_FillColumn_rgba;
R_FillAddColumn = R_FillAddColumn_rgba;
R_FillAddClampColumn = R_FillAddClampColumn_rgba;
R_FillSubClampColumn = R_FillSubClampColumn_rgba;
R_FillRevSubClampColumn = R_FillRevSubClampColumn_rgba;
R_DrawAddColumn = R_DrawAddColumn_rgba;
R_DrawTlatedAddColumn = R_DrawTlatedAddColumn_rgba;
R_DrawAddClampColumn = R_DrawAddClampColumn_rgba;
R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumn_rgba;
R_DrawSubClampColumn = R_DrawSubClampColumn_rgba;
R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumn_rgba;
R_DrawRevSubClampColumn = R_DrawRevSubClampColumn_rgba;
R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumn_rgba;
R_FillSpan = R_FillSpan_rgba;
R_DrawFogBoundary = R_DrawFogBoundary_rgba;
R_FillColumnHoriz = R_FillColumnHoriz_rgba;
R_DrawFogBoundary = R_DrawFogBoundary_rgba;
R_MapTiltedPlane = R_MapTiltedPlane_rgba;
R_MapColoredPlane = R_MapColoredPlane_rgba;
R_DrawParticle = R_DrawParticle_rgba;
R_SetupDrawSlab = R_SetupDrawSlab_rgba;
R_DrawSlab = R_DrawSlab_rgba;
tmvline1_add = tmvline1_add_rgba;
tmvline4_add = tmvline4_add_rgba;
tmvline1_addclamp = tmvline1_addclamp_rgba;
tmvline4_addclamp = tmvline4_addclamp_rgba;
tmvline1_subclamp = tmvline1_subclamp_rgba;
tmvline4_subclamp = tmvline4_subclamp_rgba;
tmvline1_revsubclamp = tmvline1_revsubclamp_rgba;
tmvline4_revsubclamp = tmvline4_revsubclamp_rgba;
rt_copy1col = rt_copy1col_rgba;
rt_copy4cols = rt_copy4cols_rgba;
rt_map1col = rt_map1col_rgba;
rt_map4cols = rt_map4cols_rgba;
rt_shaded1col = rt_shaded1col_rgba;
rt_shaded4cols = rt_shaded4cols_rgba;
rt_add1col = rt_add1col_rgba;
rt_add4cols = rt_add4cols_rgba;
rt_addclamp1col = rt_addclamp1col_rgba;
rt_addclamp4cols = rt_addclamp4cols_rgba;
rt_subclamp1col = rt_subclamp1col_rgba;
rt_revsubclamp1col = rt_revsubclamp1col_rgba;
rt_tlate1col = rt_tlate1col_rgba;
rt_tlateadd1col = rt_tlateadd1col_rgba;
rt_tlateaddclamp1col = rt_tlateaddclamp1col_rgba;
rt_tlatesubclamp1col = rt_tlatesubclamp1col_rgba;
rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_rgba;
rt_subclamp4cols = rt_subclamp4cols_rgba;
rt_revsubclamp4cols = rt_revsubclamp4cols_rgba;
rt_tlate4cols = rt_tlate4cols_rgba;
rt_tlateadd4cols = rt_tlateadd4cols_rgba;
rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_rgba;
rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_rgba;
rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_rgba;
rt_initcols = rt_initcols_rgba;
rt_span_coverage = rt_span_coverage_rgba;
dovline1 = vlinec1_rgba;
doprevline1 = vlinec1_rgba;
domvline1 = mvlinec1_rgba;
dovline4 = vlinec4_rgba;
domvline4 = mvlinec4_rgba;
}
else
{
rt_map4cols = rt_map4cols_asm1;
}
#ifdef X86_ASM
R_DrawColumn = R_DrawColumnP_ASM;
R_DrawColumnHoriz = R_DrawColumnHorizP_ASM;
R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_ASM;
R_DrawSpanMasked = R_DrawSpanMaskedP_ASM;
if (CPU.Family <= 5)
{
rt_map4cols = rt_map4cols_asm2;
}
else
{
rt_map4cols = rt_map4cols_asm1;
}
#else
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawColumn = R_DrawColumnP_C;
R_DrawFuzzColumn = R_DrawFuzzColumnP_C;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_C;
R_DrawSpanMasked = R_DrawSpanMaskedP_C;
rt_map4cols = rt_map4cols_c;
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawColumn = R_DrawColumnP_C;
R_DrawFuzzColumn = R_DrawFuzzColumnP_C;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_C;
R_DrawSpanMasked = R_DrawSpanMaskedP_C;
rt_map4cols = rt_map4cols_c;
#endif
R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C;
R_DrawSpanAddClamp = R_DrawSpanAddClampP_C;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C;
R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C;
R_DrawSpanAddClamp = R_DrawSpanAddClampP_C;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C;
R_FillColumn = R_FillColumnP_C;
R_FillAddColumn = R_FillAddColumn_C;
R_FillAddClampColumn = R_FillAddClampColumn_C;
R_FillSubClampColumn = R_FillSubClampColumn_C;
R_FillRevSubClampColumn = R_FillRevSubClampColumn_C;
R_DrawAddColumn = R_DrawAddColumnP_C;
R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C;
R_DrawAddClampColumn = R_DrawAddClampColumnP_C;
R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C;
R_DrawSubClampColumn = R_DrawSubClampColumnP_C;
R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C;
R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C;
R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C;
R_FillSpan = R_FillSpan_C;
R_DrawFogBoundary = R_DrawFogBoundary_C;
R_FillColumnHoriz = R_FillColumnHorizP_C;
R_DrawFogBoundary = R_DrawFogBoundary_C;
R_MapTiltedPlane = R_MapTiltedPlane_C;
R_MapColoredPlane = R_MapColoredPlane_C;
R_DrawParticle = R_DrawParticle_C;
#ifdef X86_ASM
R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); };
R_DrawSlab = R_DrawSlabA;
#else
R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); };
R_DrawSlab = R_DrawSlabC;
#endif
tmvline1_add = tmvline1_add_C;
tmvline4_add = tmvline4_add_C;
tmvline1_addclamp = tmvline1_addclamp_C;
tmvline4_addclamp = tmvline4_addclamp_C;
tmvline1_subclamp = tmvline1_subclamp_C;
tmvline4_subclamp = tmvline4_subclamp_C;
tmvline1_revsubclamp = tmvline1_revsubclamp_C;
tmvline4_revsubclamp = tmvline4_revsubclamp_C;
#ifdef X86_ASM
rt_copy1col = rt_copy1col_asm;
rt_copy4cols = rt_copy4cols_asm;
rt_map1col = rt_map1col_asm;
rt_shaded4cols = rt_shaded4cols_asm;
rt_add4cols = rt_add4cols_asm;
rt_addclamp4cols = rt_addclamp4cols_asm;
#else
rt_copy1col = rt_copy1col_c;
rt_copy4cols = rt_copy4cols_c;
rt_map1col = rt_map1col_c;
rt_shaded4cols = rt_shaded4cols_c;
rt_add4cols = rt_add4cols_c;
rt_addclamp4cols = rt_addclamp4cols_c;
#endif
rt_shaded1col = rt_shaded1col_c;
rt_add1col = rt_add1col_c;
rt_addclamp1col = rt_addclamp1col_c;
rt_subclamp1col = rt_subclamp1col_c;
rt_revsubclamp1col = rt_revsubclamp1col_c;
rt_tlate1col = rt_tlate1col_c;
rt_tlateadd1col = rt_tlateadd1col_c;
rt_tlateaddclamp1col = rt_tlateaddclamp1col_c;
rt_tlatesubclamp1col = rt_tlatesubclamp1col_c;
rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c;
rt_subclamp4cols = rt_subclamp4cols_c;
rt_revsubclamp4cols = rt_revsubclamp4cols_c;
rt_tlate4cols = rt_tlate4cols_c;
rt_tlateadd4cols = rt_tlateadd4cols_c;
rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c;
rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c;
rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c;
rt_initcols = rt_initcols_pal;
rt_span_coverage = rt_span_coverage_pal;
if (pointers_saved)
{
pointers_saved = false;
dovline1 = dovline1_saved;
doprevline1 = doprevline1_saved;
domvline1 = domvline1_saved;
dovline4 = dovline4_saved;
domvline4 = domvline4_saved;
}
}
colfunc = basecolfunc = R_DrawColumn;
fuzzcolfunc = R_DrawFuzzColumn;
transcolfunc = R_DrawTranslatedColumn;
spanfunc = R_DrawSpan;
// [RH] Horizontal column drawers
hcolfunc_pre = R_DrawColumnHoriz;
hcolfunc_post1 = rt_map1col;
hcolfunc_post4 = rt_map4cols;
}
// [RH] Choose column drawers in a single place
@ -2213,7 +2501,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
{
if (flags & STYLEF_ColorIsFixed)
{
colfunc = R_FillColumnP;
colfunc = R_FillColumn;
hcolfunc_post1 = rt_copy1col;
hcolfunc_post4 = rt_copy4cols;
}
@ -2235,16 +2523,22 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
{
dc_srcblend = Col2RGB8_Inverse[fglevel>>10];
dc_destblend = Col2RGB8_LessPrecision[bglevel>>10];
dc_srcalpha = fglevel;
dc_destalpha = bglevel;
}
else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT)
{
dc_srcblend = Col2RGB8[fglevel>>10];
dc_destblend = Col2RGB8[bglevel>>10];
dc_srcalpha = fglevel;
dc_destalpha = bglevel;
}
else
{
dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10];
dc_destblend = Col2RGB8_LessPrecision[bglevel>>10];
dc_srcalpha = fglevel;
dc_destalpha = bglevel;
}
switch (op)
{
@ -2263,13 +2557,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawAddColumnP_C;
colfunc = R_DrawAddColumn;
hcolfunc_post1 = rt_add1col;
hcolfunc_post4 = rt_add4cols;
}
else
{
colfunc = R_DrawTlatedAddColumnP_C;
colfunc = R_DrawTlatedAddColumn;
hcolfunc_post1 = rt_tlateadd1col;
hcolfunc_post4 = rt_tlateadd4cols;
}
@ -2284,13 +2578,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawAddClampColumnP_C;
colfunc = R_DrawAddClampColumn;
hcolfunc_post1 = rt_addclamp1col;
hcolfunc_post4 = rt_addclamp4cols;
}
else
{
colfunc = R_DrawAddClampTranslatedColumnP_C;
colfunc = R_DrawAddClampTranslatedColumn;
hcolfunc_post1 = rt_tlateaddclamp1col;
hcolfunc_post4 = rt_tlateaddclamp4cols;
}
@ -2306,13 +2600,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawSubClampColumnP_C;
colfunc = R_DrawSubClampColumn;
hcolfunc_post1 = rt_subclamp1col;
hcolfunc_post4 = rt_subclamp4cols;
}
else
{
colfunc = R_DrawSubClampTranslatedColumnP_C;
colfunc = R_DrawSubClampTranslatedColumn;
hcolfunc_post1 = rt_tlatesubclamp1col;
hcolfunc_post4 = rt_tlatesubclamp4cols;
}
@ -2331,13 +2625,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawRevSubClampColumnP_C;
colfunc = R_DrawRevSubClampColumn;
hcolfunc_post1 = rt_revsubclamp1col;
hcolfunc_post4 = rt_revsubclamp4cols;
}
else
{
colfunc = R_DrawRevSubClampTranslatedColumnP_C;
colfunc = R_DrawRevSubClampTranslatedColumn;
hcolfunc_post1 = rt_tlaterevsubclamp1col;
hcolfunc_post4 = rt_tlaterevsubclamp4cols;
}
@ -2412,11 +2706,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
colfunc = R_DrawShadedColumn;
hcolfunc_post1 = rt_shaded1col;
hcolfunc_post4 = rt_shaded4cols;
dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)];
dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps;
dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)];
basecolormap = &ShadeFakeColormap[16-alpha];
if (fixedlightlev >= 0 && fixedcolormap == NULL)
{
dc_colormap += fixedlightlev;
R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
}
else
{
R_SetColorMapLight(basecolormap, 0, 0);
}
return r_columnmethod ? DoDraw1 : DoDraw0;
}
@ -2426,10 +2724,10 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
if (style.Flags & STYLEF_ColorIsFixed)
{
int x = fglevel >> 10;
int r = RPART(color);
int g = GPART(color);
int b = BPART(color);
uint32_t x = fglevel >> 10;
uint32_t r = RPART(color);
uint32_t g = GPART(color);
uint32_t b = BPART(color);
// dc_color is used by the rt_* routines. It is indexed into dc_srcblend.
dc_color = RGB32k.RGB[r>>3][g>>3][b>>3];
if (style.Flags & STYLEF_InvertSource)
@ -2438,11 +2736,13 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
g = 255 - g;
b = 255 - b;
}
uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255);
dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b;
// dc_srccolor is used by the R_Fill* routines. It is premultiplied
// with the alpha.
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff;
hcolfunc_pre = R_FillColumnHorizP;
dc_colormap = identitymap;
hcolfunc_pre = R_FillColumnHoriz;
R_SetColorMapLight(&identitycolormap, 0, 0);
}
if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags))
@ -2459,25 +2759,25 @@ void R_FinishSetPatchStyle ()
bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)())
{
if (colfunc == R_DrawAddColumnP_C)
if (colfunc == R_DrawAddColumn)
{
*tmvline1 = tmvline1_add;
*tmvline4 = tmvline4_add;
return true;
}
if (colfunc == R_DrawAddClampColumnP_C)
if (colfunc == R_DrawAddClampColumn)
{
*tmvline1 = tmvline1_addclamp;
*tmvline4 = tmvline4_addclamp;
return true;
}
if (colfunc == R_DrawSubClampColumnP_C)
if (colfunc == R_DrawSubClampColumn)
{
*tmvline1 = tmvline1_subclamp;
*tmvline4 = tmvline4_subclamp;
return true;
}
if (colfunc == R_DrawRevSubClampColumnP_C)
if (colfunc == R_DrawRevSubClampColumn)
{
*tmvline1 = tmvline1_revsubclamp;
*tmvline4 = tmvline4_revsubclamp;
@ -2486,3 +2786,70 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)())
return false;
}
// Makes a raw translation table the active column colormap. No FColormap is
// associated with such a table, so dc_fcolormap is cleared. When the true
// color renderer is active the column shade constants are reset as well:
// every light channel becomes 256, every fade colour channel becomes 0,
// desaturation is off and simple shading is selected; dc_light is zeroed.
void R_SetTranslationMap(lighttable_t *translation)
{
	dc_colormap = translation;
	dc_fcolormap = nullptr;

	// The palette renderer needs nothing beyond the table itself.
	if (!r_swtruecolor)
		return;

	dc_light = 0;

	dc_shade_constants.simple_shade = true;
	dc_shade_constants.desaturate = 0;

	dc_shade_constants.light_alpha = 256;
	dc_shade_constants.light_red = 256;
	dc_shade_constants.light_green = 256;
	dc_shade_constants.light_blue = 256;

	dc_shade_constants.fade_alpha = 256;
	dc_shade_constants.fade_red = 0;
	dc_shade_constants.fade_green = 0;
	dc_shade_constants.fade_blue = 0;
}
// Sets dc_fcolormap, dc_colormap and (true color only) dc_light and
// dc_shade_constants for the column drawers.
//
// base_colormap: colormap to draw with; also stored in dc_fcolormap.
// light, shade:  forwarded unchanged to GETPALOOKUP (palette output) or
//                LIGHTSCALE (true color output).
void R_SetColorMapLight(FColormap *base_colormap, float light, int shade)
{
	dc_fcolormap = base_colormap;

	if (!r_swtruecolor)
	{
		// Palette output: the light level is baked into the table pointer.
		dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT);
		return;
	}

	// True color output: keep the unshifted table and carry the light level
	// separately in dc_light.
	dc_colormap = base_colormap->Maps;
	dc_light = LIGHTSCALE(light, shade);

	// Light colour channels are rescaled from 0..255 to 0..256.
	dc_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255;
	dc_shade_constants.light_red = base_colormap->Color.r * 256 / 255;
	dc_shade_constants.light_green = base_colormap->Color.g * 256 / 255;
	dc_shade_constants.light_blue = base_colormap->Color.b * 256 / 255;

	// Fade colour channels are copied as they are.
	dc_shade_constants.fade_alpha = base_colormap->Fade.a;
	dc_shade_constants.fade_red = base_colormap->Fade.r;
	dc_shade_constants.fade_green = base_colormap->Fade.g;
	dc_shade_constants.fade_blue = base_colormap->Fade.b;

	dc_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256;

	// Simple shading applies only to an untinted colormap: light colour
	// 0x00ffffff, fade colour 0x00000000 and no desaturation.
	dc_shade_constants.simple_shade = (base_colormap->Desaturate == 0 && base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000);
}
// Span-drawer counterpart of R_SetColorMapLight: sets ds_fcolormap,
// ds_colormap and (true color only) ds_light and ds_shade_constants.
//
// base_colormap: colormap to draw with; also stored in ds_fcolormap.
// light, shade:  forwarded unchanged to GETPALOOKUP (palette output) or
//                LIGHTSCALE (true color output).
void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade)
{
	ds_fcolormap = base_colormap;

	if (!r_swtruecolor)
	{
		// Palette output: the light level is baked into the table pointer.
		ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT);
		return;
	}

	// True color output: keep the unshifted table and carry the light level
	// separately in ds_light.
	ds_colormap = base_colormap->Maps;
	ds_light = LIGHTSCALE(light, shade);

	// Light colour channels are rescaled from 0..255 to 0..256.
	ds_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255;
	ds_shade_constants.light_red = base_colormap->Color.r * 256 / 255;
	ds_shade_constants.light_green = base_colormap->Color.g * 256 / 255;
	ds_shade_constants.light_blue = base_colormap->Color.b * 256 / 255;

	// Fade colour channels are copied as they are.
	ds_shade_constants.fade_alpha = base_colormap->Fade.a;
	ds_shade_constants.fade_red = base_colormap->Fade.r;
	ds_shade_constants.fade_green = base_colormap->Fade.g;
	ds_shade_constants.fade_blue = base_colormap->Fade.b;

	ds_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256;

	// Simple shading applies only to an untinted colormap: light colour
	// 0x00ffffff, fade colour 0x00000000 and no desaturation.
	ds_shade_constants.simple_shade = (base_colormap->Desaturate == 0 && base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000);
}

View file

@ -25,24 +25,55 @@
#include "r_defs.h"
// Spectre/Invisibility.
#define FUZZTABLE 50
extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine
extern "C" int fuzzpos;
extern "C" int fuzzviewheight;
struct FColormap;
// Per-colormap shading parameters handed to the true color drawers.
// R_SetColorMapLight / R_SetDSColorMapLight fill one of these from an
// FColormap; R_SetTranslationMap resets one to neutral values.
struct ShadeConstants
{
// Light colour channels, filled as Color.<channel> * 256 / 255 (so 255 maps to 256).
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
// Fade colour channels, copied unscaled from Fade.<channel>.
// NOTE(review): R_SetTranslationMap stores 256 in fade_alpha - confirm the intended range.
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
// Desaturation amount, filled as MIN(abs(Desaturate), 255) * 255 / 256.
uint16_t desaturate;
// True when the colormap has light colour 0x00ffffff, fade colour 0x00000000 and
// no desaturation; presumably lets the drawers take a cheaper shading path.
bool simple_shade;
};
extern "C" int ylookup[MAXHEIGHT];
extern "C" int dc_pitch; // [RH] Distance between rows
extern "C" lighttable_t*dc_colormap;
extern "C" FColormap *dc_fcolormap;
extern "C" ShadeConstants dc_shade_constants;
extern "C" fixed_t dc_light;
extern "C" int dc_x;
extern "C" int dc_yl;
extern "C" int dc_yh;
extern "C" fixed_t dc_iscale;
extern double dc_texturemid;
extern "C" fixed_t dc_texturefrac;
extern "C" uint32_t dc_textureheight;
extern "C" int dc_color; // [RH] For flat colors (no texturing)
extern "C" DWORD dc_srccolor;
extern "C" uint32_t dc_srccolor_bgra;
extern "C" DWORD *dc_srcblend;
extern "C" DWORD *dc_destblend;
extern "C" fixed_t dc_srcalpha;
extern "C" fixed_t dc_destalpha;
// first pixel in a column
extern "C" const BYTE* dc_source;
extern "C" const BYTE* dc_source2;
extern "C" uint32_t dc_texturefracx;
extern "C" BYTE *dc_dest, *dc_destorg;
extern "C" int dc_count;
@ -50,7 +81,11 @@ extern "C" int dc_count;
extern "C" DWORD vplce[4];
extern "C" DWORD vince[4];
extern "C" BYTE* palookupoffse[4];
extern "C" fixed_t palookuplight[4];
extern "C" const BYTE* bufplce[4];
extern "C" const BYTE* bufplce2[4];
extern "C" uint32_t buftexturefracx[4];
extern "C" uint32_t bufheight[4];
// [RH] Temporary buffer for column drawing
extern "C" BYTE *dc_temp;
@ -58,7 +93,6 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT];
extern "C" unsigned int *dc_ctspan[4];
extern "C" unsigned int horizspans[4];
// [RH] Pointers to the different column and span drawers...
// The span blitting interface.
@ -67,12 +101,7 @@ extern void (*R_DrawColumn)(void);
extern DWORD (*dovline1) ();
extern DWORD (*doprevline1) ();
#ifdef X64_ASM
#define dovline4 vlinetallasm4
extern "C" void vlinetallasm4();
#else
extern void (*dovline4) ();
#endif
extern void setupvline (int);
extern DWORD (*domvline1) ();
@ -94,8 +123,8 @@ extern void (*R_DrawTranslatedColumn)(void);
// Span drawing for rows, floor/ceiling. No Spectre effect needed.
extern void (*R_DrawSpan)(void);
void R_SetupSpanBits(FTexture *tex);
void R_SetSpanColormap(BYTE *colormap);
void R_SetSpanSource(const BYTE *pixels);
void R_SetSpanColormap(FDynamicColormap *colormap, int shade);
void R_SetSpanSource(FTexture *tex);
// Span drawing for masked textures.
extern void (*R_DrawSpanMasked)(void);
@ -125,33 +154,33 @@ extern "C"
void rt_copy1col_c (int hx, int sx, int yl, int yh);
void rt_copy4cols_c (int sx, int yl, int yh);
void rt_shaded1col (int hx, int sx, int yl, int yh);
void rt_shaded1col_c (int hx, int sx, int yl, int yh);
void rt_shaded4cols_c (int sx, int yl, int yh);
void rt_shaded4cols_asm (int sx, int yl, int yh);
void rt_map1col_c (int hx, int sx, int yl, int yh);
void rt_add1col (int hx, int sx, int yl, int yh);
void rt_addclamp1col (int hx, int sx, int yl, int yh);
void rt_subclamp1col (int hx, int sx, int yl, int yh);
void rt_revsubclamp1col (int hx, int sx, int yl, int yh);
void rt_add1col_c (int hx, int sx, int yl, int yh);
void rt_addclamp1col_c (int hx, int sx, int yl, int yh);
void rt_subclamp1col_c (int hx, int sx, int yl, int yh);
void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh);
void rt_tlate1col (int hx, int sx, int yl, int yh);
void rt_tlateadd1col (int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh);
void rt_tlate1col_c (int hx, int sx, int yl, int yh);
void rt_tlateadd1col_c (int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh);
void rt_map4cols_c (int sx, int yl, int yh);
void rt_add4cols_c (int sx, int yl, int yh);
void rt_addclamp4cols_c (int sx, int yl, int yh);
void rt_subclamp4cols (int sx, int yl, int yh);
void rt_revsubclamp4cols (int sx, int yl, int yh);
void rt_subclamp4cols_c (int sx, int yl, int yh);
void rt_revsubclamp4cols_c (int sx, int yl, int yh);
void rt_tlate4cols (int sx, int yl, int yh);
void rt_tlateadd4cols (int sx, int yl, int yh);
void rt_tlateaddclamp4cols (int sx, int yl, int yh);
void rt_tlatesubclamp4cols (int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols (int sx, int yl, int yh);
void rt_tlate4cols_c (int sx, int yl, int yh);
void rt_tlateadd4cols_c (int sx, int yl, int yh);
void rt_tlateaddclamp4cols_c (int sx, int yl, int yh);
void rt_tlatesubclamp4cols_c (int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh);
void rt_copy1col_asm (int hx, int sx, int yl, int yh);
void rt_map1col_asm (int hx, int sx, int yl, int yh);
@ -163,30 +192,49 @@ void rt_add4cols_asm (int sx, int yl, int yh);
void rt_addclamp4cols_asm (int sx, int yl, int yh);
}
extern void (*rt_map4cols)(int sx, int yl, int yh);
extern void (*rt_copy1col)(int hx, int sx, int yl, int yh);
extern void (*rt_copy4cols)(int sx, int yl, int yh);
#ifdef X86_ASM
#define rt_copy1col rt_copy1col_asm
#define rt_copy4cols rt_copy4cols_asm
#define rt_map1col rt_map1col_asm
#define rt_shaded4cols rt_shaded4cols_asm
#define rt_add4cols rt_add4cols_asm
#define rt_addclamp4cols rt_addclamp4cols_asm
#else
#define rt_copy1col rt_copy1col_c
#define rt_copy4cols rt_copy4cols_c
#define rt_map1col rt_map1col_c
#define rt_shaded4cols rt_shaded4cols_c
#define rt_add4cols rt_add4cols_c
#define rt_addclamp4cols rt_addclamp4cols_c
#endif
extern void (*rt_shaded1col)(int hx, int sx, int yl, int yh);
extern void (*rt_shaded4cols)(int sx, int yl, int yh);
extern void (*rt_map1col)(int hx, int sx, int yl, int yh);
extern void (*rt_add1col)(int hx, int sx, int yl, int yh);
extern void (*rt_addclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_subclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlate1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlateadd1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_map4cols)(int sx, int yl, int yh);
extern void (*rt_add4cols)(int sx, int yl, int yh);
extern void (*rt_addclamp4cols)(int sx, int yl, int yh);
extern void (*rt_subclamp4cols)(int sx, int yl, int yh);
extern void (*rt_revsubclamp4cols)(int sx, int yl, int yh);
extern void (*rt_tlate4cols)(int sx, int yl, int yh);
extern void (*rt_tlateadd4cols)(int sx, int yl, int yh);
extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh);
extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh);
extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh);
extern void (*rt_initcols)(BYTE *buffer);
extern void (*rt_span_coverage)(int x, int start, int stop);
void rt_draw4cols (int sx);
// [RH] Preps the temporary horizontal buffer.
void rt_initcols (BYTE *buffer=NULL);
void rt_initcols_pal (BYTE *buffer);
void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip);
void rt_span_coverage_pal(int x, int start, int stop);
extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip);
void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip);
#ifdef X86_ASM
@ -218,26 +266,47 @@ void R_DrawSpanMaskedTranslucentP_C (void);
void R_DrawTlatedLucentColumnP_C (void);
#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C
void R_FillColumnP (void);
void R_FillColumnHorizP (void);
void R_FillSpan (void);
extern void(*R_FillColumn)(void);
extern void(*R_FillAddColumn)(void);
extern void(*R_FillAddClampColumn)(void);
extern void(*R_FillSubClampColumn)(void);
extern void(*R_FillRevSubClampColumn)(void);
extern void(*R_DrawAddColumn)(void);
extern void(*R_DrawTlatedAddColumn)(void);
extern void(*R_DrawAddClampColumn)(void);
extern void(*R_DrawAddClampTranslatedColumn)(void);
extern void(*R_DrawSubClampColumn)(void);
extern void(*R_DrawSubClampTranslatedColumn)(void);
extern void(*R_DrawRevSubClampColumn)(void);
extern void(*R_DrawRevSubClampTranslatedColumn)(void);
extern void(*R_FillSpan)(void);
extern void(*R_FillColumnHoriz)(void);
void R_FillColumnP_C (void);
void R_FillColumnHorizP_C (void);
void R_FillSpan_C (void);
extern void(*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade);
extern void(*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
#ifdef X86_ASM
#define R_SetupDrawSlab R_SetupDrawSlabA
#define R_DrawSlab R_DrawSlabA
extern "C" void R_SetupDrawSlabA(const BYTE *colormap);
extern "C" void R_DrawSlabA(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
#else
#define R_SetupDrawSlab R_SetupDrawSlabC
#define R_DrawSlab R_DrawSlabC
extern "C" void R_SetupDrawSlabC(const BYTE *colormap);
extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
#endif
extern "C" void R_SetupDrawSlab(const BYTE *colormap);
extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
extern "C" int ds_y;
extern "C" int ds_x1;
extern "C" int ds_x2;
extern "C" FColormap* ds_fcolormap;
extern "C" lighttable_t* ds_colormap;
extern "C" ShadeConstants ds_shade_constants;
extern "C" dsfixed_t ds_light;
extern "C" dsfixed_t ds_xfrac;
extern "C" dsfixed_t ds_yfrac;
@ -249,12 +318,14 @@ extern "C" fixed_t ds_alpha;
// start of a 64*64 tile image
extern "C" const BYTE* ds_source;
extern "C" bool ds_source_mipmapped;
extern "C" int ds_color; // [RH] For flat color (no texturing)
extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/];
extern FDynamicColormap ShadeFakeColormap[16];
extern BYTE identitymap[256];
extern FDynamicColormap identitycolormap;
extern BYTE *dc_translation;
// [RH] Added for multiresolution support
@ -278,6 +349,15 @@ inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translati
// style was STYLE_Shade
void R_FinishSetPatchStyle ();
extern fixed_t(*tmvline1_add)();
extern void(*tmvline4_add)();
extern fixed_t(*tmvline1_addclamp)();
extern void(*tmvline4_addclamp)();
extern fixed_t(*tmvline1_subclamp)();
extern void(*tmvline4_subclamp)();
extern fixed_t(*tmvline1_revsubclamp)();
extern void(*tmvline4_revsubclamp)();
// transmaskwallscan calls this to find out what column drawers to use
bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)());
@ -293,4 +373,19 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_
// transmaskwallscan is like maskwallscan, but it can also blend to the background
void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
// Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color)
void R_SetColorMapLight(FColormap *base_colormap, float light, int shade);
// Same as R_SetColorMapLight, but for ds_colormap and ds_light
void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade);
void R_SetTranslationMap(lighttable_t *translation);
extern bool r_swtruecolor;
EXTERN_CVAR(Bool, r_multithreaded);
EXTERN_CVAR(Bool, r_magfilter);
EXTERN_CVAR(Bool, r_minfilter);
EXTERN_CVAR(Bool, r_mipmap);
#endif

2969
src/r_draw_rgba.cpp Normal file

File diff suppressed because it is too large Load diff

994
src/r_draw_rgba.h Normal file
View file

@ -0,0 +1,994 @@
// Emacs style mode select -*- C++ -*-
//-----------------------------------------------------------------------------
//
// $Id:$
//
// Copyright (C) 1993-1996 by id Software, Inc.
//
// This source is available for distribution and/or modification
// only under the terms of the DOOM Source Code License as
// published by id Software. All rights reserved.
//
// The source is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License
// for more details.
//
// DESCRIPTION:
// Software renderer drawers for true color output, plus the drawer command
// queue used for multithreaded rendering.
//
//-----------------------------------------------------------------------------
#ifndef __R_DRAW_RGBA__
#define __R_DRAW_RGBA__
#include "r_draw.h"
#include "v_palette.h"
#include <vector>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#ifndef NO_SSE
#include <immintrin.h>
#endif
/////////////////////////////////////////////////////////////////////////////
// Drawer functions:
void rt_initcols_rgba(BYTE *buffer);
void rt_span_coverage_rgba(int x, int start, int stop);
void rt_copy1col_rgba(int hx, int sx, int yl, int yh);
void rt_copy4cols_rgba(int sx, int yl, int yh);
void rt_shaded1col_rgba(int hx, int sx, int yl, int yh);
void rt_shaded4cols_rgba(int sx, int yl, int yh);
void rt_map1col_rgba(int hx, int sx, int yl, int yh);
void rt_add1col_rgba(int hx, int sx, int yl, int yh);
void rt_addclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_subclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_revsubclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlate1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlateadd1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_map4cols_rgba(int sx, int yl, int yh);
void rt_add4cols_rgba(int sx, int yl, int yh);
void rt_addclamp4cols_rgba(int sx, int yl, int yh);
void rt_subclamp4cols_rgba(int sx, int yl, int yh);
void rt_revsubclamp4cols_rgba(int sx, int yl, int yh);
void rt_tlate4cols_rgba(int sx, int yl, int yh);
void rt_tlateadd4cols_rgba(int sx, int yl, int yh);
void rt_tlateaddclamp4cols_rgba(int sx, int yl, int yh);
void rt_tlatesubclamp4cols_rgba(int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols_rgba(int sx, int yl, int yh);
void R_DrawColumnHoriz_rgba();
void R_DrawColumn_rgba();
void R_DrawFuzzColumn_rgba();
void R_DrawTranslatedColumn_rgba();
void R_DrawShadedColumn_rgba();
void R_FillColumn_rgba();
void R_FillAddColumn_rgba();
void R_FillAddClampColumn_rgba();
void R_FillSubClampColumn_rgba();
void R_FillRevSubClampColumn_rgba();
void R_DrawAddColumn_rgba();
void R_DrawTlatedAddColumn_rgba();
void R_DrawAddClampColumn_rgba();
void R_DrawAddClampTranslatedColumn_rgba();
void R_DrawSubClampColumn_rgba();
void R_DrawSubClampTranslatedColumn_rgba();
void R_DrawRevSubClampColumn_rgba();
void R_DrawRevSubClampTranslatedColumn_rgba();
void R_DrawSpan_rgba(void);
void R_DrawSpanMasked_rgba(void);
void R_DrawSpanTranslucent_rgba();
void R_DrawSpanMaskedTranslucent_rgba();
void R_DrawSpanAddClamp_rgba();
void R_DrawSpanMaskedAddClamp_rgba();
void R_FillSpan_rgba();
void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy);
void R_DrawColoredSpan_rgba(int y, int x1, int x2);
void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade);
void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip);
DWORD vlinec1_rgba();
void vlinec4_rgba();
DWORD mvlinec1_rgba();
void mvlinec4_rgba();
fixed_t tmvline1_add_rgba();
void tmvline4_add_rgba();
fixed_t tmvline1_addclamp_rgba();
void tmvline4_addclamp_rgba();
fixed_t tmvline1_subclamp_rgba();
void tmvline4_subclamp_rgba();
fixed_t tmvline1_revsubclamp_rgba();
void tmvline4_revsubclamp_rgba();
void R_FillColumnHoriz_rgba();
void R_FillSpan_rgba();
/////////////////////////////////////////////////////////////////////////////
// Multithreaded rendering infrastructure:
// Redirect drawer commands to worker threads
void R_BeginDrawerCommands();
// Wait until all drawers finished executing
void R_EndDrawerCommands();
struct FSpecialColormap;
class DrawerCommandQueue;
// Worker data for each thread executing drawer commands.
//
// Screen lines are shared between workers by interleaving: within the current
// pass [pass_start_y, pass_end_y) a worker owns exactly the lines for which
// line % num_cores == core. The helper methods below translate a drawer's
// (first_line, count) range into the subset this worker has to touch.
class DrawerThread
{
public:
	std::thread thread;

	// Thread line index of this thread
	int core = 0;

	// Number of active threads
	int num_cores = 1;

	// Range of rows processed this pass
	int pass_start_y = 0;
	int pass_end_y = MAXHEIGHT;

	// Per-thread scratch storage of MAXHEIGHT * 4 pixels.
	// NOTE(review): presumably the default target of dc_temp_rgba - confirm in rt_initcols_rgba.
	uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];

	// Current temporary column buffer. Initialised to null so that a freshly
	// constructed thread never carries an indeterminate pointer.
	uint32_t *dc_temp_rgba = nullptr;

	// Returns true if the line is NOT rendered by this thread, i.e. it lies
	// outside the current pass or belongs to another core.
	bool line_skipped_by_thread(int line)
	{
		return line < pass_start_y || line >= pass_end_y || line % num_cores != core;
	}

	// The number of lines to skip to reach the first line to be rendered by this thread
	int skipped_by_thread(int first_line)
	{
		// Lines before the start of the pass are skipped outright.
		int pass_skip = MAX(pass_start_y - first_line, 0);
		// Then advance to the next line congruent to `core` modulo num_cores.
		// The outer modulo keeps the result in range when the inner remainder is zero or negative.
		int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores;
		return pass_skip + core_skip;
	}

	// The number of lines to be rendered by this thread
	int count_for_thread(int first_line, int count)
	{
		// Clip the range against the end of the pass.
		int lines_until_pass_end = MAX(pass_end_y - first_line, 0);
		count = MIN(count, lines_until_pass_end);
		// Of the remaining lines every num_cores-th one is ours (rounded up).
		int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
		return MAX(c, 0);
	}

	// Calculate the dest address for the first line to be rendered by this thread
	// (pitch is counted in uint32_t elements, matching the pointer type).
	uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest)
	{
		return dest + skipped_by_thread(first_line) * pitch;
	}
};
// Task to be executed by each worker thread.
// NOTE(review): the class is polymorphic but has no virtual destructor; that is
// only safe while commands are never deleted through a DrawerCommand pointer.
class DrawerCommand
{
protected:
	// Byte offset of dc_dest from dc_destorg, divided by (dc_pitch * 4),
	// captured at construction time.
	// NOTE(review): presumably the destination row index at 4 bytes per pixel - confirm.
	int _dest_y;

public:
	DrawerCommand()
		: _dest_y(static_cast<int>((dc_dest - dc_destorg) / (dc_pitch * 4)))
	{
	}

	// Performs the command's share of the work for the given worker.
	virtual void Execute(DrawerThread *thread) = 0;
};
EXTERN_CVAR(Bool, r_multithreaded)
EXTERN_CVAR(Bool, r_mipmap)
// Manages queueing up commands and executing them on worker threads.
// All public entry points are static and go through the private Instance().
class DrawerCommandQueue
{
	// Fixed-size pool that AllocMemory() hands out command storage from.
	enum { memorypool_size = 16 * 1024 * 1024 };
	char memorypool[memorypool_size];
	size_t memorypool_pos = 0;

	// Commands queued by QueueCommand() that have not been executed yet.
	std::vector<DrawerCommand *> commands;

	std::vector<DrawerThread> threads;

	// NOTE(review): the start_* members presumably hand a batch to the workers
	// and the end_* members signal completion - confirm in the implementation.
	std::mutex start_mutex;
	std::condition_variable start_condition;
	std::vector<DrawerCommand *> active_commands;
	bool shutdown_flag = false;
	int run_id = 0;

	std::mutex end_mutex;
	std::condition_variable end_condition;
	size_t finished_threads = 0;

	// Zero means commands are executed immediately on the calling thread
	// (see QueueCommand).
	int threaded_render = 0;
	// Thread context used for that immediate execution path.
	DrawerThread single_core_thread;
	int num_passes = 1;
	int rows_in_pass = MAXHEIGHT;

	void StartThreads();
	void StopThreads();
	void Finish();

	static DrawerCommandQueue *Instance();

	DrawerCommandQueue();
	~DrawerCommandQueue();

public:
	// Allocate memory valid for the duration of a command execution
	static void* AllocMemory(size_t size);

	// Queue command to be executed by drawer worker threads.
	// T is the concrete command type; args are forwarded to its constructor.
	template<typename T, typename... Types>
	static void QueueCommand(Types &&... args)
	{
		DrawerCommandQueue *queue = Instance();

		// Not inside a threaded section, or threading disabled by the cvar:
		// build the command on the stack and run it right away.
		const bool run_now = queue->threaded_render == 0 || !r_multithreaded;
		if (run_now)
		{
			T command(std::forward<Types>(args)...);
			command.Execute(&queue->single_core_thread);
			return;
		}

		void *storage = AllocMemory(sizeof(T));
		if (storage == nullptr)
		{
			// Out of memory - render what we got, then try once more.
			queue->Finish();
			storage = AllocMemory(sizeof(T));
			if (storage == nullptr)
				return;
		}

		// Construct the command in place and queue it.
		queue->commands.push_back(new (storage) T(std::forward<Types>(args)...));
	}

	// Redirects all drawing commands to worker threads until End is called
	// Begin/End blocks can be nested.
	static void Begin();

	// End redirection and wait until all worker threads finished executing
	static void End();

	// Waits until all worker threads finished executing
	static void WaitForWorkers();
};
/////////////////////////////////////////////////////////////////////////////
// Drawer commands:
// Drawer command carrying a buffer description (pointer, pitch, width, height) and a
// start/end RGB triple.
// NOTE(review): judging by the name and constructor arguments this applies an
// FSpecialColormap to the frame buffer; the constructor and Execute bodies are defined
// elsewhere, so confirm the exact semantics there.
class ApplySpecialColormapRGBACommand : public DrawerCommand
{
// Target buffer and its dimensions - presumably taken from `screen`; TODO confirm units of pitch
BYTE *buffer;
int pitch;
int width;
int height;
// First color of the mapping - presumably derived from `colormap`; TODO confirm
int start_red;
int start_green;
int start_blue;
// Second color of the mapping - presumably derived from `colormap`; TODO confirm
int end_red;
int end_green;
int end_blue;
public:
ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen);
void Execute(DrawerThread *thread) override;
};
// Adapter that turns a command type plus a blend policy into a complete drawer command.
//
// CommandType must provide a nested LoopIterator constructible from (command, thread),
// testable with operator! and advanced with next(). BlendMode is constructed from
// (command, loop) and has its Blend(command, loop) called once per iteration.
template<typename CommandType, typename BlendMode>
class DrawerBlendCommand : public CommandType
{
public:
	void Execute(DrawerThread *thread) override
	{
		typename CommandType::LoopIterator loop(this, thread);
		if (!loop)
			return; // nothing to draw for this thread

		BlendMode blend(*this, loop);
		for (;;)
		{
			blend.Blend(*this, loop);
			if (!loop.next())
				break;
		}
	}
};
/////////////////////////////////////////////////////////////////////////////
// Pixel shading inline functions:
// FORCEINLINE: strongest inlining request the compiler offers, falling back to plain
// `inline` on compilers that are neither MSVC nor GCC compatible. A definition supplied
// earlier (by another header or the build) is left untouched.
#if !defined(FORCEINLINE)
#	ifdef _MSC_VER
#		define FORCEINLINE __forceinline
#	elif defined(__GNUC__)
#		define FORCEINLINE __attribute__((always_inline)) inline
#	else
#		define FORCEINLINE inline
#	endif
#endif
// RESTRICT: pointer qualifier promising the compiler that the pointer is not aliased.
// Expands to nothing on compilers that are neither MSVC nor GCC compatible. A definition
// supplied earlier is left untouched.
#if !defined(RESTRICT)
#	ifdef _MSC_VER
#		define RESTRICT __restrict
#	elif defined(__GNUC__)
#		define RESTRICT __restrict__
#	else
#		define RESTRICT
#	endif
#endif
// Scalar lighting helpers that produce 32 bit ARGB pixels
class LightBgra
{
	// Shading path shared by shade_pal_index and shade_bgra.
	//
	// alpha must already sit in bits 24-31; red/green/blue are the unshifted channel values.
	// With constants.simple_shade the channels are only scaled by light/256. Otherwise the
	// color is desaturated, faded towards the fade color by (256 - light) and finally
	// multiplied with the light color.
	FORCEINLINE static uint32_t shade_channels(uint32_t alpha, uint32_t red, uint32_t green, uint32_t blue, uint32_t light, const ShadeConstants &constants)
	{
		if (constants.simple_shade)
		{
			red = red * light / 256;
			green = green * light / 256;
			blue = blue * light / 256;
			return alpha | (red << 16) | (green << 8) | blue;
		}

		uint32_t fade_weight = 256 - light;
		uint32_t keep_color = 256 - constants.desaturate;
		uint32_t gray = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate;

		// Desaturate: mix each channel with the weighted gray value
		red = (red * keep_color + gray) / 256;
		green = (green * keep_color + gray) / 256;
		blue = (blue * keep_color + gray) / 256;

		// Fade: blend towards the fade color as light decreases
		red = (constants.fade_red * fade_weight + red * light) / 256;
		green = (constants.fade_green * fade_weight + green * light) / 256;
		blue = (constants.fade_blue * fade_weight + blue * light) / 256;

		// Tint: multiply with the light color
		red = (red * constants.light_red) / 256;
		green = (green * constants.light_green) / 256;
		blue = (blue * constants.light_blue) / 256;

		return alpha | (red << 16) | (green << 8) | blue;
	}

public:
	// calculates the light constant passed to the shade_pal_index function
	FORCEINLINE static uint32_t calc_light_multiplier(dsfixed_t light)
	{
		return 256 - (light >> (FRACBITS - 8));
	}

	// Calculates a fully opaque ARGB8 color for the given palette index and light multiplier
	FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light)
	{
		const PalEntry &entry = GPalette.BaseColors[index];
		uint32_t red = entry.r;
		uint32_t green = entry.g;
		uint32_t blue = entry.b;

		uint32_t lit_red = red * light / 256;
		uint32_t lit_green = green * light / 256;
		uint32_t lit_blue = blue * light / 256;
		return 0xff000000 | (lit_red << 16) | (lit_green << 8) | lit_blue;
	}

	// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap.
	// The alpha bits of the palette entry are carried over unchanged.
	FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants)
	{
		const PalEntry &entry = GPalette.BaseColors[index];
		return shade_channels(entry.d & 0xff000000, entry.r, entry.g, entry.b, light, constants);
	}

	// Scales the color channels of an ARGB8 value by light/256 and forces alpha to 255
	FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light)
	{
		uint32_t lit_red = RPART(color) * light / 256;
		uint32_t lit_green = GPART(color) * light / 256;
		uint32_t lit_blue = BPART(color) * light / 256;
		return 0xff000000 | (lit_red << 16) | (lit_green << 8) | lit_blue;
	}

	// Shades an ARGB8 value with the light multiplier and dynamic colormap, keeping its alpha bits
	FORCEINLINE static uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants)
	{
		return shade_channels(color & 0xff000000, (color >> 16) & 0xff, (color >> 8) & 0xff, color & 0xff, light, constants);
	}
};
// Scalar blend operations on ARGB8 pixels.
//
// fg is the source pixel and bg the pixel already in the destination. srcalpha and destalpha
// are blend weights on a 0..256 scale. Every function except copy() returns a pixel with
// alpha forced to 255.
class BlendBgra
{
public:
	// No blending: the source pixel replaces the destination
	FORCEINLINE static uint32_t copy(uint32_t fg)
	{
		return fg;
	}

	// fg * srcalpha + bg * destalpha, saturated at 255 per channel
	FORCEINLINE static uint32_t add(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha)
	{
		uint32_t red = MIN<uint32_t>((RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 255);
		uint32_t green = MIN<uint32_t>((GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 255);
		uint32_t blue = MIN<uint32_t>((BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 255);
		return 0xff000000 | (red << 16) | (green << 8) | blue;
	}

	// bg * destalpha - fg * srcalpha, clamped to 0..255 per channel.
	// The 0x10000 bias (256 after the shift) keeps the unsigned intermediate from wrapping
	// below zero; it is removed again after clamping.
	FORCEINLINE static uint32_t sub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha)
	{
		uint32_t red = clamp<uint32_t>((0x10000 - RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
		uint32_t green = clamp<uint32_t>((0x10000 - GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
		uint32_t blue = clamp<uint32_t>((0x10000 - BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
		return 0xff000000 | (red << 16) | (green << 8) | blue;
	}

	// fg * srcalpha - bg * destalpha, clamped to 0..255 per channel (same bias trick as sub)
	FORCEINLINE static uint32_t revsub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha)
	{
		uint32_t red = clamp<uint32_t>((0x10000 + RPART(fg) * srcalpha - RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
		uint32_t green = clamp<uint32_t>((0x10000 + GPART(fg) * srcalpha - GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
		uint32_t blue = clamp<uint32_t>((0x10000 + BPART(fg) * srcalpha - BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
		return 0xff000000 | (red << 16) | (green << 8) | blue;
	}

	// Blends fg over bg using the alpha channel of fg: (fg * alpha + bg * (256 - alpha)) / 256.
	//
	// Both terms are divided by 256, matching what VEC_ALPHA_BLEND does for four pixels.
	// Because alpha + inv_alpha is always 256, the weighted average cannot exceed 255 and
	// needs no clamping.
	FORCEINLINE static uint32_t alpha_blend(uint32_t fg, uint32_t bg)
	{
		uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256
		uint32_t inv_alpha = 256 - alpha;
		uint32_t red = (RPART(fg) * alpha + RPART(bg) * inv_alpha) / 256;
		uint32_t green = (GPART(fg) * alpha + GPART(bg) * inv_alpha) / 256;
		uint32_t blue = (BPART(fg) * alpha + BPART(bg) * inv_alpha) / 256;
		return 0xff000000 | (red << 16) | (green << 8) | blue;
	}
};
// Texture sampling helpers for 32 bit textures
class SampleBgra
{
	// Weighted sum of four texels. inv_a is the weight towards p01/p11 and inv_b the weight
	// towards p10/p11; the complementary weights are 16 - inv_a and 16 - inv_b. Each channel
	// is summed, biased by 127 and shifted down by 8.
	FORCEINLINE static uint32_t blend_texels(uint32_t p00, uint32_t p01, uint32_t p10, uint32_t p11, uint32_t inv_a, uint32_t inv_b)
	{
		uint32_t a = 16 - inv_a;
		uint32_t b = 16 - inv_b;
		uint32_t w00 = a * b;
		uint32_t w01 = inv_a * b;
		uint32_t w10 = a * inv_b;
		uint32_t w11 = inv_a * inv_b;

		uint32_t red = (RPART(p00) * w00 + RPART(p01) * w01 + RPART(p10) * w10 + RPART(p11) * w11 + 127) >> 8;
		uint32_t green = (GPART(p00) * w00 + GPART(p01) * w01 + GPART(p10) * w10 + GPART(p11) * w11 + 127) >> 8;
		uint32_t blue = (BPART(p00) * w00 + BPART(p01) * w01 + BPART(p10) * w10 + BPART(p11) * w11 + 127) >> 8;
		uint32_t alpha = (APART(p00) * w00 + APART(p01) * w01 + APART(p10) * w10 + APART(p11) * w11 + 127) >> 8;

		return (alpha << 24) | (red << 16) | (green << 8) | blue;
	}

public:
	// Prepares span sampling.
	//
	// When r_mipmap is set and the texture is mipmapped, source/xbits/ybits are advanced to a
	// smaller mip level chosen from the step magnitude (never going below 2 bits per axis).
	// Returns true when the filter cvar matching the situation is enabled: r_magfilter while
	// magnifying, r_minfilter otherwise.
	inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped)
	{
		// Is this a magfilter or minfilter?
		fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS);
		fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS);
		fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1));
		bool magnifying = (magnitude >> FRACBITS == 0);

		if (r_mipmap && mipmapped)
		{
			// Skip one mip level for every set step of `level`; each level follows the previous in memory
			for (int level = magnitude >> (FRACBITS + 1); level != 0; level >>= 1)
			{
				if (xbits <= 2 || ybits <= 2)
					break;

				source += (1 << (xbits)) * (1 << (ybits));
				xbits -= 1;
				ybits -= 1;
			}
		}

		if (magnifying)
			return !!r_magfilter;
		return !!r_minfilter;
	}

	// Bilinear sample between two texture columns.
	//
	// col0/col1 are the two neighbouring columns, texturefracx the horizontal weight towards
	// col1 (used as-is against a scale of 16), texturefracy the vertical texture position,
	// `one` the offset to the next row and height the column height used to scale the position.
	FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height)
	{
		uint32_t scaled_y0 = (texturefracy >> FRACBITS) * height;
		uint32_t scaled_y1 = ((texturefracy + one) >> FRACBITS) * height;
		uint32_t row0 = scaled_y0 >> FRACBITS;
		uint32_t row1 = scaled_y1 >> FRACBITS;

		// The vertical weight comes from the top 4 fractional bits of the second row position
		uint32_t weight_y = (scaled_y1 >> (FRACBITS - 4)) & 15;

		return blend_texels(col0[row0], col0[row1], col1[row0], col1[row1], weight_y, texturefracx);
	}

	// Bilinear sample from a column-major texture addressed with wrapping masks.
	//
	// xfrac/yfrac are shifted right by xbits/ybits to get the texel coordinates, which are
	// then wrapped with masks of (32 - xbits) and (32 - ybits) bits.
	FORCEINLINE static uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits)
	{
		int xshift = (32 - xbits);
		int yshift = (32 - ybits);
		int xmask = (1 << xshift) - 1;
		int ymask = (1 << yshift) - 1;
		uint32_t x = xfrac >> xbits;
		uint32_t y = yfrac >> ybits;

		uint32_t column0 = (x & xmask) << yshift;
		uint32_t column1 = ((x + 1) & xmask) << yshift;
		uint32_t row0 = y & ymask;
		uint32_t row1 = (y + 1) & ymask;

		uint32_t weight_x = (xfrac >> (xbits - 4)) & 15;
		uint32_t weight_y = (yfrac >> (ybits - 4)) & 15;

		return blend_texels(texture[row0 + column0], texture[row1 + column0], texture[row0 + column1], texture[row1 + column1], weight_y, weight_x);
	}

#ifndef NO_SSE
	// Weight lookup used by the VEC_SAMPLE_BILINEAR4_* macros, indexed by inv_b * 32 + inv_a * 2
	static __m128i samplertable[256 * 2];
#endif
};
/////////////////////////////////////////////////////////////////////////////
// SSE/AVX shading macros:
// Declares, in the enclosing scope, the loop-invariant state used by AVX2_SAMPLE_BILINEAR4_COLUMN:
//   baseptr, coloffsets0/1 - col0[0] and the element offsets of col0[i]/col1[i] relative to it
//   mone, mheight, mtexturefracx - four 32 bit values loaded (unaligned) from one/height/texturefracx
//   m127, m16, m15 - constants
// col0 and col1 are indexable with 0..3 and yield const uint32_t pointers.
// NOTE(review): the pointer differences are passed to _mm_setr_epi32 and therefore narrowed to
// 32 bit ints - confirm all columns live close enough to col0[0] for that to be safe.
#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \
const uint32_t *baseptr = col0[0]; \
__m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \
__m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \
__m128i mone = _mm_loadu_si128((const __m128i*)one); \
__m128i m127 = _mm_set1_epi16(127); \
__m128i m16 = _mm_set1_epi32(16); \
__m128i m15 = _mm_set1_epi32(15); \
__m128i mheight = _mm_loadu_si128((const __m128i*)height); \
__m128i mtexturefracx = _mm_loadu_si128((const __m128i*)texturefracx);
// Bilinear-samples four columns at once and stores the four resulting pixels in fg (__m128i).
// Requires the variables declared by AVX2_SAMPLE_BILINEAR4_COLUMN_INIT to be in scope;
// texturefracy points to four 32 bit texture positions (loaded unaligned).
//
// Steps: compute the scaled positions frac_y0/frac_y1 = (texturefracy [+ one] >> FRACBITS) * height,
// derive the texel rows y0/y1 and the 4 bit weights, broadcast the four weight products to
// every channel of each pixel, gather the four taps with _mm_i32gather_epi32, then sum the
// weighted taps with saturating adds, add 127 and shift down by 8.
//
// The per-lane 32 bit multiply is assembled from two _mm_mul_epu32 calls OR-ed together, so it
// is only correct while every product fits in 32 bits.
#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \
__m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \
__m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \
__m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \
__m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \
__m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \
__m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \
__m128i y1 = _mm_srli_epi32(frac_y1, FRACBITS); \
__m128i inv_b = mtexturefracx; \
__m128i inv_a = _mm_and_si128(_mm_srli_epi32(frac_y1, FRACBITS - 4), m15); \
__m128i a = _mm_sub_epi32(m16, inv_a); \
__m128i b = _mm_sub_epi32(m16, inv_b); \
__m128i ab = _mm_mullo_epi16(a, b); \
__m128i invab = _mm_mullo_epi16(inv_a, b); \
__m128i ainvb = _mm_mullo_epi16(a, inv_b); \
__m128i invainvb = _mm_mullo_epi16(inv_a, inv_b); \
__m128i ab_lo = _mm_shuffle_epi32(ab, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i ab_hi = _mm_shuffle_epi32(ab, _MM_SHUFFLE(3, 3, 2, 2)); \
__m128i invab_lo = _mm_shuffle_epi32(invab, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i invab_hi = _mm_shuffle_epi32(invab, _MM_SHUFFLE(3, 3, 2, 2)); \
__m128i ainvb_lo = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i ainvb_hi = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(3, 3, 2, 2)); \
__m128i invainvb_lo = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i invainvb_hi = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(3, 3, 2, 2)); \
ab_lo = _mm_or_si128(ab_lo, _mm_slli_epi32(ab_lo, 16)); \
ab_hi = _mm_or_si128(ab_hi, _mm_slli_epi32(ab_hi, 16)); \
invab_lo = _mm_or_si128(invab_lo, _mm_slli_epi32(invab_lo, 16)); \
invab_hi = _mm_or_si128(invab_hi, _mm_slli_epi32(invab_hi, 16)); \
ainvb_lo = _mm_or_si128(ainvb_lo, _mm_slli_epi32(ainvb_lo, 16)); \
ainvb_hi = _mm_or_si128(ainvb_hi, _mm_slli_epi32(ainvb_hi, 16)); \
invainvb_lo = _mm_or_si128(invainvb_lo, _mm_slli_epi32(invainvb_lo, 16)); \
invainvb_hi = _mm_or_si128(invainvb_hi, _mm_slli_epi32(invainvb_hi, 16)); \
__m128i p00 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets0), 4); \
__m128i p01 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets0), 4); \
__m128i p10 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets1), 4); \
__m128i p11 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets1), 4); \
__m128i p00_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p00, _mm_setzero_si128()), ab_lo); \
__m128i p01_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p01, _mm_setzero_si128()), invab_lo); \
__m128i p10_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p10, _mm_setzero_si128()), ainvb_lo); \
__m128i p11_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p11, _mm_setzero_si128()), invainvb_lo); \
__m128i p00_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p00, _mm_setzero_si128()), ab_hi); \
__m128i p01_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p01, _mm_setzero_si128()), invab_hi); \
__m128i p10_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p10, _mm_setzero_si128()), ainvb_hi); \
__m128i p11_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p11, _mm_setzero_si128()), invainvb_hi); \
__m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_lo, p01_lo), _mm_adds_epu16(p10_lo, p11_lo)), m127), 8); \
__m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_hi, p01_hi), _mm_adds_epu16(p10_hi, p11_hi)), m127), 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
// SSE2 bilinear sample of four columns; the four resulting pixels are stored in fg (__m128i).
// All array-like arguments are indexed with i = 0..3: col0[i]/col1[i] are the two source
// columns for pixel i, texturefracx[i] the horizontal weight, texturefracy[i] the vertical
// position, one[i] the offset to the next row and height[i] the column height.
//
// Each pixel is computed on its own: the row indices are calculated with scalar code, the
// weights are fetched from SampleBgra::samplertable (two entries at inv_b * 32 + inv_a * 2)
// and the finished pixel is shifted into the top lane of fg, so that after four iterations
// pixel 0 sits in the lowest lane.
#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \
__m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \
{ \
uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \
uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \
uint32_t y0 = (frac_y0 >> FRACBITS); \
uint32_t y1 = (frac_y1 >> FRACBITS); \
\
uint32_t inv_b = texturefracx[i]; \
uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \
\
__m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \
__m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \
\
__m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); \
__m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \
__m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \
\
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \
\
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \
} \
}
// Samples four pixels from two sources (col0 with height0, col1 with height1) and mixes them
// with the weights in mipfrac, storing the result in fg (__m128i).
//
// NOTE(review): this macro looks unfinished and should be verified before it is used:
//  - `inv_t` passes the macro argument mipfrac straight to _mm_sub_epi32 although the line
//    above treats mipfrac as a pointer to load from; `t` was presumably intended.
//  - t and inv_t hold one weight per 32 bit lane but are multiplied with 16 bit channel lanes
//    via _mm_mullo_epi16, so the weights do not line up with the channels.
//  - y0/y1 are (texturefracy >> FRACBITS) * height without the second `>> FRACBITS` that
//    VEC_SAMPLE_BILINEAR4_COLUMN applies before indexing.
//  - col0/col1 are indexed with a single subscript here, whereas the other column macros use
//    col[i][y]; confirm what type callers are expected to pass.
#define VEC_SAMPLE_MIP_NEAREST4_COLUMN(fg, col0, col1, mipfrac, texturefracy, height0, height1) { \
uint32_t y0[4], y1[4]; \
for (int i = 0; i < 4; i++) \
{ \
y0[i] = (texturefracy[i] >> FRACBITS) * height0[i]; \
y1[i] = (texturefracy[i] >> FRACBITS) * height1[i]; \
} \
__m128i p0 = _mm_set_epi32(col0[y0[3]], col0[y0[2]], col0[y0[1]], col0[y0[0]]); \
__m128i p1 = _mm_set_epi32(col1[y1[3]], col1[y1[2]], col1[y1[1]], col1[y1[0]]); \
__m128i t = _mm_loadu_si128((const __m128i*)mipfrac); \
__m128i inv_t = _mm_sub_epi32(_mm_set1_epi32(256), mipfrac); \
__m128i p0_lo = _mm_unpacklo_epi8(p0, _mm_setzero_si128()); \
__m128i p0_hi = _mm_unpackhi_epi8(p0, _mm_setzero_si128()); \
__m128i p1_lo = _mm_unpacklo_epi8(p1, _mm_setzero_si128()); \
__m128i p1_hi = _mm_unpackhi_epi8(p1, _mm_setzero_si128()); \
__m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_lo, t), _mm_mullo_epi16(p1_lo, inv_t)), 8); \
__m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_hi, t), _mm_mullo_epi16(p1_hi, inv_t)), 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
// SSE2 bilinear sample of four consecutive span pixels; the result is stored in fg (__m128i).
// texture is addressed exactly like the scalar SampleBgra::sample_bilinear span overload:
// coordinates are xfrac >> xbits / yfrac >> ybits, wrapped with (32 - xbits) / (32 - ybits)
// bit masks. Weights come from SampleBgra::samplertable.
//
// Side effect: xfrac and yfrac must be modifiable lvalues; they are advanced by xstep/ystep
// once per pixel, i.e. four times in total. Pixel 0 ends up in the lowest lane of fg.
#define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \
int xshift = (32 - xbits); \
int yshift = (32 - ybits); \
int xmask = (1 << xshift) - 1; \
int ymask = (1 << yshift) - 1; \
\
__m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \
{ \
uint32_t x = xfrac >> xbits; \
uint32_t y = yfrac >> ybits; \
\
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \
\
uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \
uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \
\
__m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \
__m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \
\
__m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \
__m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \
\
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \
\
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \
\
xfrac += xstep; \
yfrac += ystep; \
} \
}
// Calculate constants for a simple shade with gamma correction.
// Declares mlight_hi, mlight_lo, mrcp_255 and m255 in the enclosing scope for use by
// AVX_LINEAR_SHADE_SIMPLE. The light multiplier is scaled from 0..256 to 0..1 and squared;
// the alpha lanes get a multiplier of 1.0. All pixels share the same light.
#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
__m256 mlight_lo = mlight_hi; \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f);
// Calculate constants for a simple shade with different light levels for each pixel and gamma correction.
// Declares mlight_hi, mlight_lo, mrcp_255 and m255 in the enclosing scope for use by
// AVX_LINEAR_SHADE_SIMPLE. Each light is scaled from 0..256 to 0..1 and squared; the alpha
// lanes get a multiplier of 1.0.
// NOTE(review): mlight_hi holds light0 (low 128 bits) and light1 (high 128 bits), mlight_lo
// holds light2 and light3. Combined with the in-lane unpacks of AVX_LINEAR_SHADE_SIMPLE this
// applies light2/light0/light3/light1 to pixels 0/1/2/3, whereas SSE_SHADE_SIMPLE_INIT4 with
// SSE_SHADE_SIMPLE applies light2/light3/light0/light1 - confirm which order callers expect.
#define AVX_LINEAR_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \
__m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f);
// Simple shade 4 pixels with gamma correction.
// fg is a __m128i holding four pixels and is updated in place. Requires mlight_hi, mlight_lo,
// mrcp_255 and m255 to be in scope (see AVX_LINEAR_SHADE_SIMPLE_INIT / _INIT4).
//
// Each channel is widened to float, normalized by 1/255 and squared (a gamma 2.0
// approximation of linear light), multiplied by the squared light, square-rooted back and
// rescaled to 0..255 before being packed to bytes again.
// Because of the per-128-bit-lane unpacks, fg_hi holds pixels 1 and 3 and fg_lo pixels 0 and 2.
#define AVX_LINEAR_SHADE_SIMPLE(fg) { \
__m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \
__m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \
__m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \
fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \
fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \
fg_hi = _mm256_mul_ps(fg_hi, mlight_hi); \
fg_hi = _mm256_sqrt_ps(fg_hi); \
fg_hi = _mm256_mul_ps(fg_hi, m255); \
fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \
fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \
fg_lo = _mm256_mul_ps(fg_lo, mlight_lo); \
fg_lo = _mm256_sqrt_ps(fg_lo); \
fg_lo = _mm256_mul_ps(fg_lo, m255); \
fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \
fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \
}
// Calculate constants for a complex shade with gamma correction.
// Declares the following in the enclosing scope for use by AVX_LINEAR_SHADE:
//   mlight_hi/mlight_lo      - squared light (0..1), alpha lanes 1.0; identical for all pixels
//   mrcp_255, m255           - normalization constants
//   color                    - shade_constants.light_* scaled to 0..1, alpha lanes 1.0
//   fade                     - shade_constants.fade_* scaled to 0..1, alpha lanes 0.0
//   fade_amount_hi/lo        - fade * (1 - squared light)
//   inv_desaturate           - (256 - desaturate) / 256 in every lane
//   ss_desaturate            - desaturate / 256 in the lowest lane only
//   intensity_weight         - 37/143/77 (blue/green/red) gray weights over 256, alpha weight 0
// Note that only the light is squared; color and fade are used as given.
#define AVX_LINEAR_SHADE_INIT(light, shade_constants) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
__m256 mlight_lo = mlight_hi; \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f); \
__m256 color = _mm256_set_ps( \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
__m256 fade = _mm256_set_ps( \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
__m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \
__m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f);
// Calculate constants for a complex shade with different light levels for each pixel and gamma correction.
// Declares the same variables as AVX_LINEAR_SHADE_INIT (mlight_hi/lo, mrcp_255, m255, color,
// fade, fade_amount_hi/lo, inv_desaturate, ss_desaturate, intensity_weight) in the enclosing
// scope, but with a separate squared light per pixel.
// NOTE(review): mlight_hi holds light0 (low 128 bits) and light1 (high 128 bits), mlight_lo
// holds light2 and light3. AVX_LINEAR_SHADE multiplies fg_hi (pixels 1 and 3) by mlight_hi and
// fg_lo (pixels 0 and 2) by mlight_lo, so light2/light0/light3/light1 apply to pixels 0/1/2/3.
// The SSE simple-shade macros apply light2/light3/light0/light1 - confirm the intended order.
#define AVX_LINEAR_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \
__m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f); \
__m256 color = _mm256_set_ps( \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
__m256 fade = _mm256_set_ps( \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
__m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \
__m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f);
// Complex shade 4 pixels with gamma correction.
// fg is a __m128i holding four pixels and is updated in place. Requires the variables declared
// by AVX_LINEAR_SHADE_INIT / _INIT4 to be in scope. The shade_constants argument is not
// referenced by the body; all constants come from those variables.
//
// Per pixel: normalize by 1/255 and square, compute the weighted gray intensity times
// desaturate and broadcast it to all lanes, then
//   fg = fg * inv_desaturate + intensity
//   fg = fg * light + fade_amount
//   fg = fg * color
// and finally take the square root, rescale to 0..255 and pack back to bytes.
// fg_hi holds pixels 1 and 3, fg_lo pixels 0 and 2 (per-128-bit-lane unpacks).
// NOTE(review): inv_desaturate and the broadcast intensity are also applied to the alpha lane,
// so alpha is modified whenever desaturate is non-zero; the scalar LightBgra::shade_bgra keeps
// alpha untouched - confirm whether that difference is intended.
#define AVX_LINEAR_SHADE(fg, shade_constants) { \
__m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \
__m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \
__m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \
fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \
fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \
fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \
fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \
\
__m128 intensity_hi0 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 0), intensity_weight); \
__m128 intensity_hi1 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 1), intensity_weight); \
intensity_hi0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi0, _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_hi0 = _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(0,0,0,0)); \
intensity_hi1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi1, _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_hi1 = _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(0,0,0,0)); \
__m256 intensity_hi = _mm256_set_m128(intensity_hi1, intensity_hi0); \
\
fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, inv_desaturate), intensity_hi); \
fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, mlight_hi), fade_amount_hi); \
fg_hi = _mm256_mul_ps(fg_hi, color); \
\
__m128 intensity_lo0 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 0), intensity_weight); \
__m128 intensity_lo1 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 1), intensity_weight); \
intensity_lo0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo0, _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_lo0 = _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(0,0,0,0)); \
intensity_lo1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo1, _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_lo1 = _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(0,0,0,0)); \
__m256 intensity_lo = _mm256_set_m128(intensity_lo1, intensity_lo0); \
\
fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, inv_desaturate), intensity_lo); \
fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, mlight_lo), fade_amount_lo); \
fg_lo = _mm256_mul_ps(fg_lo, color); \
\
fg_hi = _mm256_sqrt_ps(fg_hi); \
fg_hi = _mm256_mul_ps(fg_hi, m255); \
fg_lo = _mm256_sqrt_ps(fg_lo); \
fg_lo = _mm256_mul_ps(fg_lo, m255); \
fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \
fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \
}
/*
// Complex shade 8 pixels
#define AVX_SHADE(fg, shade_constants) { \
__m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \
__m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \
\
__m256i intensity_hi = _mm256_mullo_epi16(fg_hi, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \
__m256i intensity_lo = _mm256_mullo_epi16(fg_lo, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \
__m256i intensity = _mm256_mullo_epi16(_mm256_srli_epi16(_mm256_hadd_epi16(_mm256_hadd_epi16(intensity_lo, intensity_hi), _mm256_setzero_si256()), 8), desaturate); \
intensity = _mm256_unpacklo_epi16(intensity, intensity); \
intensity_hi = _mm256_unpackhi_epi32(intensity, intensity); \
intensity_lo = _mm256_unpacklo_epi32(intensity, intensity); \
\
fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \
fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, mlight), fade_amount), 8); \
fg_hi = _mm256_srli_epi16(_mm256_mullo_epi16(fg_hi, color), 8); \
\
fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \
fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, mlight), fade_amount), 8); \
fg_lo = _mm256_srli_epi16(_mm256_mullo_epi16(fg_lo, color), 8); \
\
fg = _mm256_packus_epi16(fg_lo, fg_hi); \
}
*/
// Alpha blend of four pixels using the alpha from fg: fg * alpha + bg * (256 - alpha).
// fg and bg are __m128i values holding four pixels each. Both are overwritten: bg receives
// its weighted part and fg the final blended result (saturating byte add of both parts).
// The 0..255 alpha is widened to 0..256 by adding its top bit, and the alpha channel itself
// is weighted the same way as the color channels.
// NOTE(review): the previous comment called this a "premultiplied" blend, but the macro
// multiplies fg by its own alpha - confirm which kind of source data callers pass.
#define VEC_ALPHA_BLEND(fg,bg) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \
__m128i m256 = _mm_set1_epi16(256); \
__m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \
__m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \
alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \
alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \
__m128i inv_alpha_hi = _mm_sub_epi16(m256, alpha_hi); \
__m128i inv_alpha_lo = _mm_sub_epi16(m256, alpha_lo); \
fg_hi = _mm_mullo_epi16(fg_hi, alpha_hi); \
fg_hi = _mm_srli_epi16(fg_hi, 8); \
fg_lo = _mm_mullo_epi16(fg_lo, alpha_lo); \
fg_lo = _mm_srli_epi16(fg_lo, 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \
bg_hi = _mm_srli_epi16(bg_hi, 8); \
bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \
bg_lo = _mm_srli_epi16(bg_lo, 8); \
bg = _mm_packus_epi16(bg_lo, bg_hi); \
fg = _mm_adds_epu8(fg, bg); \
}
// Destination blend weight to use once the alpha channel of the source pixel is taken into
// account: dest_alpha where the source is opaque, 256 where it is fully transparent and a
// rounded mix of the two in between.
FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha)
{
	uint32_t alpha8 = fg >> 24;
	uint32_t opacity = alpha8 + (alpha8 >> 7); // 255 -> 256
	uint32_t transparency = 256 - opacity;
	return (dest_alpha * opacity + 256 * transparency + 128) >> 8;
}
// Declares the variables that VEC_CALC_BLEND_ALPHA_INIT assigns and VEC_CALC_BLEND_ALPHA reads
#define VEC_CALC_BLEND_ALPHA_VARS() __m128i msrc_alpha, mdest_alpha, m256, m255, m128;
// Assigns the variables declared by VEC_CALC_BLEND_ALPHA_VARS (which must be in scope).
// src_alpha is broadcast unchanged; dest_alpha is rescaled by 255/256 before being broadcast.
// The arguments are pasted into expressions without parentheses, so pass simple values.
#define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \
msrc_alpha = _mm_set1_epi16(src_alpha); \
mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \
m256 = _mm_set1_epi16(256); \
m255 = _mm_set1_epi16(255); \
m128 = _mm_set1_epi16(128);
// Calculates the final alpha values to be used when combined with the source texture alpha channel.
// Four-pixel counterpart of calc_blend_bgalpha. fg is a __m128i holding four pixels.
//
// Declares fg_alpha_hi, fg_alpha_lo, bg_alpha_hi and bg_alpha_lo in the ENCLOSING scope (the
// declaration sits outside the braces), so the macro can only be expanded once per scope.
// Requires the variables of VEC_CALC_BLEND_ALPHA_VARS to be initialized.
//   bg_alpha_* = (mdest_alpha * alpha + 255 * (256 - alpha) + 128) >> 8, then 255 -> 256
//   fg_alpha_* = msrc_alpha (the source weight does not depend on the pixel)
#define VEC_CALC_BLEND_ALPHA(fg) \
__m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \
__m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
__m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \
alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \
bg_alpha_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_hi), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_hi))), m128), 8); \
bg_alpha_hi = _mm_add_epi16(bg_alpha_hi, _mm_srli_epi16(bg_alpha_hi, 7)); \
bg_alpha_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_lo), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_lo))), m128), 8); \
bg_alpha_lo = _mm_add_epi16(bg_alpha_lo, _mm_srli_epi16(bg_alpha_lo, 7)); \
fg_alpha_hi = msrc_alpha; \
fg_alpha_lo = msrc_alpha; \
}
// Declares the variables assigned by the SSE_SHADE_*INIT* macros
#define SSE_SHADE_VARS() __m128i mlight_hi, mlight_lo, color, fade, fade_amount_hi, fade_amount_lo, inv_desaturate;
// Calculate constants for a simple shade.
// Assigns mlight_hi and mlight_lo (declared by SSE_SHADE_VARS): the light multiplier in the
// three color lanes of each pixel and 256 in the alpha lane. All pixels share the same light.
#define SSE_SHADE_SIMPLE_INIT(light) \
mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
mlight_lo = mlight_hi;
// Calculate constants for a simple shade with different light levels for each pixel.
// Assigns mlight_hi and mlight_lo (declared by SSE_SHADE_VARS) with 256 in the alpha lanes.
// As consumed by SSE_SHADE_SIMPLE, mlight_lo scales pixels 0 and 1 with light2 and light3,
// and mlight_hi scales pixels 2 and 3 with light0 and light1.
// NOTE(review): confirm that callers pass their per-pixel lights in the matching order.
#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \
mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2);
// Simple shade 4 pixels
//
// fg is a register of four 32-bit pixels and is modified in place. Each byte
// is widened to 16 bits, multiplied by the matching lane of mlight_hi (upper
// two pixels) or mlight_lo (lower two pixels), shifted right by 8, and the
// result is packed back to bytes with unsigned saturation.
// Requires mlight_hi/mlight_lo to have been set by SSE_SHADE_SIMPLE_INIT or
// SSE_SHADE_SIMPLE_INIT4.
#define SSE_SHADE_SIMPLE(fg) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \
fg_hi = _mm_srli_epi16(fg_hi, 8); \
fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \
fg_lo = _mm_srli_epi16(fg_lo, 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
// Calculate constants for a complex shade
//
// Fills the registers declared by SSE_SHADE_VARS() for one light level shared
// by all four pixels:
//   mlight_hi/lo      - `light` in the three low lanes of each pixel, 256 in the fourth
//   color             - light_blue, light_green, light_red, 256 (lowest lane first)
//   fade              - fade_blue, fade_green, fade_red, 0 (lowest lane first)
//   fade_amount_hi/lo - fade * (256 - mlight), per lane
//   inv_desaturate    - 256 - desaturate in every lane
// shade_constants only needs the members named above. The last line has no
// line continuation, so the macro ends here and cannot absorb whatever source
// line follows it.
#define SSE_SHADE_INIT(light, shade_constants) \
mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
mlight_lo = mlight_hi; \
color = _mm_set_epi16( \
256, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue, \
256, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue); \
fade = _mm_set_epi16( \
0, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue, \
0, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue); \
fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
fade_amount_lo = fade_amount_hi; \
inv_desaturate = _mm_set1_epi16(256 - (shade_constants).desaturate);
// Calculate constants for a complex shade with different light levels for each pixel
//
// Same registers as SSE_SHADE_INIT, but with a separate light per pixel:
//   mlight_hi - light0 (lower pixel) and light1 (upper pixel)
//   mlight_lo - light2 (lower pixel) and light3 (upper pixel)
//   fade_amount_hi/lo - fade * (256 - mlight_hi) and fade * (256 - mlight_lo)
// NOTE(review): SSE_SHADE applies mlight_lo to the lower two pixels of fg and
// mlight_hi to the upper two; confirm callers pass the lights accordingly.
// The last line has no line continuation, so the macro ends here and cannot
// absorb whatever source line follows it.
#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \
mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \
color = _mm_set_epi16( \
256, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue, \
256, (shade_constants).light_red, (shade_constants).light_green, (shade_constants).light_blue); \
fade = _mm_set_epi16( \
0, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue, \
0, (shade_constants).fade_red, (shade_constants).fade_green, (shade_constants).fade_blue); \
fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \
inv_desaturate = _mm_set1_epi16(256 - (shade_constants).desaturate);
// Complex shade 4 pixels
//
// fg is a register of four 32-bit pixels and is modified in place; the
// registers set up by SSE_SHADE_INIT or SSE_SHADE_INIT4 must be in scope.
// For each pixel (upper pair via the *_hi registers, lower pair via *_lo):
//   1. intensity = ((c0 * 37 + c1 * 143 + c2 * 77) >> 8) * desaturate, where
//      c0..c2 are the pixel's three low bytes; computed in scalar code from
//      extracted lanes and then broadcast to all four lanes of the pixel.
//   2. c = (c * inv_desaturate + intensity) >> 8
//   3. c = (c * mlight + fade_amount) >> 8
//   4. c = (c * color) >> 8
// The two halves are then packed back to bytes with unsigned saturation.
// NOTE(review): step 1 also places the intensity in the fourth lane, so the
// pixel's top byte goes through step 2 as well - confirm this is intended.
#define SSE_SHADE(fg, shade_constants) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
\
__m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \
intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \
\
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \
\
__m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \
intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \
\
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \
\
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
#endif

367
src/r_draw_rgba_sse.h Normal file
View file

@ -0,0 +1,367 @@
//
// SSE/AVX intrinsics based drawers for the r_draw family of drawers.
//
// Note: This header file is intentionally not guarded by a __R_DRAW_RGBA_SSE__ define.
// The code is nearly identical for SSE and AVX, so r_draw_rgba.cpp includes this file
// several times, each time with different defines that change the names of the
// generated classes and the kind of intrinsics used.
#ifdef _MSC_VER
#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
#endif
// Drawer command that renders one horizontal span of 32-bit pixels.
//
// The constructor snapshots the ds_* span globals (and dc_destorg); Execute()
// later draws the span on a drawer thread from that snapshot. Pixels are
// produced four at a time with the VEC_SHADE* macros (bound by the including
// file to a concrete instruction set) and the 0-3 leftover pixels are shaded
// one at a time with LightBgra::shade_bgra.
class VecCommand(DrawSpanRGBA) : public DrawerCommand
{
// Snapshot of the span state taken at construction time.
const uint32_t * RESTRICT _source;
fixed_t _xfrac;
fixed_t _yfrac;
fixed_t _xstep;
fixed_t _ystep;
int _x1;
int _x2;
int _y;
int _xbits;
int _ybits;
BYTE * RESTRICT _destorg;
fixed_t _light;
ShadeConstants _shade_constants;
// True when span_sampler_setup() returned false; selects the point-sampled
// paths in Execute() instead of the bilinear ones.
bool _nearest_filter;
public:
// Captures the current ds_* globals. span_sampler_setup() receives _source,
// _xbits, _ybits, _xstep and _ystep and may therefore adjust them
// (presumably to select a mipmap level - confirm against SampleBgra).
VecCommand(DrawSpanRGBA)()
{
_source = (const uint32_t*)ds_source;
_xfrac = ds_xfrac;
_yfrac = ds_yfrac;
_xstep = ds_xstep;
_ystep = ds_ystep;
_x1 = ds_x1;
_x2 = ds_x2;
_y = ds_y;
_xbits = ds_xbits;
_ybits = ds_ybits;
_destorg = dc_destorg;
_light = ds_light;
_shade_constants = ds_shade_constants;
_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped);
}
// Draws pixels _x1.._x2 (inclusive) of row _y, unless that row is skipped by
// this thread.
void Execute(DrawerThread *thread) override
{
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t* dest;
const uint32_t* source = _source;
int count;
int spot;
xfrac = _xfrac;
yfrac = _yfrac;
// The frame buffer is addressed as 32-bit pixels.
dest = ylookup[_y] + _x1 + (uint32_t*)_destorg;
count = _x2 - _x1 + 1;
xstep = _xstep;
ystep = _ystep;
uint32_t light = LightBgra::calc_light_multiplier(_light);
ShadeConstants shade_constants = _shade_constants;
// Four variants follow: {nearest, bilinear} x {64x64 texture, any size}.
// Each splits count into sse_count groups of four pixels plus a scalar
// remainder, and picks the simple or the complex shade.
if (_nearest_filter)
{
if (_xbits == 6 && _ybits == 6)
{
// 64x64 is the most common case by far, so special case it.
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
// Current texture index in u,v.
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile,
// re-index using light/colormap.
// p0 ends up in the lowest 32 bits, i.e. at dest[0].
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
// Next step in u,v.
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
// Current texture index in u,v.
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile,
// re-index using light/colormap.
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
// Next step in u,v.
dest += 4;
}
}
// Scalar tail: the 0-3 pixels that did not fill a group of four.
if (count == 0)
return;
do
{
// Current texture index in u,v.
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
// Lookup pixel from flat texture tile
*dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants);
// Next step in u,v.
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
else
{
// General texture size: derive the shifts and the x mask from
// _xbits/_ybits instead of using the 64x64 constants.
BYTE yshift = 32 - _ybits;
BYTE xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
// Scalar tail for the leftover pixels.
if (count == 0)
return;
do
{
// Current texture index in u,v.
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
// Lookup pixel from flat texture tile
*dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants);
// Next step in u,v.
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
}
else
{
// Bilinear paths: VEC_SAMPLE_BILINEAR4_SPAN is handed xfrac/yfrac and the
// steps; no stepping is done here, so the macro is presumably responsible
// for advancing the coordinates by four pixels - confirm at its definition.
if (_xbits == 6 && _ybits == 6)
{
// 64x64 is the most common case by far, so special case it.
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
__m128i fg;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
__m128i fg;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
// Scalar tail for the leftover pixels.
if (count == 0)
return;
do
{
*dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants);
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
else
{
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
__m128i fg;
// Same role as the literal 26 in the 64x64 case (32 - 6).
int tmpx = 32 - _xbits;
int tmpy = 32 - _ybits;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
__m128i fg;
int tmpx = 32 - _xbits;
int tmpy = 32 - _ybits;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
// Scalar tail for the leftover pixels.
if (count == 0)
return;
do
{
*dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants);
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
}
}
};

View file

@ -313,21 +313,21 @@ void rt_Translate4cols(const BYTE *translation, int yl, int yh)
}
// Translates one span at hx to the screen at sx.
void rt_tlate1col (int hx, int sx, int yl, int yh)
void rt_tlate1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_map1col(hx, sx, yl, yh);
}
// Translates all four spans to the screen starting at sx.
void rt_tlate4cols (int sx, int yl, int yh)
void rt_tlate4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_map4cols(sx, yl, yh);
}
// Adds one span at hx to the screen at sx without clamping.
void rt_add1col (int hx, int sx, int yl, int yh)
void rt_add1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -417,21 +417,21 @@ void rt_add4cols_c (int sx, int yl, int yh)
}
// Translates and adds one span at hx to the screen at sx without clamping.
void rt_tlateadd1col (int hx, int sx, int yl, int yh)
void rt_tlateadd1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_add1col(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx without clamping.
void rt_tlateadd4cols (int sx, int yl, int yh)
void rt_tlateadd4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_add4cols(sx, yl, yh);
}
// Shades one span at hx to the screen at sx.
void rt_shaded1col (int hx, int sx, int yl, int yh)
void rt_shaded1col_c (int hx, int sx, int yl, int yh)
{
DWORD *fgstart;
BYTE *colormap;
@ -507,7 +507,7 @@ void rt_shaded4cols_c (int sx, int yl, int yh)
}
// Adds one span at hx to the screen at sx with clamping.
void rt_addclamp1col (int hx, int sx, int yl, int yh)
void rt_addclamp1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -556,13 +556,14 @@ void rt_addclamp4cols_c (int sx, int yl, int yh)
return;
count++;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4];
pitch = dc_pitch;
colormap = dc_colormap;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do {
DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]];
DWORD b = a;
@ -607,21 +608,21 @@ void rt_addclamp4cols_c (int sx, int yl, int yh)
}
// Translates and adds one span at hx to the screen at sx with clamping.
void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh)
void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_addclamp1col(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx with clamping.
void rt_tlateaddclamp4cols (int sx, int yl, int yh)
void rt_tlateaddclamp4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_addclamp4cols(sx, yl, yh);
}
// Subtracts one span at hx to the screen at sx with clamping.
void rt_subclamp1col (int hx, int sx, int yl, int yh)
void rt_subclamp1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -656,7 +657,7 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh)
}
// Subtracts all four spans to the screen starting at sx with clamping.
void rt_subclamp4cols (int sx, int yl, int yh)
void rt_subclamp4cols_c (int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -716,21 +717,21 @@ void rt_subclamp4cols (int sx, int yl, int yh)
}
// Translates and subtracts one span at hx to the screen at sx with clamping.
void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh)
void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_subclamp1col(hx, sx, yl, yh);
}
// Translates and subtracts all four spans to the screen starting at sx with clamping.
void rt_tlatesubclamp4cols (int sx, int yl, int yh)
void rt_tlatesubclamp4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_subclamp4cols(sx, yl, yh);
}
// Subtracts one span at hx from the screen at sx with clamping.
void rt_revsubclamp1col (int hx, int sx, int yl, int yh)
void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -765,7 +766,7 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh)
}
// Subtracts all four spans from the screen starting at sx with clamping.
void rt_revsubclamp4cols (int sx, int yl, int yh)
void rt_revsubclamp4cols_c (int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -825,14 +826,14 @@ void rt_revsubclamp4cols (int sx, int yl, int yh)
}
// Translates and subtracts one span at hx from the screen at sx with clamping.
void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh)
void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_revsubclamp1col(hx, sx, yl, yh);
}
// Translates and subtracts all four spans from the screen starting at sx with clamping.
void rt_tlaterevsubclamp4cols (int sx, int yl, int yh)
void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_revsubclamp4cols(sx, yl, yh);
@ -855,18 +856,21 @@ void rt_draw4cols (int sx)
}
#ifdef X86_ASM
// Setup assembly routines for changed colormaps or other parameters.
if (hcolfunc_post4 == rt_shaded4cols)
if (!r_swtruecolor)
{
R_SetupShadedCol();
}
else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols)
{
R_SetupAddClampCol();
}
else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols)
{
R_SetupAddCol();
// Setup assembly routines for changed colormaps or other parameters.
if (hcolfunc_post4 == rt_shaded4cols)
{
R_SetupShadedCol();
}
else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols)
{
R_SetupAddClampCol();
}
else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols)
{
R_SetupAddCol();
}
}
#endif
@ -1002,7 +1006,7 @@ void rt_draw4cols (int sx)
// Before each pass through a rendering loop that uses these routines,
// call this function to set up the span pointers.
void rt_initcols (BYTE *buff)
void rt_initcols_pal (BYTE *buff)
{
int y;
@ -1011,6 +1015,14 @@ void rt_initcols (BYTE *buff)
horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0];
}
// Appends the pair (start, stop) to the span list selected by the low two
// bits of x and advances that list's write cursor past the pair.
void rt_span_coverage_pal(int x, int start, int stop)
{
	unsigned int *&cursor = dc_ctspan[x & 3];
	*cursor++ = start;
	*cursor++ = stop;
}
// Stretches a column into a temporary buffer which is later
// drawn to the screen along with up to three other columns.
void R_DrawColumnHorizP_C (void)
@ -1073,7 +1085,7 @@ void R_DrawColumnHorizP_C (void)
}
// [RH] Just fills a column with a given color
void R_FillColumnHorizP (void)
void R_FillColumnHorizP_C (void)
{
int count = dc_count;
BYTE color = dc_color;
@ -1108,6 +1120,7 @@ void R_FillColumnHorizP (void)
void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span)
{
int pixelsize = r_swtruecolor ? 4 : 1;
const fixed_t texturemid = FLOAT2FIXED(dc_texturemid);
while (span->Length != 0)
{
@ -1177,7 +1190,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span)
}
}
dc_source = column + top;
dc_dest = ylookup[dc_yl] + dc_x + dc_destorg;
dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg;
dc_count = dc_yh - dc_yl + 1;
hcolfunc_pre ();
}

995
src/r_drawt_rgba.cpp Normal file
View file

@ -0,0 +1,995 @@
/*
** r_drawt_rgba.cpp
** Faster column drawers for modern processors, true color edition
**
**---------------------------------------------------------------------------
** Copyright 1998-2006 Randy Heit
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
**
** 1. Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** 3. The name of the author may not be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**---------------------------------------------------------------------------
**
** True color versions of the similar functions in r_drawt.cpp
** Please see r_drawt.cpp for a description of the globals used.
*/
#include "templates.h"
#include "doomtype.h"
#include "doomdef.h"
#include "r_defs.h"
#include "r_draw.h"
#include "r_main.h"
#include "r_things.h"
#include "v_video.h"
#include "r_draw_rgba.h"
#ifndef NO_SSE
#include <emmintrin.h>
#endif
extern unsigned int dc_tspans[4][MAXHEIGHT];
extern unsigned int *dc_ctspan[4];
extern unsigned int *horizspan[4];
#ifndef NO_SSE
#ifdef _MSC_VER
#pragma warning(disable: 4101) // warning C4101: unreferenced local variable
#endif
// Generate SSE drawers:
//
// r_drawt_rgba_sse.h is written against the generic VEC_* names. The defines
// below bind those names to the SSE_* macros and make VecCommand(name) expand
// to name##_SSE_Command, so including the header here produces the
// *_SSE_Command classes.
#define VecCommand(name) name##_SSE_Command
#define VEC_SHADE_VARS SSE_SHADE_VARS
#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT
#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4
#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE
#define VEC_SHADE_INIT SSE_SHADE_INIT
#define VEC_SHADE_INIT4 SSE_SHADE_INIT4
#define VEC_SHADE SSE_SHADE
#include "r_drawt_rgba_sse.h"
/*
// Generate AVX drawers:
#undef VecCommand
#undef VEC_SHADE_SIMPLE_INIT
#undef VEC_SHADE_SIMPLE_INIT4
#undef VEC_SHADE_SIMPLE
#undef VEC_SHADE_INIT
#undef VEC_SHADE_INIT4
#undef VEC_SHADE
#define VecCommand(name) name##_AVX_Command
#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT
#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4
#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE
#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT
#define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4
#define VEC_SHADE AVX_LINEAR_SHADE
#include "r_drawt_rgba_sse.h"
*/
#endif
/////////////////////////////////////////////////////////////////////////////
class DrawerRt1colCommand : public DrawerCommand
{
public:
int hx;
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
uint32_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _colormap;
uint32_t _srcalpha;
uint32_t _destalpha;
DrawerRt1colCommand(int hx, int sx, int yl, int yh)
{
this->hx = hx;
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_colormap = dc_colormap;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
class LoopIterator
{
public:
uint32_t *source;
uint32_t *dest;
int count;
int pitch, sincr;
LoopIterator(DrawerRt1colCommand *command, DrawerThread *thread)
{
count = thread->count_for_thread(command->yl, (command->yh - command->yl + 1));
if (count <= 0)
return;
dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg);
source = &thread->dc_temp_rgba[command->yl * 4 + command->hx] + thread->skipped_by_thread(command->yl) * 4;
pitch = command->_pitch * thread->num_cores;
sincr = thread->num_cores * 4;
}
explicit operator bool()
{
return count > 0;
}
bool next()
{
dest += pitch;
source += sincr;
return (--count) != 0;
}
};
};
class DrawerRt4colsCommand : public DrawerCommand
{
public:
int sx;
int yl;
int yh;
uint32_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _destorg;
int _pitch;
BYTE * RESTRICT _colormap;
uint32_t _srcalpha;
uint32_t _destalpha;
DrawerRt4colsCommand(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_destorg = dc_destorg;
_pitch = dc_pitch;
_colormap = dc_colormap;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
class LoopIterator
{
public:
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
LoopIterator(DrawerRt4colsCommand *command, DrawerThread *thread)
{
count = thread->count_for_thread(command->yl, command->yh - command->yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg);
source = &thread->dc_temp_rgba[command->yl * 4] + thread->skipped_by_thread(command->yl) * 4;
pitch = command->_pitch * thread->num_cores;
sincr = thread->num_cores * 4;
}
explicit operator bool()
{
return count > 0;
}
bool next()
{
dest += pitch;
source += sincr;
return (--count) != 0;
}
};
};
class RtCopy1colRGBACommand : public DrawerRt1colCommand
{
public:
RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = GPalette.BaseColors[*loop.source];
*loop.dest = BlendBgra::copy(fg);
} while (loop.next());
}
};
class RtMap1colRGBACommand : public DrawerRt1colCommand
{
public:
RtMap1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants);
*loop.dest = BlendBgra::copy(fg);
} while (loop.next());
}
};
class RtMap4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtMap4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants);
loop.dest[i] = BlendBgra::copy(fg);
}
} while (loop.next());
}
};
class RtAdd1colRGBACommand : public DrawerRt1colCommand
{
public:
RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtAdd4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtAdd4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants);
loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
class RtShaded1colRGBACommand : public DrawerRt1colCommand
{
uint32_t _color;
public:
RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
_color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t alpha = _colormap[*loop.source] * 4;
uint32_t inv_alpha = 256 - alpha;
*loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha);
} while (loop.next());
}
};
class RtShaded4colsRGBACommand : public DrawerRt4colsCommand
{
uint32_t _color;
public:
RtShaded4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
_color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t alpha = _colormap[loop.source[i]] * 4;
uint32_t inv_alpha = 256 - alpha;
loop.dest[i] = BlendBgra::add(_color, loop.dest[i], alpha, inv_alpha);
}
} while (loop.next());
}
};
class RtAddClamp1colRGBACommand : public DrawerRt1colCommand
{
public:
RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtAddClamp4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtAddClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants);
loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
class RtSubClamp1colRGBACommand : public DrawerRt1colCommand
{
public:
RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants);
*loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtSubClamp4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants);
loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
class RtRevSubClamp1colRGBACommand : public DrawerRt1colCommand
{
public:
RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants);
*loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtRevSubClamp4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants);
loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
// Replaces, in place, every value of one column (slot hx) of the calling
// thread's temporary buffer with translation[value], for rows yl..yh
// inclusive. Nothing is written to the screen.
class RtTranslate1colRGBACommand : public DrawerCommand
{
	const BYTE * RESTRICT translation;
	int hx;
	int yl;
	int yh;

public:
	RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh)
	{
		this->translation = translation;
		this->hx = hx;
		this->yl = yl;
		this->yh = yh;
	}

	void Execute(DrawerThread *thread) override
	{
		int count = yh - yl + 1;

		// An empty or inverted range must not reach the unrolled loop below:
		// a negative count makes (count >> 3) negative, and that loop only
		// stops when its counter hits exactly zero.
		if (count <= 0)
			return;

		// The buffer holds four entries per row, hence the stride of 4.
		uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx];

		// Things we do to hit the compiler's optimizer with a clue bat:
		// 1. Parallelism is explicitly spelled out by using a separate
		//    C instruction for each assembly instruction. GCC lets me
		//    have four temporaries, but VC++ spills to the stack with
		//    more than two. Two is probably optimal, anyway.
		// 2. The results of the translation lookups are explicitly
		//    stored in byte-sized variables. This causes the VC++ code
		//    to use byte mov instructions in most cases; for apparently
		//    random reasons, it will use movzx for some places. GCC
		//    ignores this and uses movzx always.

		// Do 8 rows at a time.
		for (int count8 = count >> 3; count8; --count8)
		{
			int c0, c1;
			BYTE b0, b1;

			c0 = source[0]; c1 = source[4];
			b0 = translation[c0]; b1 = translation[c1];
			source[0] = b0; source[4] = b1;

			c0 = source[8]; c1 = source[12];
			b0 = translation[c0]; b1 = translation[c1];
			source[8] = b0; source[12] = b1;

			c0 = source[16]; c1 = source[20];
			b0 = translation[c0]; b1 = translation[c1];
			source[16] = b0; source[20] = b1;

			c0 = source[24]; c1 = source[28];
			b0 = translation[c0]; b1 = translation[c1];
			source[24] = b0; source[28] = b1;

			source += 32;
		}

		// Finish by doing 1 row at a time.
		for (count &= 7; count; --count, source += 4)
		{
			source[0] = translation[source[0]];
		}
	}
};
// Remaps all four columns of the thread's temporary buffer in place through a
// translation table: for rows yl through yh inclusive, each of the four
// entries of the row is replaced by translation[entry].
class RtTranslate4colsRGBACommand : public DrawerCommand
{
	const BYTE * RESTRICT translation;
	int yl;
	int yh;

public:
	RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh)
	{
		this->translation = translation;
		this->yl = yl;
		this->yh = yh;
	}

	void Execute(DrawerThread *thread) override
	{
		int count = yh - yl + 1;

		// Reject empty or inverted ranges, as the other drawer commands do.
		// With a negative count both "count >> 1" and "count & 1" below can be
		// non-zero, so the code would write far outside the buffer.
		if (count <= 0)
			return;

		uint32_t *source = &thread->dc_temp_rgba[yl*4];
		int c0, c1;
		BYTE b0, b1;

		// Do 2 rows (8 consecutive entries) at a time.
		for (int pairs = count >> 1; pairs; --pairs)
		{
			c0 = source[0];			c1 = source[1];
			b0 = translation[c0];	b1 = translation[c1];
			source[0] = b0;			source[1] = b1;

			c0 = source[2];			c1 = source[3];
			b0 = translation[c0];	b1 = translation[c1];
			source[2] = b0;			source[3] = b1;

			c0 = source[4];			c1 = source[5];
			b0 = translation[c0];	b1 = translation[c1];
			source[4] = b0;			source[5] = b1;

			c0 = source[6];			c1 = source[7];
			b0 = translation[c0];	b1 = translation[c1];
			source[6] = b0;			source[7] = b1;

			source += 8;
		}

		// Do the final row if count was odd.
		if (count & 1)
		{
			c0 = source[0];			c1 = source[1];
			b0 = translation[c0];	b1 = translation[c1];
			source[0] = b0;			source[1] = b1;

			c0 = source[2];			c1 = source[3];
			b0 = translation[c0];	b1 = translation[c1];
			source[2] = b0;			source[3] = b1;
		}
	}
};
// Selects the temporary column buffer a drawer thread works with: the
// caller-supplied buffer when one was given, otherwise the thread's own
// dc_temp_rgbabuff_rgba storage.
class RtInitColsRGBACommand : public DrawerCommand
{
	BYTE * RESTRICT buff;

public:
	RtInitColsRGBACommand(BYTE *buff)
		: buff(buff)
	{
	}

	void Execute(DrawerThread *thread) override
	{
		if (buff != NULL)
		{
			// The buffer is handed over as bytes but is used as 32-bit entries.
			thread->dc_temp_rgba = (uint32_t*)buff;
		}
		else
		{
			thread->dc_temp_rgba = thread->dc_temp_rgbabuff_rgba;
		}
	}
};
class DrawColumnHorizRGBACommand : public DrawerCommand
{
int _count;
fixed_t _iscale;
fixed_t _texturefrac;
const BYTE * RESTRICT _source;
int _x;
int _yl;
int _yh;
public:
DrawColumnHorizRGBACommand()
{
_count = dc_count;
_iscale = dc_iscale;
_texturefrac = dc_texturefrac;
_source = dc_source;
_x = dc_x;
_yl = dc_yl;
_yh = dc_yh;
}
void Execute(DrawerThread *thread) override
{
int count = _count;
uint32_t *dest;
fixed_t fracstep;
fixed_t frac;
if (count <= 0)
return;
{
int x = _x & 3;
dest = &thread->dc_temp_rgba[x + 4 * _yl];
}
fracstep = _iscale;
frac = _texturefrac;
const BYTE *source = _source;
if (count & 1) {
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
}
if (count & 2) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest += 8;
}
if (count & 4) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest += 16;
}
count >>= 3;
if (!count) return;
do
{
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest[16] = source[frac >> FRACBITS]; frac += fracstep;
dest[20] = source[frac >> FRACBITS]; frac += fracstep;
dest[24] = source[frac >> FRACBITS]; frac += fracstep;
dest[28] = source[frac >> FRACBITS]; frac += fracstep;
dest += 32;
} while (--count);
}
};
// Fills one column of the thread's temporary buffer with a single value.
// The dc_* drawing state is captured at construction time. Execute() writes
// _color into _count entries of column (_x & 3), one per row starting at
// row _yl (rows are 4 entries apart).
class FillColumnHorizRGBACommand : public DrawerCommand
{
	int _x;
	int _yl;
	int _yh;
	int _count;
	int _color;

public:
	FillColumnHorizRGBACommand()
		: _x(dc_x),
		  _yl(dc_yl),
		  _yh(dc_yh),
		  _count(dc_count),
		  _color(dc_color)
	{
	}

	void Execute(DrawerThread *thread) override
	{
		if (_count <= 0)
			return;

		const int fill = _color;
		uint32_t *out = &thread->dc_temp_rgba[(_x & 3) + 4 * _yl];

		for (int row = 0; row < _count; ++row)
		{
			out[row * 4] = fill;
		}
	}
};
/////////////////////////////////////////////////////////////////////////////
// Copies one span at hx to the screen at sx.
// Queues an RtCopy1colRGBACommand with these arguments on the drawer
// command queue.
void rt_copy1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtCopy1colRGBACommand>(hx, sx, yl, yh);
}
// Copies all four spans to the screen starting at sx.
// Implemented as four single-column copies: buffer column N goes to screen
// column sx + N.
void rt_copy4cols_rgba (int sx, int yl, int yh)
{
	// To do: we could do this with SSE using __m128i
	for (int col = 0; col < 4; ++col)
	{
		rt_copy1col_rgba(col, sx + col, yl, yh);
	}
}
// Maps one span at hx to the screen at sx.
// Queues an RtMap1colRGBACommand with these arguments on the drawer
// command queue.
void rt_map1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtMap1colRGBACommand>(hx, sx, yl, yh);
}
// Maps all four spans to the screen starting at sx.
void rt_map4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtMap4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtMap4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Queues an in-place translation of column hx of the temporary buffer,
// rows yl through yh, through the given translation table
// (see RtTranslate1colRGBACommand).
void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtTranslate1colRGBACommand>(translation, hx, yl, yh);
}
// Queues an in-place translation of all four columns of the temporary
// buffer, rows yl through yh, through the given translation table
// (see RtTranslate4colsRGBACommand).
void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtTranslate4colsRGBACommand>(translation, yl, yh);
}
// Translates one span at hx to the screen at sx.
void rt_tlate1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_map1col(hx, sx, yl, yh);
}
// Translates all four spans to the screen starting at sx.
void rt_tlate4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_map4cols(sx, yl, yh);
}
// Adds one span at hx to the screen at sx without clamping.
// Queues an RtAdd1colRGBACommand with these arguments on the drawer
// command queue.
void rt_add1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtAdd1colRGBACommand>(hx, sx, yl, yh);
}
// Adds all four spans to the screen starting at sx without clamping.
void rt_add4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtAdd4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtAdd4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and adds one span at hx to the screen at sx without clamping.
void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_add1col(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx without clamping.
void rt_tlateadd4cols_rgba(int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_add4cols(sx, yl, yh);
}
// Shades one span at hx to the screen at sx.
// Queues an RtShaded1colRGBACommand with these arguments on the drawer
// command queue.
void rt_shaded1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtShaded1colRGBACommand>(hx, sx, yl, yh);
}
// Shades all four spans to the screen starting at sx.
void rt_shaded4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtShaded4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtShaded4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Adds one span at hx to the screen at sx with clamping.
// Queues an RtAddClamp1colRGBACommand with these arguments on the drawer
// command queue.
void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtAddClamp1colRGBACommand>(hx, sx, yl, yh);
}
// Adds all four spans to the screen starting at sx with clamping.
void rt_addclamp4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and adds one span at hx to the screen at sx with clamping.
// Queues the in-place translation of the temporary buffer with
// dc_translation, then the clamped additive draw.
void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_addclamp1col_rgba(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx with clamping.
void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_addclamp4cols(sx, yl, yh);
}
// Subtracts one span at hx to the screen at sx with clamping.
// Queues an RtSubClamp1colRGBACommand with these arguments on the drawer
// command queue.
void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtSubClamp1colRGBACommand>(hx, sx, yl, yh);
}
// Subtracts all four spans to the screen starting at sx with clamping.
void rt_subclamp4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtSubClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtSubClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and subtracts one span at hx to the screen at sx with clamping.
// Queues the in-place translation of the temporary buffer with
// dc_translation, then the clamped subtractive draw.
void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_subclamp1col_rgba(hx, sx, yl, yh);
}
// Translates and subtracts all four spans to the screen starting at sx with clamping.
// Queues the in-place translation of the temporary buffer with
// dc_translation, then the clamped subtractive draw.
void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_subclamp4cols_rgba(sx, yl, yh);
}
// Subtracts one span at hx from the screen at sx with clamping.
// Queues an RtRevSubClamp1colRGBACommand with these arguments on the drawer
// command queue.
void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtRevSubClamp1colRGBACommand>(hx, sx, yl, yh);
}
// Subtracts all four spans from the screen starting at sx with clamping.
void rt_revsubclamp4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtRevSubClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtRevSubClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and subtracts one span at hx from the screen at sx with clamping.
// Queues the in-place translation of the temporary buffer with
// dc_translation, then the clamped reverse-subtractive draw.
void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_revsubclamp1col_rgba(hx, sx, yl, yh);
}
// Translates and subtracts all four spans from the screen starting at sx with clamping.
// Queues the in-place translation of the temporary buffer with
// dc_translation, then the clamped reverse-subtractive draw.
void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_revsubclamp4cols_rgba(sx, yl, yh);
}
// Before each pass through a rendering loop that uses these routines,
// call this function to set up the span pointers.
// Resets the four per-column span cursors (dc_ctspan and horizspan) to the
// start of their dc_tspans rows, then queues the command that selects the
// temporary buffer (buff, or the thread's own storage when buff is NULL).
void rt_initcols_rgba (BYTE *buff)
{
	for (int col = 0; col < 4; ++col)
	{
		dc_ctspan[col] = &dc_tspans[col][0];
		horizspan[col] = dc_ctspan[col];
	}

	DrawerCommandQueue::QueueCommand<RtInitColsRGBACommand>(buff);
}
// Appends the pair (start, stop) to the span list of column (x & 3) and
// advances that column's span cursor past the two values just written.
void rt_span_coverage_rgba(int x, int start, int stop)
{
	unsigned int *&cursor = dc_ctspan[x & 3];

	cursor[0] = start;
	cursor[1] = stop;
	cursor += 2;
}
// Stretches a column into a temporary buffer which is later
// drawn to the screen along with up to three other columns.
// Records the span (dc_yl, dc_yh) for column (dc_x & 3), then queues the
// command that fills the buffer. Does nothing when dc_count is not positive.
void R_DrawColumnHoriz_rgba (void)
{
	if (dc_count <= 0)
		return;

	unsigned int *&cursor = dc_ctspan[dc_x & 3];
	cursor[0] = dc_yl;
	cursor[1] = dc_yh;
	cursor += 2;

	DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand>();
}
// [RH] Just fills a column with a given color
// Records the span (dc_yl, dc_yh) for column (dc_x & 3), then queues the
// command that fills the buffer. Does nothing when dc_count is not positive.
void R_FillColumnHoriz_rgba (void)
{
	if (dc_count <= 0)
		return;

	unsigned int *&cursor = dc_ctspan[dc_x & 3];
	cursor[0] = dc_yl;
	cursor[1] = dc_yh;
	cursor += 2;

	DrawerCommandQueue::QueueCommand<FillColumnHorizRGBACommand>();
}

757
src/r_drawt_rgba_sse.h Normal file
View file

@ -0,0 +1,757 @@
//
// SSE/AVX intrinsics based drawers for the r_drawt family of drawers.
//
// Note: This header file is intentionally not guarded by a __R_DRAWT_RGBA_SSE__ define.
// This is because the code is nearly identical for SSE and AVX. The file is included
// multiple times by r_drawt_rgba.cpp with different defines that change the generated
// class names and the type of intrinsics used.
#ifdef _MSC_VER
#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
#endif
// Vectorized drawer that maps all four columns of the temporary buffer to
// the screen. For every row it reads four entries from thread->dc_temp_rgba,
// passes them through the colormap, fetches the 32-bit colors from
// GPalette.BaseColors, applies VEC_SHADE_SIMPLE or VEC_SHADE (chosen by
// shade_constants.simple_shade) and stores the four pixels with one
// unaligned 128-bit write.
// NOTE(review): VecCommand(...) and the VEC_SHADE_* macros are defined
// outside this file section; the shading macros appear to rely on the locals
// declared by VEC_SHADE_VARS() - confirm before renaming anything here.
class VecCommand(RtMap4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
fixed_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _destorg;
int _pitch;
BYTE * RESTRICT _colormap;
public:
// Captures the column range and the current dc_* drawing state.
VecCommand(RtMap4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_light = dc_light;
_shade_constants = dc_shade_constants;
_destorg = dc_destorg;
_pitch = dc_pitch;
_colormap = dc_colormap;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
// Row count the thread reports for the span yl..yh; bail out when there is none.
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
ShadeConstants shade_constants = _shade_constants;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
// Both pointers advance by num_cores rows per step (a source row is 4 entries wide).
pitch = _pitch * thread->num_cores;
sincr = thread->num_cores * 4;
BYTE *colormap = _colormap;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
// Odd row count: draw a single row first so the loop below can do two rows per pass.
if (count & 1) {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
source += sincr;
dest += pitch;
}
if (!(count >>= 1))
return;
do {
// shade_pal_index 0-3
{
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
}
// shade_pal_index 4-7 (pitch)
{
uint32_t p0 = colormap[source[sincr]];
uint32_t p1 = colormap[source[sincr + 1]];
uint32_t p2 = colormap[source[sincr + 2]];
uint32_t p3 = colormap[source[sincr + 3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)(dest + pitch), fg);
}
source += sincr * 2;
dest += pitch * 2;
} while (--count);
}
else
{
// Same structure as above, using the full VEC_SHADE path with shade_constants.
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
if (count & 1) {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
source += sincr;
dest += pitch;
}
if (!(count >>= 1))
return;
do {
// shade_pal_index 0-3
{
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
}
// shade_pal_index 4-7 (pitch)
{
uint32_t p0 = colormap[source[sincr]];
uint32_t p1 = colormap[source[sincr + 1]];
uint32_t p2 = colormap[source[sincr + 2]];
uint32_t p3 = colormap[source[sincr + 3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)(dest + pitch), fg);
}
source += sincr * 2;
dest += pitch * 2;
} while (--count);
}
}
};
// Vectorized drawer that additively blends all four columns of the temporary
// buffer onto the screen. Per row: four entries are read from
// thread->dc_temp_rgba, passed through the colormap, looked up in
// GPalette.BaseColors and shaded; the result is then combined with the four
// destination pixels as (fg * fg_alpha + bg * bg_alpha) / 256 per 8-bit
// channel, using a saturating 16-bit add.
// NOTE(review): VecCommand(...) and the VEC_SHADE_* macros are defined
// outside this file section; the shading macros appear to rely on the locals
// declared by VEC_SHADE_VARS() - confirm before renaming anything here.
class VecCommand(RtAdd4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _colormap;
fixed_t _srcalpha;
fixed_t _destalpha;
public:
// Captures the column range and the current dc_* drawing state.
VecCommand(RtAdd4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_shade_constants = dc_shade_constants;
_colormap = dc_colormap;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
// Row count the thread reports for the span yl..yh; bail out when there is none.
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
// Both pointers advance by num_cores rows per step (a source row is 4 entries wide).
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
BYTE *colormap = _colormap;
// Reduce the fixed-point alphas to the scale used by the ">> 8" blend below.
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
ShadeConstants shade_constants = _shade_constants;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
// Per-pixel multipliers: the lane matching each pixel's top byte gets 256, the other three the alpha.
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
// Widen the 8-bit channels to 16 bits (two pixels per register) for the multiply.
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
// Same blend as above, using the full VEC_SHADE path with shade_constants.
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};
// Vectorized drawer that blends one fixed color onto the screen for all four
// columns of the temporary buffer. The color is _color shaded once with the
// light level. Per row, the four buffer entries are passed through the
// colormap and the results are used as blend weights on a 0..64 scale:
// (fg * alpha + bg * (64 - alpha)) / 64 per 8-bit channel.
// NOTE(review): VecCommand(...) is defined outside this file section.
class VecCommand(RtShaded4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
lighttable_t * RESTRICT _colormap;
int _color;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
public:
// Captures the column range and the current dc_* drawing state.
VecCommand(RtShaded4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_colormap = dc_colormap;
_color = dc_color;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
}
void Execute(DrawerThread *thread) override
{
BYTE *colormap;
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
// Row count the thread reports for the span yl..yh; bail out when there is none.
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
colormap = _colormap;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
// Both pointers advance by num_cores rows per step (a source row is 4 entries wide).
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
// The same shaded color fills all four pixels, so one widened register serves both halves.
__m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light))), _mm_setzero_si128());
__m128i alpha_one = _mm_set1_epi16(64);
do {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// The lane matching each pixel's top byte gets 64, so that byte is taken entirely from fg.
__m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2);
__m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0);
__m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi);
__m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo);
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * alpha + bg_red * inv_alpha) / 64:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
};
// Vectorized drawer that additively blends all four columns of the temporary
// buffer onto the screen with clamping. Per row: four entries are read from
// thread->dc_temp_rgba and used directly as indices into
// GPalette.BaseColors (no colormap lookup here), shaded, and combined with
// the destination as (fg * fg_alpha + bg * bg_alpha) / 256 per 8-bit channel
// using a saturating 16-bit add.
// NOTE(review): VecCommand(...) and the VEC_SHADE_* macros are defined
// outside this file section; the shading macros appear to rely on the locals
// declared by VEC_SHADE_VARS() - confirm before renaming anything here.
class VecCommand(RtAddClamp4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
fixed_t _srcalpha;
fixed_t _destalpha;
ShadeConstants _shade_constants;
public:
// Captures the column range and the current dc_* drawing state.
VecCommand(RtAddClamp4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
_shade_constants = dc_shade_constants;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
// Row count the thread reports for the span yl..yh; bail out when there is none.
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
// Both pointers advance by num_cores rows per step (a source row is 4 entries wide).
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
// Reduce the fixed-point alphas to the scale used by the ">> 8" blend below.
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
ShadeConstants shade_constants = _shade_constants;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
// Per-pixel multipliers: the lane matching each pixel's top byte gets 256, the other three the alpha.
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
// Widen the 8-bit channels to 16 bits (two pixels per register) for the multiply.
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
// Same blend as above, using the full VEC_SHADE path with shade_constants.
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};
// Vectorized drawer that subtractively blends all four columns of the
// temporary buffer onto the screen with clamping. Per row: four entries are
// read from thread->dc_temp_rgba and used directly as indices into
// GPalette.BaseColors, shaded, and combined with the destination as
// (bg * bg_alpha - fg * fg_alpha) / 256 per 8-bit channel using a saturating
// 16-bit subtract (results below zero become zero).
// NOTE(review): VecCommand(...) and the VEC_SHADE_* macros are defined
// outside this file section; the shading macros appear to rely on the locals
// declared by VEC_SHADE_VARS() - confirm before renaming anything here.
class VecCommand(RtSubClamp4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
fixed_t _srcalpha;
fixed_t _destalpha;
ShadeConstants _shade_constants;
public:
// Captures the column range and the current dc_* drawing state.
VecCommand(RtSubClamp4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
_shade_constants = dc_shade_constants;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
// Row count the thread reports for the span yl..yh; bail out when there is none.
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
// Both pointers advance by num_cores rows per step (a source row is 4 entries wide).
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
ShadeConstants shade_constants = _shade_constants;
// Reduce the fixed-point alphas to the scale used by the ">> 8" blend below.
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
// Per-pixel multipliers: the lane matching each pixel's top byte gets 256, the other three the alpha.
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
// Widen the 8-bit channels to 16 bits (two pixels per register) for the multiply.
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (bg_red * bg_alpha - fg_red * fg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
// Same blend as above, using the full VEC_SHADE path with shade_constants.
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (bg_red * bg_alpha - fg_red * fg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};
// Vectorized drawer that reverse-subtractively blends all four columns of
// the temporary buffer onto the screen with clamping. Per row: four entries
// are read from thread->dc_temp_rgba and used directly as indices into
// GPalette.BaseColors, shaded, and combined with the destination as
// (fg * fg_alpha - bg * bg_alpha) / 256 per 8-bit channel using a saturating
// 16-bit subtract (results below zero become zero).
// NOTE(review): VecCommand(...) and the VEC_SHADE_* macros are defined
// outside this file section; the shading macros appear to rely on the locals
// declared by VEC_SHADE_VARS() - confirm before renaming anything here.
class VecCommand(RtRevSubClamp4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
fixed_t _srcalpha;
fixed_t _destalpha;
ShadeConstants _shade_constants;
public:
// Captures the column range and the current dc_* drawing state.
VecCommand(RtRevSubClamp4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
_shade_constants = dc_shade_constants;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
// Row count the thread reports for the span yl..yh; bail out when there is none.
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
// Both pointers advance by num_cores rows per step (a source row is 4 entries wide).
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
ShadeConstants shade_constants = _shade_constants;
// Reduce the fixed-point alphas to the scale used by the ">> 8" blend below.
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
// Per-pixel multipliers: the lane matching each pixel's top byte gets 256, the other three the alpha.
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
// Widen the 8-bit channels to 16 bits (two pixels per register) for the multiply.
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha - bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
// Same blend as above, using the full VEC_SHADE path with shade_constants.
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha - bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};

View file

@ -40,6 +40,7 @@
#include "r_segs.h"
#include "r_3dfloors.h"
#include "r_sky.h"
#include "r_draw_rgba.h"
#include "st_stuff.h"
#include "c_cvars.h"
#include "c_dispatch.h"
@ -104,6 +105,8 @@ bool r_dontmaplines;
CVAR (String, r_viewsize, "", CVAR_NOSET)
CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE)
bool r_swtruecolor;
double r_BaseVisibility;
double r_WallVisibility;
double r_FloorVisibility;
@ -117,7 +120,7 @@ double FocalLengthX;
double FocalLengthY;
FDynamicColormap*basecolormap; // [RH] colormap currently drawing with
int fixedlightlev;
lighttable_t *fixedcolormap;
FColormap *fixedcolormap;
FSpecialColormap *realfixedcolormap;
double WallTMapScale2;
@ -397,16 +400,6 @@ void R_InitRenderer()
R_InitPlanes ();
R_InitShadeMaps();
R_InitColumnDrawers ();
colfunc = basecolfunc = R_DrawColumn;
fuzzcolfunc = R_DrawFuzzColumn;
transcolfunc = R_DrawTranslatedColumn;
spanfunc = R_DrawSpan;
// [RH] Horizontal column drawers
hcolfunc_pre = R_DrawColumnHoriz;
hcolfunc_post1 = rt_map1col;
hcolfunc_post4 = rt_map4cols;
}
//==========================================================================
@ -467,16 +460,16 @@ void R_SetupColormap(player_t *player)
if (player->fixedcolormap >= 0 && player->fixedcolormap < (int)SpecialColormaps.Size())
{
realfixedcolormap = &SpecialColormaps[player->fixedcolormap];
if (RenderTarget == screen && (DFrameBuffer *)screen->Accel2D && r_shadercolormaps)
if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps)))
{
// Render everything fullbright. The copy to video memory will
// apply the special colormap, so it won't be restricted to the
// palette.
fixedcolormap = realcolormaps;
fixedcolormap = &realcolormaps;
}
else
{
fixedcolormap = SpecialColormaps[player->fixedcolormap].Colormap;
fixedcolormap = &SpecialColormaps[player->fixedcolormap];
}
}
else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS)
@ -487,7 +480,7 @@ void R_SetupColormap(player_t *player)
// [RH] Inverse light for shooting the Sigil
if (fixedcolormap == NULL && extralight == INT_MIN)
{
fixedcolormap = SpecialColormaps[INVERSECOLORMAP].Colormap;
fixedcolormap = &SpecialColormaps[INVERSECOLORMAP];
extralight = 0;
}
}
@ -576,6 +569,9 @@ void R_HighlightPortal (PortalDrawseg* pds)
// [ZZ] NO OVERFLOW CHECKS HERE
// I believe it won't break. if it does, blame me. :(
if (r_swtruecolor) // Assuming this is just a debug function
return;
BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255);
BYTE* pixels = RenderTarget->GetBuffer();
@ -623,12 +619,26 @@ void R_EnterPortal (PortalDrawseg* pds, int depth)
int Ytop = pds->ceilingclip[x-pds->x1];
int Ybottom = pds->floorclip[x-pds->x1];
BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing;
for (int y = Ytop; y <= Ybottom; y++)
if (r_swtruecolor)
{
*dest = color;
dest += spacing;
uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing;
uint32_t c = GPalette.BaseColors[color].d;
for (int y = Ytop; y <= Ybottom; y++)
{
*dest = c;
dest += spacing;
}
}
else
{
BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing;
for (int y = Ytop; y <= Ybottom; y++)
{
*dest = color;
dest += spacing;
}
}
}
@ -797,7 +807,8 @@ void R_SetupBuffer ()
static BYTE *lastbuff = NULL;
int pitch = RenderTarget->GetPitch();
BYTE *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx;
int pixelsize = r_swtruecolor ? 4 : 1;
BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize;
if (dc_pitch != pitch || lineptr != lastbuff)
{
@ -847,10 +858,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
// [RH] Show off segs if r_drawflat is 1
if (r_drawflat)
{
hcolfunc_pre = R_FillColumnHorizP;
hcolfunc_pre = R_FillColumnHoriz;
hcolfunc_post1 = rt_copy1col;
hcolfunc_post4 = rt_copy4cols;
colfunc = R_FillColumnP;
colfunc = R_FillColumn;
spanfunc = R_FillSpan;
}
else
@ -925,7 +936,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
// If we don't want shadered colormaps, NULL it now so that the
// copy to the screen does not use a special colormap shader.
if (!r_shadercolormaps)
if (!r_shadercolormaps && !r_swtruecolor)
{
realfixedcolormap = NULL;
}
@ -943,6 +954,15 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
int x, int y, int width, int height, bool dontmaplines)
{
const bool savedviewactive = viewactive;
const bool savedoutputformat = r_swtruecolor;
if (r_swtruecolor != canvas->IsBgra())
{
r_swtruecolor = canvas->IsBgra();
R_InitColumnDrawers();
}
R_BeginDrawerCommands();
viewwidth = width;
RenderTarget = canvas;
@ -955,13 +975,22 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
R_RenderActorView (actor, dontmaplines);
R_EndDrawerCommands();
RenderTarget = screen;
bRenderingToCanvas = false;
R_ExecuteSetViewSize ();
screen->Lock (true);
R_SetupBuffer ();
screen->Unlock ();
viewactive = savedviewactive;
r_swtruecolor = savedoutputformat;
if (r_swtruecolor != canvas->IsBgra())
{
R_InitColumnDrawers();
}
}
//==========================================================================

View file

@ -82,6 +82,16 @@ extern bool r_dontmaplines;
// Change R_CalcTiltedLighting() when this changes.
#define GETPALOOKUP(vis,shade) (clamp<int> (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1))
// Calculate the light multiplier for dc_light/ds_light
// This is used instead of GETPALOOKUP when ds_colormap/dc_colormap is set to the base colormap
// Returns a value between 0 and 1 in fixed point
#define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS))
// Converts fixedlightlev into a shade value
#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS)
extern bool r_swtruecolor;
extern double GlobVis;
void R_SetVisibility(double visibility);
@ -96,7 +106,7 @@ extern double r_SpriteVisibility;
extern int r_actualextralight;
extern bool foggy;
extern int fixedlightlev;
extern lighttable_t* fixedcolormap;
extern FColormap* fixedcolormap;
extern FSpecialColormap*realfixedcolormap;

View file

@ -58,6 +58,7 @@
#include "r_3dfloors.h"
#include "v_palette.h"
#include "r_data/colormaps.h"
#include "r_draw_rgba.h"
#ifdef _MSC_VER
#pragma warning(disable:4244)
@ -227,12 +228,11 @@ void R_MapPlane (int y, int x1)
if (plane_shade)
{
// Determine lighting based on the span's distance from the viewer.
ds_colormap = basecolormap->Maps + (GETPALOOKUP (
GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT);
R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade);
}
#ifdef X86_ASM
if (ds_colormap != ds_curcolormap)
if (!r_swtruecolor && ds_colormap != ds_curcolormap)
R_SetSpanColormap_ASM (ds_colormap);
#endif
@ -355,7 +355,7 @@ void R_CalcTiltedLighting (double lval, double lend, int width)
//
//==========================================================================
void R_MapTiltedPlane (int y, int x1)
void R_MapTiltedPlane_C (int y, int x1)
{
int x2 = spanend[y];
int width = x2 - x1;
@ -392,7 +392,7 @@ void R_MapTiltedPlane (int y, int x1)
u = SQWORD(uz*z) + pviewx;
v = SQWORD(vz*z) + pviewy;
ds_colormap = tiltlighting[i];
R_SetDSColorMapLight(tiltlighting[i], 0, 0);
fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]];
iz += plane_sz[0];
uz += plane_su[0];
@ -478,17 +478,27 @@ void R_MapTiltedPlane (int y, int x1)
#endif
}
void R_MapTiltedPlane_rgba (int y, int x1)
{
	// The span on row y runs from x1 to the right edge recorded in spanend[].
	int x2 = spanend[y];

	// Forward the span together with the current tilted-plane setup
	// (gradients, shading state and view origin) to the BGRA span drawer.
	R_DrawTiltedSpan_rgba(
		y, x1, x2,
		plane_sz, plane_su, plane_sv,
		plane_shade, planeshade, planelightfloat,
		pviewx, pviewy);
}
//==========================================================================
//
// R_MapColoredPlane
//
//==========================================================================
void R_MapColoredPlane (int y, int x1)
void R_MapColoredPlane_C (int y, int x1)
{
	// Fill the span [x1, spanend[y]] on row y with the single value ds_color.
	auto *dest = ylookup[y] + x1 + dc_destorg;
	const int count = spanend[y] - x1 + 1;	// both endpoints are inclusive
	memset (dest, ds_color, count);
}
void R_MapColoredPlane_rgba(int y, int x1)
{
	// Hand the span on row y, from x1 to the recorded right edge, to the
	// BGRA solid-color span drawer.
	int x2 = spanend[y];
	R_DrawColoredSpan_rgba(y, x1, x2);
}
//==========================================================================
//
// R_ClearPlanes
@ -841,15 +851,24 @@ extern FTexture *rw_pic;
// Allow for layer skies up to 512 pixels tall. This is overkill,
// since the most anyone can ever see of the sky is 500 pixels.
// We need 4 skybufs because wallscan can draw up to 4 columns at a time.
// Need two versions - one for true color and one for palette
static BYTE skybuf[4][512];
static uint32_t skybuf_bgra[4][512];
static DWORD lastskycol[4];
static DWORD lastskycol_bgra[4];
static int skycolplace;
static int skycolplace_bgra;
// Get a column of sky when there is only one sky texture.
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x)
{
	// Translate screen column x into a column index of the front sky texture.
	angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip;
	int tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS;

	// The column is always handed back as a BYTE pointer. In true color mode
	// it is the BGRA column cast to BYTE*, so the texture must be asked for
	// the format that matches the active renderer. (A leftover unconditional
	// palette return used to sit above this test and made the true color
	// branch unreachable.)
	if (!r_swtruecolor)
		return fronttex->GetColumn(tx, NULL);
	else
		return (const BYTE *)fronttex->GetColumnBgra(tx, NULL);
}
// Get a column of sky when there are two overlapping sky textures
@ -864,38 +883,77 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x)
DWORD skycol = (angle1 << 16) | angle2;
int i;
for (i = 0; i < 4; ++i)
if (!r_swtruecolor)
{
if (lastskycol[i] == skycol)
for (i = 0; i < 4; ++i)
{
return skybuf[i];
if (lastskycol[i] == skycol)
{
return skybuf[i];
}
}
lastskycol[skycolplace] = skycol;
BYTE *composite = skybuf[skycolplace];
skycolplace = (skycolplace + 1) & 3;
// The ordering of the following code has been tuned to allow VC++ to optimize
// it well. In particular, this arrangement lets it keep count in a register
// instead of on the stack.
const BYTE *front = fronttex->GetColumn(angle1, NULL);
const BYTE *back = backskytex->GetColumn(angle2, NULL);
int count = MIN<int>(512, MIN(backskytex->GetHeight(), fronttex->GetHeight()));
i = 0;
do
{
if (front[i])
{
composite[i] = front[i];
}
else
{
composite[i] = back[i];
}
} while (++i, --count);
return composite;
}
lastskycol[skycolplace] = skycol;
BYTE *composite = skybuf[skycolplace];
skycolplace = (skycolplace + 1) & 3;
// The ordering of the following code has been tuned to allow VC++ to optimize
// it well. In particular, this arrangement lets it keep count in a register
// instead of on the stack.
const BYTE *front = fronttex->GetColumn (angle1, NULL);
const BYTE *back = backskytex->GetColumn (angle2, NULL);
int count = MIN<int> (512, MIN (backskytex->GetHeight(), fronttex->GetHeight()));
i = 0;
do
else
{
if (front[i])
return R_GetOneSkyColumn(fronttex, x);
for (i = 0; i < 4; ++i)
{
composite[i] = front[i];
if (lastskycol_bgra[i] == skycol)
{
return (BYTE*)(skybuf_bgra[i]);
}
}
else
lastskycol_bgra[skycolplace_bgra] = skycol;
uint32_t *composite = skybuf_bgra[skycolplace_bgra];
skycolplace_bgra = (skycolplace_bgra + 1) & 3;
// The ordering of the following code has been tuned to allow VC++ to optimize
// it well. In particular, this arrangement lets it keep count in a register
// instead of on the stack.
const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL);
const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL);
int count = MIN<int>(512, MIN(backskytex->GetHeight(), fronttex->GetHeight()));
i = 0;
do
{
composite[i] = back[i];
}
} while (++i, --count);
return composite;
if (front[i])
{
composite[i] = front[i];
}
else
{
composite[i] = back[i];
}
} while (++i, --count);
return (BYTE*)composite;
}
}
static void R_DrawSky (visplane_t *pl)
@ -930,6 +988,7 @@ static void R_DrawSky (visplane_t *pl)
for (x = 0; x < 4; ++x)
{
lastskycol[x] = 0xffffffff;
lastskycol_bgra[x] = 0xffffffff;
}
rw_pic = frontskytex;
@ -943,6 +1002,7 @@ static void R_DrawSky (visplane_t *pl)
for (x = 0; x < 4; ++x)
{
lastskycol[x] = 0xffffffff;
lastskycol_bgra[x] = 0xffffffff;
}
wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall,
frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns);
@ -951,7 +1011,7 @@ static void R_DrawSky (visplane_t *pl)
{ // The texture does not tile nicely
frontyScale *= skyscale;
frontiScale = 1 / frontyScale;
R_DrawSkyStriped (pl);
//R_DrawSkyStriped (pl);
}
}
@ -980,6 +1040,7 @@ static void R_DrawSkyStriped (visplane_t *pl)
for (x = 0; x < 4; ++x)
{
lastskycol[x] = 0xffffffff;
lastskycol_bgra[x] = 0xffffffff;
}
wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y,
backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns);
@ -1098,7 +1159,7 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske
R_SetupSpanBits(tex);
double xscale = pl->xform.xScale * tex->Scale.X;
double yscale = pl->xform.yScale * tex->Scale.Y;
ds_source = tex->GetPixels ();
R_SetSpanSource(tex);
basecolormap = pl->colormap;
planeshade = LIGHT2SHADE(pl->lightlevel);
@ -1461,12 +1522,13 @@ void R_DrawSkyPlane (visplane_t *pl)
bool fakefixed = false;
if (fixedcolormap)
{
dc_colormap = fixedcolormap;
R_SetColorMapLight(fixedcolormap, 0, 0);
}
else
{
fakefixed = true;
fixedcolormap = dc_colormap = NormalLight.Maps;
fixedcolormap = &NormalLight;
R_SetColorMapLight(fixedcolormap, 0, 0);
}
R_DrawSky (pl);
@ -1484,7 +1546,7 @@ void R_DrawSkyPlane (visplane_t *pl)
void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked)
{
#ifdef X86_ASM
if (ds_source != ds_cursource)
if (!r_swtruecolor && ds_source != ds_cursource)
{
R_SetSpanSource_ASM (ds_source);
}
@ -1547,12 +1609,21 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
planeheight = fabs(pl->height.Zat0() - ViewPos.Z);
GlobVis = r_FloorVisibility / planeheight;
ds_light = 0;
if (fixedlightlev >= 0)
ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false;
{
R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
plane_shade = false;
}
else if (fixedcolormap)
ds_colormap = fixedcolormap, plane_shade = false;
{
R_SetDSColorMapLight(fixedcolormap, 0, 0);
plane_shade = false;
}
else
{
plane_shade = true;
}
if (spanfunc != R_FillSpan)
{
@ -1565,12 +1636,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
spanfunc = R_DrawSpanMaskedTranslucent;
dc_srcblend = Col2RGB8[alpha>>10];
dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
else
{
spanfunc = R_DrawSpanMaskedAddClamp;
dc_srcblend = Col2RGB8_LessPrecision[alpha>>10];
dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
}
else
@ -1587,12 +1662,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
spanfunc = R_DrawSpanTranslucent;
dc_srcblend = Col2RGB8[alpha>>10];
dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
else
{
spanfunc = R_DrawSpanAddClamp;
dc_srcblend = Col2RGB8_LessPrecision[alpha>>10];
dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
}
else
@ -1708,11 +1787,20 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
planelightfloat = -planelightfloat;
if (fixedlightlev >= 0)
ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false;
{
R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
plane_shade = false;
}
else if (fixedcolormap)
ds_colormap = fixedcolormap, plane_shade = false;
{
R_SetDSColorMapLight(fixedcolormap, 0, 0);
plane_shade = false;
}
else
ds_colormap = basecolormap->Maps, plane_shade = true;
{
R_SetDSColorMapLight(basecolormap, 0, 0);
plane_shade = true;
}
if (!plane_shade)
{
@ -1723,9 +1811,16 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
}
#if defined(X86_ASM)
if (ds_source != ds_curtiltedsource)
R_SetTiltedSpanSource_ASM (ds_source);
R_MapVisPlane (pl, R_DrawTiltedPlane_ASM);
if (!r_swtruecolor)
{
if (ds_source != ds_curtiltedsource)
R_SetTiltedSpanSource_ASM(ds_source);
R_MapVisPlane(pl, R_DrawTiltedPlane_ASM);
}
else
{
R_MapVisPlane(pl, R_MapTiltedPlane);
}
#else
R_MapVisPlane (pl, R_MapTiltedPlane);
#endif

View file

@ -93,6 +93,14 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al
void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked);
void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1));
extern void(*R_MapColoredPlane)(int y, int x1);
extern void(*R_MapTiltedPlane)(int y, int x1);
void R_MapTiltedPlane_C(int y, int x1);
void R_MapTiltedPlane_rgba(int y, int x);
void R_MapColoredPlane_C(int y, int x1);
void R_MapColoredPlane_rgba(int y, int x1);
visplane_t *R_FindPlane
( const secplane_t &height,
FTextureID picnum,

File diff suppressed because it is too large Load diff

View file

@ -42,7 +42,9 @@
#include "r_3dfloors.h"
#include "textures/textures.h"
#include "r_data/voxels.h"
#include "r_draw_rgba.h"
EXTERN_CVAR(Bool, r_shadercolormaps)
class FArchive;
void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, int trueratio);
@ -58,6 +60,7 @@ void R_InitRenderer();
// Initializes the software renderer.
void FSoftwareRenderer::Init()
{
// Record the output format of the current screen before the renderer is
// set up. NOTE(review): R_InitRenderer calls R_InitColumnDrawers, which is
// re-run elsewhere whenever r_swtruecolor changes, so it presumably reads
// this flag - keep this assignment first.
r_swtruecolor = screen->IsBgra();
R_InitRenderer();
}
@ -85,11 +88,17 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache)
if (cache & FTextureManager::HIT_Columnmode)
{
const FTexture::Span *spanp;
tex->GetColumn(0, &spanp);
if (r_swtruecolor)
tex->GetColumnBgra(0, &spanp);
else
tex->GetColumn(0, &spanp);
}
else if (cache != 0)
{
tex->GetPixels ();
if (r_swtruecolor)
tex->GetPixelsBgra();
else
tex->GetPixels ();
}
else
{
@ -155,9 +164,24 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap<PClassActor*, bool> &act
void FSoftwareRenderer::RenderView(player_t *player)
{
	// Follow the screen's pixel format: if it no longer matches the format
	// the drawers were set up for, switch over and rebuild the drawers.
	const bool targetIsBgra = screen->IsBgra();
	if (targetIsBgra != r_swtruecolor)
	{
		r_swtruecolor = targetIsBgra;
		R_InitColumnDrawers();
	}

	R_BeginDrawerCommands();

	R_RenderActorView (player->mo);

	// [RH] Let cameras draw onto textures that were visible this frame.
	FCanvasTextureInfo::UpdateAll ();

	// Apply special colormap if the target cannot do it
	if (r_swtruecolor && realfixedcolormap)
	{
		const bool targetAppliesColormap = r_shadercolormaps && screen->Accel2D;
		if (!targetAppliesColormap)
		{
			DrawerCommandQueue::QueueCommand<ApplySpecialColormapRGBACommand>(realfixedcolormap, screen);
		}
	}

	R_EndDrawerCommands();
}
//==========================================================================
@ -182,7 +206,7 @@ void FSoftwareRenderer::RemapVoxels()
void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, int height)
{
DCanvas *pic = new DSimpleCanvas (width, height);
DCanvas *pic = new DSimpleCanvas (width, height, false);
PalEntry palette[256];
// Take a snapshot of the player's view
@ -311,27 +335,67 @@ void FSoftwareRenderer::CopyStackedViewParameters()
// Renders the view from 'viewpoint' into the camera texture 'tex' using the
// given field of view, then restores the renderer globals that the nested
// render clobbers.
void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov)
{
	// Pick the pixel buffer and canvas that match the active output format.
	BYTE *Pixels = r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels();
	DSimpleCanvas *Canvas = r_swtruecolor ? tex->GetCanvasBgra() : tex->GetCanvas();

	// curse Doom's overuse of global variables in the renderer.
	// These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette.
	FColormap *savecolormap = fixedcolormap;
	FSpecialColormap *savecm = realfixedcolormap;

	DAngle savedfov = FieldOfView;
	R_SetFOV ((double)fov);
	R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate);
	R_SetFOV (savedfov);

	// Flip the rendered canvas into the texture's pixel buffer exactly once,
	// in place when the canvas and the texture share the same buffer.
	if (Canvas->IsBgra())
	{
		if (Pixels == Canvas->GetBuffer())
		{
			FTexture::FlipSquareBlockBgra((uint32_t*)Pixels, tex->GetWidth(), tex->GetHeight());
		}
		else
		{
			FTexture::FlipNonSquareBlockBgra((uint32_t*)Pixels, (const uint32_t*)Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch());
		}
	}
	else
	{
		if (Pixels == Canvas->GetBuffer())
		{
			FTexture::FlipSquareBlockRemap(Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap);
		}
		else
		{
			FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap);
		}
	}

	if (r_swtruecolor)
	{
		// True color render still sometimes uses palette textures (for sprites, mostly).
		// We need to make sure that both pixel buffers contain data:
		int width = tex->GetWidth();
		int height = tex->GetHeight();
		BYTE *palbuffer = (BYTE *)tex->GetPixels();
		uint32_t *bgrabuffer = (uint32_t*)tex->GetPixelsBgra();
		// Both buffers are walked one column of 'height' pixels at a time.
		for (int x = 0; x < width; x++)
		{
			for (int y = 0; y < height; y++)
			{
				// Quantize each channel to 5 bits and look up the palette
				// index in the RGB32k table.
				uint32_t color = bgrabuffer[y];
				int r = RPART(color);
				int g = GPART(color);
				int b = BPART(color);
				palbuffer[y] = RGB32k.RGB[r >> 3][g >> 3][b >> 3];
			}
			palbuffer += height;
			bgrabuffer += height;
		}
	}

	tex->SetUpdated();

	// Restore the colormap state saved before the nested render.
	fixedcolormap = savecolormap;
	realfixedcolormap = savecm;
}

View file

@ -58,6 +58,7 @@
#include "r_plane.h"
#include "r_segs.h"
#include "r_3dfloors.h"
#include "r_draw_rgba.h"
#include "v_palette.h"
#include "r_data/r_translate.h"
#include "r_data/colormaps.h"
@ -251,6 +252,7 @@ bool sprflipvert;
void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span)
{
int pixelsize = r_swtruecolor ? 4 : 1;
const fixed_t centeryfrac = FLOAT2FIXED(CenterY);
const fixed_t texturemid = FLOAT2FIXED(dc_texturemid);
while (span->Length != 0)
@ -321,7 +323,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span)
}
}
dc_source = column + top;
dc_dest = ylookup[dc_yl] + dc_x + dc_destorg;
dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg;
dc_count = dc_yh - dc_yl + 1;
colfunc ();
}
@ -414,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis)
}
fixed_t centeryfrac = FLOAT2FIXED(CenterY);
dc_colormap = vis->Style.colormap;
R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS);
mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor);
@ -422,7 +424,7 @@ void R_DrawVisSprite (vissprite_t *vis)
{ // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but
// it is the brightest one. We need to get back to the proper light level for
// this sprite.
dc_colormap += vis->ColormapNum << COLORMAPSHIFT;
R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS);
}
if (mode != DontDraw)
@ -476,7 +478,7 @@ void R_DrawVisSprite (vissprite_t *vis)
while (dc_x < stop4)
{
rt_initcols();
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
@ -544,11 +546,11 @@ void R_DrawWallSprite(vissprite_t *spr)
rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1));
rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep;
if (fixedlightlev >= 0)
dc_colormap = usecolormap->Maps + fixedlightlev;
R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
else if (fixedcolormap != NULL)
dc_colormap = fixedcolormap;
R_SetColorMapLight(fixedcolormap, 0, 0);
else if (!foggy && (spr->renderflags & RF_FULLBRIGHT))
dc_colormap = usecolormap->Maps;
R_SetColorMapLight(usecolormap, 0, 0);
else
calclighting = true;
@ -599,7 +601,7 @@ void R_DrawWallSprite(vissprite_t *spr)
{
if (calclighting)
{ // calculate lighting
dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT);
R_SetColorMapLight(usecolormap, rw_light, shade);
}
if (!R_ClipSpriteColumnWithPortals(spr))
R_WallSpriteColumn(R_DrawMaskedColumn);
@ -610,9 +612,9 @@ void R_DrawWallSprite(vissprite_t *spr)
{
if (calclighting)
{ // calculate lighting
dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT);
R_SetColorMapLight(usecolormap, rw_light, shade);
}
rt_initcols();
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
if (!R_ClipSpriteColumnWithPortals(spr))
@ -626,7 +628,7 @@ void R_DrawWallSprite(vissprite_t *spr)
{
if (calclighting)
{ // calculate lighting
dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT);
R_SetColorMapLight(usecolormap, rw_light, shade);
}
if (!R_ClipSpriteColumnWithPortals(spr))
R_WallSpriteColumn(R_DrawMaskedColumn);
@ -660,14 +662,14 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
int flags = 0;
// Do setup for blending.
dc_colormap = spr->Style.colormap;
R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS);
mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor);
if (mode == DontDraw)
{
return;
}
if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP)
if (colfunc == fuzzcolfunc || colfunc == R_FillColumn)
{
flags = DVF_OFFSCREEN | DVF_SPANSONLY;
}
@ -686,12 +688,13 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
// Render the voxel, either directly to the screen or offscreen.
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle,
spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.colormap, cliptop, clipbot,
spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot,
minslabz, maxslabz, flags);
// Blend the voxel, if that's what we need to do.
if ((flags & ~DVF_MIRRORED) != 0)
{
int pixelsize = r_swtruecolor ? 4 : 1;
for (int x = 0; x < viewwidth; ++x)
{
if (!(flags & DVF_SPANSONLY) && (x & 3) == 0)
@ -706,15 +709,12 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
dc_yl = span->Start;
dc_yh = span->Stop - 1;
dc_count = span->Stop - span->Start;
dc_dest = ylookup[span->Start] + x + dc_destorg;
dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg;
colfunc();
}
else
{
unsigned int **tspan = &dc_ctspan[x & 3];
(*tspan)[0] = span->Start;
(*tspan)[1] = span->Stop - 1;
*tspan += 2;
rt_span_coverage(x, span->Start, span->Stop - 1);
}
}
if (!(flags & DVF_SPANSONLY) && (x & 3) == 3)
@ -1073,7 +1073,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor
vis->Style.Alpha = float(thing->Alpha);
vis->fakefloor = fakefloor;
vis->fakeceiling = fakeceiling;
vis->ColormapNum = 0;
vis->Style.ColormapNum = 0;
vis->bInMirror = MirrorFlags & RF_XFLIP;
vis->bSplitSprite = false;
@ -1125,7 +1125,8 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor
// get light level
if (fixedcolormap != NULL)
{ // fixed map
vis->Style.colormap = fixedcolormap;
vis->Style.BaseColormap = fixedcolormap;
vis->Style.ColormapNum = 0;
}
else
{
@ -1135,17 +1136,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor
}
if (fixedlightlev >= 0)
{
vis->Style.colormap = mybasecolormap->Maps + fixedlightlev;
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)))
{ // full bright
vis->Style.colormap = mybasecolormap->Maps;
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = 0;
}
else
{ // diminished light
vis->ColormapNum = GETPALOOKUP(
vis->Style.ColormapNum = GETPALOOKUP(
r_SpriteVisibility / MAX(tz, MINZ), spriteshade);
vis->Style.colormap = mybasecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT);
vis->Style.BaseColormap = mybasecolormap;
}
}
}
@ -1214,14 +1217,13 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p
vis->Style.Alpha = float(thing->Alpha);
vis->fakefloor = NULL;
vis->fakeceiling = NULL;
vis->ColormapNum = 0;
vis->bInMirror = MirrorFlags & RF_XFLIP;
vis->pic = pic;
vis->bIsVoxel = false;
vis->bWallSprite = true;
vis->ColormapNum = GETPALOOKUP(
vis->Style.ColormapNum = GETPALOOKUP(
r_SpriteVisibility / MAX(tz, MINZ), spriteshade);
vis->Style.colormap = basecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT);
vis->Style.BaseColormap = basecolormap;
vis->wallc = wallc;
}
@ -1401,7 +1403,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
vis->yscale = float(pspriteyscale / tex->Scale.Y);
vis->Translation = 0; // [RH] Use default colors
vis->pic = tex;
vis->ColormapNum = 0;
vis->Style.ColormapNum = 0;
if (flip)
{
@ -1449,9 +1451,10 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
}
}
if (realfixedcolormap != nullptr)
if (realfixedcolormap != nullptr && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D)))
{ // fixed color
vis->Style.colormap = realfixedcolormap->Colormap;
vis->Style.BaseColormap = realfixedcolormap;
vis->Style.ColormapNum = 0;
}
else
{
@ -1461,35 +1464,38 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
}
if (fixedlightlev >= 0)
{
vis->Style.colormap = mybasecolormap->Maps + fixedlightlev;
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (!foggy && pspr->GetState()->GetFullbright())
{ // full bright
vis->Style.colormap = mybasecolormap->Maps; // [RH] use basecolormap
vis->Style.BaseColormap = mybasecolormap; // [RH] use basecolormap
vis->Style.ColormapNum = 0;
}
else
{ // local light
vis->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP(0, spriteshade) << COLORMAPSHIFT);
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade);
}
}
if (camera->Inventory != nullptr)
{
lighttable_t *oldcolormap = vis->Style.colormap;
camera->Inventory->AlterWeaponSprite(&vis->Style);
if (vis->Style.colormap != oldcolormap)
BYTE oldcolormapnum = vis->Style.ColormapNum;
FColormap *oldcolormap = vis->Style.BaseColormap;
camera->Inventory->AlterWeaponSprite (&vis->Style);
if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum)
{
// The colormap has changed. Is it one we can easily identify?
// If not, then don't bother trying to identify it for
// hardware accelerated drawing.
if (vis->Style.colormap < SpecialColormaps[0].Colormap ||
vis->Style.colormap > SpecialColormaps.Last().Colormap)
if (vis->Style.BaseColormap < &SpecialColormaps[0] ||
vis->Style.BaseColormap > &SpecialColormaps.Last())
{
noaccel = true;
}
// Has the basecolormap changed? If so, we can't hardware accelerate it,
// since we don't know what it is anymore.
else if (vis->Style.colormap < mybasecolormap->Maps ||
vis->Style.colormap >= mybasecolormap->Maps + NUMCOLORMAPS * 256)
else if (vis->Style.BaseColormap != mybasecolormap)
{
noaccel = true;
}
@ -1497,13 +1503,13 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
}
// If we're drawing with a special colormap, but shaders for them are disabled, do
// not accelerate.
if (!r_shadercolormaps && (vis->Style.colormap >= SpecialColormaps[0].Colormap &&
vis->Style.colormap <= SpecialColormaps.Last().Colormap))
if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] &&
vis->Style.BaseColormap <= &SpecialColormaps.Last()))
{
noaccel = true;
}
// If drawing with a BOOM colormap, disable acceleration.
if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps)
if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps)
{
noaccel = true;
}
@ -1520,7 +1526,8 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
else
{
colormap_to_use = basecolormap;
vis->Style.colormap = basecolormap->Maps;
vis->Style.BaseColormap = basecolormap;
vis->Style.ColormapNum = 0;
vis->Style.RenderStyle = STYLE_Normal;
}
@ -1691,18 +1698,16 @@ void R_DrawRemainingPlayerSprites()
FColormapStyle colormapstyle;
bool usecolormapstyle = false;
if (vis->Style.colormap >= SpecialColormaps[0].Colormap &&
vis->Style.colormap < SpecialColormaps[SpecialColormaps.Size()].Colormap)
if (vis->Style.BaseColormap >= &SpecialColormaps[0] &&
vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()])
{
// Yuck! There needs to be a better way to store colormaps in the vissprite... :(
ptrdiff_t specialmap = (vis->Style.colormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap);
special = &SpecialColormaps[specialmap];
special = static_cast<FSpecialColormap*>(vis->Style.BaseColormap);
}
else if (colormap->Color == PalEntry(255,255,255) &&
colormap->Desaturate == 0)
{
overlay = colormap->Fade;
overlay.a = BYTE(((vis->Style.colormap - colormap->Maps) >> 8) * 255 / NUMCOLORMAPS);
overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS);
}
else
{
@ -1710,7 +1715,7 @@ void R_DrawRemainingPlayerSprites()
colormapstyle.Color = colormap->Color;
colormapstyle.Fade = colormap->Fade;
colormapstyle.Desaturate = colormap->Desaturate;
colormapstyle.FadeLevel = ((vis->Style.colormap - colormap->Maps) >> 8) / float(NUMCOLORMAPS);
colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS);
}
screen->DrawTexture(vis->pic,
viewwindowx + vispsprites[i].x1,
@ -1955,7 +1960,8 @@ void R_DrawSprite (vissprite_t *spr)
int r1, r2;
short topclip, botclip;
short *clip1, *clip2;
lighttable_t *colormap = spr->Style.colormap;
FColormap *colormap = spr->Style.BaseColormap;
int colormapnum = spr->Style.ColormapNum;
F3DFloor *rover;
FDynamicColormap *mybasecolormap;
@ -2052,17 +2058,19 @@ void R_DrawSprite (vissprite_t *spr)
}
if (fixedlightlev >= 0)
{
spr->Style.colormap = mybasecolormap->Maps + fixedlightlev;
spr->Style.BaseColormap = mybasecolormap;
spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (!foggy && (spr->renderflags & RF_FULLBRIGHT))
{ // full bright
spr->Style.colormap = mybasecolormap->Maps;
spr->Style.BaseColormap = mybasecolormap;
spr->Style.ColormapNum = 0;
}
else
{ // diminished light
spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight);
spr->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP (
r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade) << COLORMAPSHIFT);
spr->Style.BaseColormap = mybasecolormap;
spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade);
}
}
}
@ -2210,7 +2218,8 @@ void R_DrawSprite (vissprite_t *spr)
if (topclip >= botclip)
{
spr->Style.colormap = colormap;
spr->Style.BaseColormap = colormap;
spr->Style.ColormapNum = colormapnum;
return;
}
@ -2340,7 +2349,8 @@ void R_DrawSprite (vissprite_t *spr)
}
if (i == x2)
{
spr->Style.colormap = colormap;
spr->Style.BaseColormap = colormap;
spr->Style.ColormapNum = colormapnum;
return;
}
}
@ -2358,7 +2368,8 @@ void R_DrawSprite (vissprite_t *spr)
int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale);
R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot);
}
spr->Style.colormap = colormap;
spr->Style.BaseColormap = colormap;
spr->Style.ColormapNum = colormapnum;
}
// kg3D:
@ -2475,7 +2486,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
int x1, x2, y1, y2;
vissprite_t* vis;
sector_t* heightsec = NULL;
BYTE* map;
FColormap* map;
// [ZZ] Particle not visible through the portal plane
if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst))
@ -2548,7 +2559,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &heightsec->ceilingplane;
toppic = sector->GetTexture(sector_t::ceiling);
botpic = heightsec->GetTexture(sector_t::ceiling);
map = heightsec->ColorMap->Maps;
map = heightsec->ColorMap;
}
else if (fakeside == FAKED_BelowFloor)
{
@ -2556,7 +2567,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &sector->floorplane;
toppic = heightsec->GetTexture(sector_t::floor);
botpic = sector->GetTexture(sector_t::floor);
map = heightsec->ColorMap->Maps;
map = heightsec->ColorMap;
}
else
{
@ -2564,7 +2575,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &heightsec->floorplane;
toppic = heightsec->GetTexture(sector_t::ceiling);
botpic = heightsec->GetTexture(sector_t::floor);
map = sector->ColorMap->Maps;
map = sector->ColorMap;
}
}
else
@ -2573,7 +2584,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &sector->floorplane;
toppic = sector->GetTexture(sector_t::ceiling);
botpic = sector->GetTexture(sector_t::floor);
map = sector->ColorMap->Maps;
map = sector->ColorMap;
}
if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint (particle->Pos))
@ -2602,25 +2613,28 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
vis->renderflags = particle->trans;
vis->FakeFlatStat = fakeside;
vis->floorclip = 0;
vis->ColormapNum = 0;
vis->Style.ColormapNum = 0;
if (fixedlightlev >= 0)
{
vis->Style.colormap = map + fixedlightlev;
vis->Style.BaseColormap = map;
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (fixedcolormap)
{
vis->Style.colormap = fixedcolormap;
vis->Style.BaseColormap = fixedcolormap;
vis->Style.ColormapNum = 0;
}
else if (particle->bright)
{
vis->Style.colormap = map;
vis->Style.BaseColormap = map;
vis->Style.ColormapNum = 0;
}
else
{
// Particles are slightly more visible than regular sprites.
vis->ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade);
vis->Style.colormap = map + (vis->ColormapNum << COLORMAPSHIFT);
vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade);
vis->Style.BaseColormap = map;
}
}
@ -2649,13 +2663,13 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis)
}
}
void R_DrawParticle (vissprite_t *vis)
void R_DrawParticle_C (vissprite_t *vis)
{
DWORD *bg2rgb;
int spacing;
BYTE *dest;
DWORD fg;
BYTE color = vis->Style.colormap[vis->startfrac];
BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac];
int yl = vis->y1;
int ycount = vis->y2 - yl + 1;
int x1 = vis->x1;
@ -2714,12 +2728,64 @@ void R_DrawParticle (vissprite_t *vis)
}
}
// Draws a particle vissprite as a solid, alpha-blended rectangle into a
// 32-bit-per-pixel destination (dest is addressed as uint32_t).
// The rectangle spans rows vis->y1..vis->y2 (inclusive) and columns
// vis->x1..vis->x2-1 (the column loop below stops before x2).
void R_DrawParticle_rgba(vissprite_t *vis)
{
int spacing;
uint32_t *dest;
// Palette index of the particle, read from the start of the base colormap's
// Maps table (no ColormapNum offset here; lighting is applied to fg below).
BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac];
int yl = vis->y1;
int ycount = vis->y2 - yl + 1;
int x1 = vis->x1;
int countbase = vis->x2 - x1;
R_DrawMaskedSegsBehindParticle(vis);
// NOTE(review): presumably this waits for queued drawer work to finish so the
// direct framebuffer writes below cannot race with it - confirm.
DrawerCommandQueue::WaitForWorkers();
// Shade the palette colour using a light level derived from ColormapNum.
uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS)));
uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff;
// vis->renderflags holds translucency level (0-255)
// The mask clears the low 10 bits, so fglevel is a multiple of 0x400.
fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff;
// alpha/inv_alpha are blend weights that sum to 256
// (assumes FRACUNIT is 1 << 16, giving alpha in 0..256 - TODO confirm).
uint32_t alpha = fglevel * 256 / FRACUNIT;
uint32_t inv_alpha = 256 - alpha;
// Pre-multiply the foreground once; the inner loop only has to add it.
fg_red *= alpha;
fg_green *= alpha;
fg_blue *= alpha;
spacing = RenderTarget->GetPitch();
for (int x = x1; x < (x1 + countbase); x++)
{
dc_x = x;
// Skip columns rejected by the portal clipper.
if (R_ClipSpriteColumnWithPortals(vis))
continue;
dest = ylookup[yl] + x + (uint32_t*)dc_destorg;
for (int y = 0; y < ycount; y++)
{
uint32_t bg_red = (*dest >> 16) & 0xff;
uint32_t bg_green = (*dest >> 8) & 0xff;
uint32_t bg_blue = (*dest) & 0xff;
// result = (fg * alpha + bg * inv_alpha) / 256 per channel
uint32_t red = (fg_red + bg_red * inv_alpha) / 256;
uint32_t green = (fg_green + bg_green * inv_alpha) / 256;
uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256;
// The written pixel always gets an alpha byte of 0xff.
*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
dest += spacing;
}
}
}
extern double BaseYaspectMul;;
void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle,
const FVector3 &dasprpos, DAngle dasprang,
fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj,
lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags)
FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags)
{
int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff;
fixed_t cosang, sinang, sprcosang, sprsinang;
@ -2761,7 +2827,9 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle,
sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2;
sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2;
R_SetupDrawSlab(colormap);
R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS);
int pixelsize = r_swtruecolor ? 4 : 1;
// Select mip level
i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang));
@ -3016,7 +3084,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle,
if (!(flags & DVF_OFFSCREEN))
{
// Draw directly to the screen.
R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, ylookup[z1] + lxt + xxl + dc_destorg);
R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg);
}
else
{
@ -3247,12 +3315,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly)
{
if (OffscreenColorBuffer == NULL)
{
OffscreenColorBuffer = new BYTE[width * height];
OffscreenColorBuffer = new BYTE[width * height * 4];
}
else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height)
{
delete[] OffscreenColorBuffer;
OffscreenColorBuffer = new BYTE[width * height];
OffscreenColorBuffer = new BYTE[width * height * 4];
}
}
OffscreenBufferWidth = width;

View file

@ -86,7 +86,6 @@ struct vissprite_t
BYTE bSplitSprite:1; // [RH] Sprite was split by a drawseg
BYTE bInMirror:1; // [RH] Sprite is "inside" a mirror
BYTE FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on
BYTE ColormapNum; // Which colormap is rendered (needed for shaded drawer)
short renderflags;
DWORD Translation; // [RH] for color translation
visstyle_t Style;
@ -97,7 +96,10 @@ struct vissprite_t
struct particle_t;
void R_DrawParticle (vissprite_t *);
extern void(*R_DrawParticle)(vissprite_t *);
void R_DrawParticle_C (vissprite_t *);
void R_DrawParticle_rgba (vissprite_t *);
void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside);
extern int MaxVisSprites;
@ -142,7 +144,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 };
void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle,
const FVector3 &sprpos, DAngle dasprang,
fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj,
lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags);
FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags);
void R_ClipVisSprite (vissprite_t *vis, int xl, int xh);

View file

@ -889,11 +889,11 @@ void R_SetupFrame (AActor *actor)
BaseBlendG = GPART(newblend);
BaseBlendB = BPART(newblend);
BaseBlendA = APART(newblend) / 255.f;
NormalLight.Maps = realcolormaps;
NormalLight.Maps = realcolormaps.Maps;
}
else
{
NormalLight.Maps = realcolormaps + NUMCOLORMAPS*256*newblend;
NormalLight.Maps = realcolormaps.Maps + NUMCOLORMAPS*256*newblend;
BaseBlendR = BaseBlendG = BaseBlendB = 0;
BaseBlendA = 0.f;
}

View file

@ -122,6 +122,7 @@ void FAutomapTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -56,7 +56,6 @@ public:
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
void Unload ();
protected:
const BYTE *Pixels;
@ -103,17 +102,6 @@ FBuildTexture::~FBuildTexture ()
//
//==========================================================================
void FBuildTexture::Unload ()
{
// Nothing to do, since the pixels are accessed from memory-mapped files directly
}
//==========================================================================
//
//
//
//==========================================================================
const BYTE *FBuildTexture::GetPixels ()
{
return Pixels;

View file

@ -53,7 +53,6 @@ FCanvasTexture::FCanvasTexture (const char *name, int width, int height)
DummySpans[1].TopOffset = 0;
DummySpans[1].Length = 0;
UseType = TEX_Wall;
Canvas = NULL;
bNeedsUpdate = true;
bDidUpdate = false;
bHasCanvas = true;
@ -101,11 +100,22 @@ const BYTE *FCanvasTexture::GetPixels ()
return Pixels;
}
// Returns the BGRA pixel buffer of this canvas texture, creating the BGRA
// canvas on first use. Every call flags the texture as needing an update.
const uint32_t *FCanvasTexture::GetPixelsBgra()
{
	bNeedsUpdate = true;

	// The BGRA canvas is created lazily, the first time it is requested.
	const bool canvasMissing = (CanvasBgra == NULL);
	if (canvasMissing)
		MakeTextureBgra();

	return PixelsBgra;
}
void FCanvasTexture::MakeTexture ()
{
Canvas = new DSimpleCanvas (Width, Height);
Canvas = new DSimpleCanvas (Width, Height, false);
Canvas->Lock ();
GC::AddSoftRoot(Canvas);
if (Width != Height || Width != Canvas->GetPitch())
{
Pixels = new BYTE[Width*Height];
@ -113,29 +123,68 @@ void FCanvasTexture::MakeTexture ()
}
else
{
Pixels = Canvas->GetBuffer();
Pixels = (BYTE*)Canvas->GetBuffer();
bPixelsAllocated = false;
}
// Draw a special "unrendered" initial texture into the buffer.
memset (Pixels, 0, Width*Height/2);
memset (Pixels+Width*Height/2, 255, Width*Height/2);
}
// Creates the BGRA canvas for this texture and sets up PixelsBgra.
// When the texture is square and the canvas pitch equals the width, the
// canvas buffer itself is used as the pixel store; otherwise a separate
// Width*Height buffer is allocated (and flagged via bPixelsAllocatedBgra).
void FCanvasTexture::MakeTextureBgra()
{
	CanvasBgra = new DSimpleCanvas(Width, Height, true);
	CanvasBgra->Lock();
	GC::AddSoftRoot(CanvasBgra);

	const bool useCanvasBuffer = (Width == Height) && (Width == CanvasBgra->GetPitch());
	if (useCanvasBuffer)
	{
		PixelsBgra = (uint32_t*)CanvasBgra->GetBuffer();
		bPixelsAllocatedBgra = false;
	}
	else
	{
		PixelsBgra = new uint32_t[Width*Height];
		bPixelsAllocatedBgra = true;
	}

	// Draw a special "unrendered" initial texture into the buffer:
	// first half all-zero bytes, second half all-0xff bytes.
	uint32_t *firstHalf = PixelsBgra;
	uint32_t *secondHalf = PixelsBgra + Width*Height / 2;
	memset(firstHalf, 0, Width*Height / 2 * 4);
	memset(secondHalf, 255, Width*Height / 2 * 4);
}
void FCanvasTexture::Unload ()
{
if (bPixelsAllocated)
{
if (Pixels != NULL) delete [] Pixels;
if (Pixels != NULL) delete[] Pixels;
bPixelsAllocated = false;
Pixels = NULL;
}
if (bPixelsAllocatedBgra)
{
if (PixelsBgra != NULL) delete[] PixelsBgra;
bPixelsAllocatedBgra = false;
PixelsBgra = NULL;
}
if (Canvas != NULL)
{
GC::DelSoftRoot(Canvas);
Canvas->Destroy();
Canvas = NULL;
}
if (CanvasBgra != NULL)
{
GC::DelSoftRoot(CanvasBgra);
CanvasBgra->Destroy();
CanvasBgra = NULL;
}
FTexture::Unload();
}
bool FCanvasTexture::CheckModified ()

View file

@ -401,6 +401,7 @@ void FDDSTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -138,6 +138,7 @@ void FFlatTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -142,6 +142,7 @@ void FIMGZTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -295,11 +295,9 @@ FJPEGTexture::~FJPEGTexture ()
void FJPEGTexture::Unload ()
{
if (Pixels != NULL)
{
delete[] Pixels;
Pixels = NULL;
}
delete[] Pixels;
Pixels = NULL;
FTexture::Unload();
}
//==========================================================================

View file

@ -362,6 +362,7 @@ void FMultiPatchTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -184,6 +184,7 @@ void FPatchTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -191,6 +191,7 @@ void FPCXTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -369,11 +369,9 @@ FPNGTexture::~FPNGTexture ()
void FPNGTexture::Unload ()
{
if (Pixels != NULL)
{
delete[] Pixels;
Pixels = NULL;
}
delete[] Pixels;
Pixels = NULL;
FTexture::Unload();
}
//==========================================================================
@ -446,6 +444,7 @@ const BYTE *FPNGTexture::GetPixels ()
return Pixels;
}
//==========================================================================
//
//

View file

@ -206,6 +206,7 @@ void FRawPageTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -45,6 +45,7 @@
#include "v_video.h"
#include "m_fixed.h"
#include "textures/textures.h"
#include "v_palette.h"
typedef bool (*CheckFunc)(FileReader & file);
typedef FTexture * (*CreateFunc)(FileReader & file, int lumpnum);
@ -175,6 +176,37 @@ FTexture::~FTexture ()
KillNative();
}
// Base-class unload: drops the cached BGRA pixels (including their storage).
void FTexture::Unload()
{
	// Swapping with an empty temporary leaves PixelsBgra empty and hands the
	// old storage to the temporary, which frees it when it goes out of scope.
	std::vector<uint32_t>().swap(PixelsBgra);
}
// Returns a pointer to one column of the texture in BGRA8 format.
//
// column     Column index; wrapped into range with a modulo by Width.
// spans_out  Optional. When non-null, the span list for the column is fetched
//            through GetColumn().
//
// Returns nullptr when the texture has zero width or when GetPixelsBgra()
// could not produce any pixel data. In the zero-width case spans_out is left
// untouched.
const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out)
{
	const uint32_t *pixels = GetPixelsBgra();

	// A zero-width texture has no columns; bail out before the modulo below
	// divides by zero.
	if (Width == 0)
		return nullptr;

	column %= Width;
	if (spans_out != nullptr)
		GetColumn(column, spans_out);

	// Do not turn a failed pixel fetch into a bogus non-null pointer.
	if (pixels == nullptr)
		return nullptr;

	// Pixels are stored column-major: each column is Height entries long.
	return pixels + column * Height;
}
const uint32_t *FTexture::GetPixelsBgra()
{
if (PixelsBgra.empty() || CheckModified())
{
if (!GetColumn(0, nullptr))
return nullptr;
FBitmap bitmap;
bitmap.Create(GetWidth(), GetHeight());
CopyTrueColorPixels(&bitmap, 0, 0);
GenerateBgraFromBitmap(bitmap);
}
return PixelsBgra.data();
}
bool FTexture::CheckModified ()
{
return false;
@ -318,6 +350,210 @@ void FTexture::FreeSpans (Span **spans) const
M_Free (spans);
}
// Fills PixelsBgra from a true-colour bitmap: allocates the buffer, copies
// the bitmap's pixels into it transposed (bitmap index x + y*Width becomes
// buffer index y + x*Height), then generates the mipmap levels.
void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap)
{
	CreatePixelsBgraWithMipmaps();

	// Transpose
	const uint32_t *bitmapPixels = (const uint32_t *)bitmap.GetPixels();
	uint32_t *texturePixels = PixelsBgra.data();
	for (int col = 0; col < Width; col++)
	{
		uint32_t *columnOut = texturePixels + col * Height;
		for (int row = 0; row < Height; row++)
		{
			columnOut[row] = bitmapPixels[col + row * Width];
		}
	}

	GenerateBgraMipmaps();
}
// Resizes PixelsBgra so it can hold the base image plus every mipmap level.
// Each level's dimensions are the base dimensions shifted right by the level
// index, clamped to at least 1. Newly added entries are set to 0xffff0000.
void FTexture::CreatePixelsBgraWithMipmaps()
{
	const int levelCount = MipmapLevels();

	int totalPixels = 0;
	for (int level = 0; level < levelCount; level++)
	{
		const int levelWidth = MAX(Width >> level, 1);
		const int levelHeight = MAX(Height >> level, 1);
		totalPixels += levelWidth * levelHeight;
	}

	PixelsBgra.resize(totalPixels, 0xffff0000);
}
// Returns the number of mipmap levels (base level included): the number of
// right shifts needed to bring the larger of Width and Height down to zero.
int FTexture::MipmapLevels() const
{
	int widthbits = 0;
	for (int remaining = Width; remaining != 0; remaining >>= 1)
		widthbits++;

	int heightbits = 0;
	for (int remaining = Height; remaining != 0; remaining >>= 1)
		heightbits++;

	return MAX(widthbits, heightbits);
}
// Generates all mipmap levels of PixelsBgra from level 0.
//
// PixelsBgra must already be sized for the full chain (see
// CreatePixelsBgraWithMipmaps) with level 0 filled in. Level 0 occupies the
// first Width*Height entries and each further level follows directly after
// the previous one, all column-major.
//
// The work is done in floating point: every channel (alpha included) is
// converted with a 2.2 power curve, each level is produced by averaging four
// samples of the previous level, lightly sharpened, and finally converted
// back to 8 bits per channel. Level 0 itself is not rewritten.
void FTexture::GenerateBgraMipmaps()
{
	struct Color4f
	{
		float a, r, g, b;
		Color4f operator*(const Color4f &v) const { return Color4f{ a * v.a, r * v.r, g * v.g, b * v.b }; }
		Color4f operator/(const Color4f &v) const { return Color4f{ a / v.a, r / v.r, g / v.g, b / v.b }; }
		Color4f operator+(const Color4f &v) const { return Color4f{ a + v.a, r + v.r, g + v.g, b + v.b }; }
		Color4f operator-(const Color4f &v) const { return Color4f{ a - v.a, r - v.r, g - v.g, b - v.b }; }
		Color4f operator*(float s) const { return Color4f{ a * s, r * s, g * s, b * s }; }
		Color4f operator/(float s) const { return Color4f{ a / s, r / s, g / s, b / s }; }
		Color4f operator+(float s) const { return Color4f{ a + s, r + s, g + s, b + s }; }
		Color4f operator-(float s) const { return Color4f{ a - s, r - s, g - s, b - s }; }
	};

	int levels = MipmapLevels();
	std::vector<Color4f> image(PixelsBgra.size());

	// Convert to normalized linear colorspace
	{
		for (int x = 0; x < Width; x++)
		{
			for (int y = 0; y < Height; y++)
			{
				uint32_t c8 = PixelsBgra[x * Height + y];
				Color4f c;
				c.a = powf(APART(c8) * (1.0f / 255.0f), 2.2f);
				c.r = powf(RPART(c8) * (1.0f / 255.0f), 2.2f);
				c.g = powf(GPART(c8) * (1.0f / 255.0f), 2.2f);
				c.b = powf(BPART(c8) * (1.0f / 255.0f), 2.2f);
				image[x * Height + y] = c;
			}
		}
	}

	// Generate mipmaps
	{
		// Scratch buffer for the blurred copy used by the sharpen step; sized
		// for the largest level.
		std::vector<Color4f> smoothed(Width * Height);
		Color4f *src = image.data();
		Color4f *dest = src + Width * Height;
		for (int i = 1; i < levels; i++)
		{
			int srcw = MAX(Width >> (i - 1), 1);
			int srch = MAX(Height >> (i - 1), 1);
			int w = MAX(Width >> i, 1);
			int h = MAX(Height >> i, 1);

			// Downscale
			// NOTE(review): the second sample sits at index 2*x+2 / 2*y+2
			// (clamped to the edge), not 2*x+1 / 2*y+1 - kept as in the
			// original; confirm this footprint is intended.
			for (int x = 0; x < w; x++)
			{
				int sx0 = x * 2;
				int sx1 = MIN((x + 1) * 2, srcw - 1);
				for (int y = 0; y < h; y++)
				{
					int sy0 = y * 2;
					int sy1 = MIN((y + 1) * 2, srch - 1);

					Color4f src00 = src[sy0 + sx0 * srch];
					Color4f src01 = src[sy1 + sx0 * srch];
					Color4f src10 = src[sy0 + sx1 * srch];
					Color4f src11 = src[sy1 + sx1 * srch];
					Color4f c = (src00 + src01 + src10 + src11) * 0.25f;

					// Store the average of the four samples. (Previously
					// src00 was stored and the computed average discarded.)
					dest[y + x * h] = c;
				}
			}

			// Sharpen filter with a 3x3 kernel:
			// first build a 3x3 box blur (wrapping around at the edges) ...
			for (int x = 0; x < w; x++)
			{
				for (int y = 0; y < h; y++)
				{
					Color4f c = { 0.0f, 0.0f, 0.0f, 0.0f };
					for (int kx = -1; kx < 2; kx++)
					{
						for (int ky = -1; ky < 2; ky++)
						{
							int a = y + ky;
							int b = x + kx;
							if (a < 0) a = h - 1;
							if (a == h) a = 0;
							if (b < 0) b = w - 1;
							if (b == w) b = 0;
							c = c + dest[a + b * h];
						}
					}
					c = c * (1.0f / 9.0f);
					smoothed[y + x * h] = c;
				}
			}
			// ... then push each pixel away from its blurred value by factor k.
			float k = 0.04f;
			for (int j = 0; j < w * h; j++)
				dest[j] = dest[j] + (dest[j] - smoothed[j]) * k;

			// The level just written becomes the source for the next one.
			src = dest;
			dest += w * h;
		}
	}

	// Convert to bgra8 sRGB colorspace
	{
		// Start past level 0: the base image in PixelsBgra is left as-is.
		Color4f *src = image.data() + Width * Height;
		uint32_t *dest = PixelsBgra.data() + Width * Height;
		for (int i = 1; i < levels; i++)
		{
			int w = MAX(Width >> i, 1);
			int h = MAX(Height >> i, 1);
			for (int j = 0; j < w * h; j++)
			{
				uint32_t a = (uint32_t)clamp(powf(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
				uint32_t r = (uint32_t)clamp(powf(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
				uint32_t g = (uint32_t)clamp(powf(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
				uint32_t b = (uint32_t)clamp(powf(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
				dest[j] = (a << 24) | (r << 16) | (g << 8) | b;
			}
			src += w * h;
			dest += w * h;
		}
	}
}
// Integer-only mipmap generation: each level is built from the previous one
// by averaging four samples per channel (rounded), with no colour-space
// conversion or sharpening. Levels are stored back to back in PixelsBgra,
// column-major, starting with level 0 in the first Width*Height entries.
void FTexture::GenerateBgraMipmapsFast()
{
	const int levelCount = MipmapLevels();

	uint32_t *prevLevel = PixelsBgra.data();
	uint32_t *curLevel = prevLevel + Width * Height;

	for (int level = 1; level < levelCount; level++)
	{
		const int prevW = MAX(Width >> (level - 1), 1);
		const int prevH = MAX(Height >> (level - 1), 1);
		const int curW = MAX(Width >> level, 1);
		const int curH = MAX(Height >> level, 1);

		for (int x = 0; x < curW; x++)
		{
			// Source columns sampled for this destination column.
			const int colA = x * 2;
			const int colB = MIN((x + 1) * 2, prevW - 1);

			for (int y = 0; y < curH; y++)
			{
				// Source rows sampled for this destination row.
				const int rowA = y * 2;
				const int rowB = MIN((y + 1) * 2, prevH - 1);

				const uint32_t p00 = prevLevel[rowA + colA * prevH];
				const uint32_t p01 = prevLevel[rowB + colA * prevH];
				const uint32_t p10 = prevLevel[rowA + colB * prevH];
				const uint32_t p11 = prevLevel[rowB + colB * prevH];

				// Per-channel average with round-to-nearest (+2 before /4).
				uint32_t alpha = (APART(p00) + APART(p01) + APART(p10) + APART(p11) + 2) / 4;
				uint32_t red = (RPART(p00) + RPART(p01) + RPART(p10) + RPART(p11) + 2) / 4;
				uint32_t green = (GPART(p00) + GPART(p01) + GPART(p10) + GPART(p11) + 2) / 4;
				uint32_t blue = (BPART(p00) + BPART(p01) + BPART(p10) + BPART(p11) + 2) / 4;

				curLevel[y + x * curH] = (alpha << 24) | (red << 16) | (green << 8) | blue;
			}
		}

		// Advance: the level just produced feeds the next one.
		prevLevel = curLevel;
		curLevel += curW * curH;
	}
}
void FTexture::CopyToBlock (BYTE *dest, int dwidth, int dheight, int xpos, int ypos, int rotate, const BYTE *translation)
{
const BYTE *pixels = GetPixels();
@ -384,6 +620,29 @@ void FTexture::FlipSquareBlock (BYTE *block, int x, int y)
}
}
// Transposes a square x-by-x block of 32-bit pixels in place.
// Does nothing when x != y.
void FTexture::FlipSquareBlockBgra(uint32_t *block, int x, int y)
{
	if (x != y)
		return;

	for (int diag = 0; diag < x; ++diag)
	{
		// 'corner' is the element on the main diagonal for this pass.
		uint32_t *corner = block + x*diag + diag;
		const int remaining = x - diag;

		// Exchange the rest of this row with the rest of this column.
		// Each pair is disjoint from the others, so the order of the swaps
		// does not matter (offset 0 swaps the diagonal element with itself).
		for (int offset = 0; offset < remaining; ++offset)
		{
			swapvalues<uint32_t>(corner[offset], corner[offset*x]);
		}
	}
}
void FTexture::FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap)
{
int i, j;
@ -427,6 +686,19 @@ void FTexture::FlipNonSquareBlock (BYTE *dst, const BYTE *src, int x, int y, int
}
}
// Copies an x-by-y block of 32-bit pixels from src to dst, transposing it:
// the source element at (i, j) with row stride srcpitch is written to
// dst[i*y + j].
void FTexture::FlipNonSquareBlockBgra(uint32_t *dst, const uint32_t *src, int x, int y, int srcpitch)
{
	for (int col = 0; col < x; ++col)
	{
		uint32_t *out = dst + col*y;
		const uint32_t *in = src + col;
		for (int row = 0; row < y; ++row)
		{
			out[row] = in[row*srcpitch];
		}
	}
}
void FTexture::FlipNonSquareBlockRemap (BYTE *dst, const BYTE *src, int x, int y, int srcpitch, const BYTE *remap)
{
int i, j;
@ -580,10 +852,6 @@ FDummyTexture::FDummyTexture ()
UseType = TEX_Null;
}
void FDummyTexture::Unload ()
{
}
void FDummyTexture::SetSize (int width, int height)
{
Width = width;

View file

@ -3,6 +3,7 @@
#include "doomtype.h"
#include "vectors.h"
#include <vector>
class FBitmap;
struct FRemapTable;
@ -175,9 +176,18 @@ public:
// Returns a single column of the texture
virtual const BYTE *GetColumn (unsigned int column, const Span **spans_out) = 0;
// Returns a single column of the texture, in BGRA8 format
virtual const uint32_t *GetColumnBgra(unsigned int column, const Span **spans_out);
// Returns the whole texture, stored in column-major order
virtual const BYTE *GetPixels () = 0;
// Returns the whole texture, stored in column-major order, in BGRA8 format
virtual const uint32_t *GetPixelsBgra();
// Returns true if GetPixelsBgra includes mipmaps
virtual bool Mipmapped() { return true; }
virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL);
int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL);
virtual bool UseBasePalette();
@ -185,7 +195,7 @@ public:
virtual FTexture *GetRedirect(bool wantwarped);
virtual FTexture *GetRawTexture(); // for FMultiPatchTexture to override
virtual void Unload () = 0;
virtual void Unload ();
// Returns the native pixel format for this image
virtual FTextureFormat GetFormat();
@ -262,10 +272,20 @@ protected:
Rotations = other->Rotations;
}
std::vector<uint32_t> PixelsBgra;
void GenerateBgraFromBitmap(const FBitmap &bitmap);
void CreatePixelsBgraWithMipmaps();
void GenerateBgraMipmaps();
void GenerateBgraMipmapsFast();
int MipmapLevels() const;
public:
static void FlipSquareBlock (BYTE *block, int x, int y);
static void FlipSquareBlockBgra (uint32_t *block, int x, int y);
static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap);
static void FlipNonSquareBlock (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch);
static void FlipNonSquareBlockBgra (uint32_t *blockto, const uint32_t *blockfrom, int x, int y, int srcpitch);
static void FlipNonSquareBlockRemap (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch, const BYTE *remap);
friend class D3DTex;
@ -460,7 +480,6 @@ public:
FDummyTexture ();
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
void Unload ();
void SetSize (int width, int height);
};
@ -474,6 +493,7 @@ public:
virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL);
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
const uint32_t *GetPixelsBgra() override;
void Unload ();
bool CheckModified ();
@ -508,21 +528,28 @@ public:
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
const uint32_t *GetPixelsBgra() override;
void Unload ();
bool CheckModified ();
void NeedUpdate() { bNeedsUpdate=true; }
void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; }
DSimpleCanvas *GetCanvas() { return Canvas; }
DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; }
bool Mipmapped() override { return false; }
void MakeTexture ();
void MakeTextureBgra ();
protected:
DSimpleCanvas *Canvas;
BYTE *Pixels;
DSimpleCanvas *Canvas = nullptr;
DSimpleCanvas *CanvasBgra = nullptr;
BYTE *Pixels = nullptr;
uint32_t *PixelsBgra = nullptr;
Span DummySpans[2];
bool bNeedsUpdate;
bool bDidUpdate;
bool bPixelsAllocated;
bool bNeedsUpdate = true;
bool bDidUpdate = false;
bool bPixelsAllocated = false;
bool bPixelsAllocatedBgra = false;
public:
bool bFirstUpdate;

View file

@ -181,6 +181,7 @@ void FTGATexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -39,6 +39,7 @@
#include "r_utility.h"
#include "textures/textures.h"
#include "warpbuffer.h"
#include "v_palette.h"
FWarpTexture::FWarpTexture (FTexture *source, int warptype)
@ -74,6 +75,7 @@ void FWarpTexture::Unload ()
Spans = NULL;
}
SourcePic->Unload ();
FTexture::Unload();
}
bool FWarpTexture::CheckModified ()
@ -92,6 +94,25 @@ const BYTE *FWarpTexture::GetPixels ()
return Pixels;
}
// Returns the warped texture in BGRA8 format (with mipmaps).
//
// The 8-bit warped image is regenerated when it is missing or was generated
// for a different frame time. The BGRA copy is rebuilt whenever the 8-bit
// image was just regenerated, and also whenever the BGRA cache is empty - the
// latter covers the case where the paletted image is already current for this
// frame, so that no regeneration is triggered here.
//
// NOTE(review): if the paletted image was regenerated elsewhere for this frame
// while an older BGRA copy still exists, that copy is returned unchanged;
// tracking a separate generation time for the BGRA data would close that gap.
const uint32_t *FWarpTexture::GetPixelsBgra()
{
	DWORD time = r_FrameTime;

	bool regenerated = false;
	if (Pixels == NULL || time != GenTime)
	{
		// presumably MakeTexture refreshes Pixels and GenTime - see its definition
		MakeTexture(time);
		regenerated = true;
	}

	if (regenerated || PixelsBgra.empty())
	{
		CreatePixelsBgraWithMipmaps();
		// Expand palette indices to BGRA; index 0 becomes a fully zero pixel,
		// everything else gets the palette colour with an alpha byte of 0xff.
		for (int i = 0; i < Width * Height; i++)
		{
			if (Pixels[i] != 0)
				PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[Pixels[i]].d;
			else
				PixelsBgra[i] = 0;
		}
		GenerateBgraMipmapsFast();
	}
	return PixelsBgra.data();
}
const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out)
{
DWORD time = r_FrameTime;

View file

@ -44,6 +44,7 @@
#include "r_utility.h"
#ifndef NO_SWRENDER
#include "r_draw.h"
#include "r_draw_rgba.h"
#include "r_main.h"
#include "r_things.h"
#endif
@ -137,6 +138,12 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH];
const BYTE *translation = NULL;
if (r_swtruecolor != IsBgra())
{
r_swtruecolor = IsBgra();
R_InitColumnDrawers();
}
if (parms.masked)
{
spanptr = &spans;
@ -173,14 +180,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
if (translation != NULL)
{
dc_colormap = (lighttable_t *)translation;
R_SetTranslationMap((lighttable_t *)translation);
}
else
{
dc_colormap = identitymap;
R_SetTranslationMap(identitymap);
}
fixedcolormap = dc_colormap;
fixedcolormap = dc_fcolormap;
ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor);
BYTE *destorgsave = dc_destorg;
@ -306,7 +313,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
while (dc_x < stop4)
{
rt_initcols();
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
@ -1023,13 +1030,35 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level)
oldyyshifted = yy * GetPitch();
}
BYTE *spot = GetBuffer() + oldyyshifted + xx;
DWORD *bg2rgb = Col2RGB8[1+level];
DWORD *fg2rgb = Col2RGB8[63-level];
DWORD fg = fg2rgb[basecolor];
DWORD bg = bg2rgb[*spot];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
if (IsBgra())
{
uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx;
uint32_t fg = LightBgra::shade_pal_index_simple(basecolor, LightBgra::calc_light_multiplier(0));
uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff;
uint32_t bg_red = (*spot >> 16) & 0xff;
uint32_t bg_green = (*spot >> 8) & 0xff;
uint32_t bg_blue = (*spot) & 0xff;
uint32_t red = (fg_red + bg_red + 1) / 2;
uint32_t green = (fg_green + bg_green + 1) / 2;
uint32_t blue = (fg_blue + bg_blue + 1) / 2;
*spot = 0xff000000 | (red << 16) | (green << 8) | blue;
}
else
{
BYTE *spot = GetBuffer() + oldyyshifted + xx;
DWORD *bg2rgb = Col2RGB8[1+level];
DWORD *fg2rgb = Col2RGB8[63-level];
DWORD fg = fg2rgb[basecolor];
DWORD bg = bg2rgb[*spot];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
}
}
void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor)
@ -1073,27 +1102,65 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real
{
swapvalues (x0, x1);
}
memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1);
if (IsBgra())
{
uint32_t fillColor = GPalette.BaseColors[palColor].d;
uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0;
for (int i = 0; i <= deltaX; i++)
spot[i] = fillColor;
}
else
{
memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1);
}
}
else if (deltaX == 0)
{ // vertical line
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int pitch = GetPitch ();
do
if (IsBgra())
{
*spot = palColor;
spot += pitch;
} while (--deltaY != 0);
uint32_t fillColor = GPalette.BaseColors[palColor].d;
uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0;
int pitch = GetPitch();
do
{
*spot = fillColor;
spot += pitch;
} while (--deltaY != 0);
}
else
{
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int pitch = GetPitch();
do
{
*spot = palColor;
spot += pitch;
} while (--deltaY != 0);
}
}
else if (deltaX == deltaY)
{ // diagonal line.
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int advance = GetPitch() + xDir;
do
if (IsBgra())
{
*spot = palColor;
spot += advance;
} while (--deltaY != 0);
uint32_t fillColor = GPalette.BaseColors[palColor].d;
uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0;
int advance = GetPitch() + xDir;
do
{
*spot = fillColor;
spot += advance;
} while (--deltaY != 0);
}
else
{
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int advance = GetPitch() + xDir;
do
{
*spot = palColor;
spot += advance;
} while (--deltaY != 0);
}
}
else
{
@ -1213,7 +1280,6 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor)
void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color)
{
int x, y;
BYTE *dest;
if (left == right || top == bottom)
{
@ -1243,12 +1309,28 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin
palcolor = PalFromRGB(color);
}
dest = Buffer + top * Pitch + left;
x = right - left;
for (y = top; y < bottom; y++)
if (IsBgra())
{
memset(dest, palcolor, x);
dest += Pitch;
uint32_t fill_color = GPalette.BaseColors[palcolor];
uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left;
x = right - left;
for (y = top; y < bottom; y++)
{
for (int i = 0; i < x; i++)
dest[i] = fill_color;
dest += Pitch;
}
}
else
{
BYTE *dest = Buffer + top * Pitch + left;
x = right - left;
for (y = top; y < bottom; y++)
{
memset(dest, palcolor, x);
dest += Pitch;
}
}
}
@ -1339,8 +1421,11 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
// Setup constant texture mapping parameters.
R_SetupSpanBits(tex);
R_SetSpanColormap(colormap != NULL ? &colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap);
R_SetSpanSource(tex->GetPixels());
if (colormap)
R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1));
else
R_SetSpanColormap(&identitycolormap, 0);
R_SetSpanSource(tex);
scalex = double(1u << (32 - ds_xbits)) / scalex;
scaley = double(1u << (32 - ds_ybits)) / scaley;
ds_xstep = xs_RoundToInt(cosrot * scalex);
@ -1449,6 +1534,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
//
void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const
{
if (IsBgra())
return;
int srcpitch = _width;
int destpitch;
BYTE *dest;
@ -1475,6 +1563,9 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src)
//
void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const
{
if (IsBgra())
return;
const BYTE *src;
#ifdef RANGECHECK

View file

@ -1662,6 +1662,7 @@ void FFontChar1::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================
@ -1723,6 +1724,7 @@ void FFontChar2::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View file

@ -83,7 +83,7 @@ class DDummyFrameBuffer : public DFrameBuffer
DECLARE_CLASS (DDummyFrameBuffer, DFrameBuffer);
public:
DDummyFrameBuffer (int width, int height)
: DFrameBuffer (0, 0)
: DFrameBuffer (0, 0, false)
{
Width = width;
Height = height;
@ -119,7 +119,6 @@ public:
const BYTE *GetColumn(unsigned int column, const Span **spans_out);
const BYTE *GetPixels();
void Unload();
bool CheckModified();
void SetTranslation(int num);
@ -208,13 +207,14 @@ DCanvas *DCanvas::CanvasChain = NULL;
//
//==========================================================================
DCanvas::DCanvas (int _width, int _height)
DCanvas::DCanvas (int _width, int _height, bool _bgra)
{
// Init member vars
Buffer = NULL;
LockCount = 0;
Width = _width;
Height = _height;
Bgra = _bgra;
// Add to list of active canvases
Next = CanvasChain;
@ -344,10 +344,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h)
if (damount == 0.f)
return;
DWORD *bg2rgb;
DWORD fg;
int gap;
BYTE *spot;
int x, y;
if (x1 >= Width || y1 >= Height)
@ -367,31 +364,73 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h)
return;
}
{
int amount;
amount = (int)(damount * 64);
bg2rgb = Col2RGB8[64-amount];
fg = (((color.r * amount) >> 4) << 20) |
((color.g * amount) >> 4) |
(((color.b * amount) >> 4) << 10);
}
spot = Buffer + x1 + y1*Pitch;
gap = Pitch - w;
for (y = h; y != 0; y--)
{
for (x = w; x != 0; x--)
{
DWORD bg;
bg = bg2rgb[(*spot)&0xff];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
spot++;
if (IsBgra())
{
uint32_t *spot = (uint32_t*)Buffer + x1 + y1*Pitch;
uint32_t fg = color.d;
uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff;
uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f);
uint32_t inv_alpha = 256 - alpha;
fg_red *= alpha;
fg_green *= alpha;
fg_blue *= alpha;
for (y = h; y != 0; y--)
{
for (x = w; x != 0; x--)
{
uint32_t bg_red = (*spot >> 16) & 0xff;
uint32_t bg_green = (*spot >> 8) & 0xff;
uint32_t bg_blue = (*spot) & 0xff;
uint32_t red = (fg_red + bg_red * inv_alpha) / 256;
uint32_t green = (fg_green + bg_green * inv_alpha) / 256;
uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256;
*spot = 0xff000000 | (red << 16) | (green << 8) | blue;
spot++;
}
spot += gap;
}
}
else
{
BYTE *spot = Buffer + x1 + y1*Pitch;
DWORD *bg2rgb;
DWORD fg;
{
int amount;
amount = (int)(damount * 64);
bg2rgb = Col2RGB8[64-amount];
fg = (((color.r * amount) >> 4) << 20) |
((color.g * amount) >> 4) |
(((color.b * amount) >> 4) << 10);
}
for (y = h; y != 0; y--)
{
for (x = w; x != 0; x--)
{
DWORD bg;
bg = bg2rgb[(*spot)&0xff];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
spot++;
}
spot += gap;
}
spot += gap;
}
}
@ -408,8 +447,8 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo
{
Lock(true);
buffer = GetBuffer();
pitch = GetPitch();
color_type = SS_PAL;
pitch = IsBgra() ? GetPitch() * 4 : GetPitch();
color_type = IsBgra() ? SS_BGRA : SS_PAL;
}
//==========================================================================
@ -704,13 +743,12 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256])
// I found this formula on the web at
// <http://panda.mostang.com/sane/sane-gamma.html>,
// but that page no longer exists.
double invgamma = 1.f / gamma;
int i;
for (i = 0; i < 256; i++)
{
gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma));
gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma) + 0.5);
}
}
@ -722,8 +760,8 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256])
//
//==========================================================================
DSimpleCanvas::DSimpleCanvas (int width, int height)
: DCanvas (width, height)
DSimpleCanvas::DSimpleCanvas (int width, int height, bool bgra)
: DCanvas (width, height, bgra)
{
// Making the pitch a power of 2 is very bad for performance
// Try to maximize the number of cache lines that can be filled
@ -760,8 +798,9 @@ DSimpleCanvas::DSimpleCanvas (int width, int height)
Pitch = width + MAX(0, CPU.DataL1LineSize - 8);
}
}
MemBuffer = new BYTE[Pitch * height];
memset (MemBuffer, 0, Pitch * height);
int bytes_per_pixel = bgra ? 4 : 1;
MemBuffer = new BYTE[Pitch * height * bytes_per_pixel];
memset (MemBuffer, 0, Pitch * height * bytes_per_pixel);
}
//==========================================================================
@ -830,13 +869,77 @@ void DSimpleCanvas::Unlock ()
//
//==========================================================================
DFrameBuffer::DFrameBuffer (int width, int height)
: DSimpleCanvas (width, height)
DFrameBuffer::DFrameBuffer (int width, int height, bool bgra)
: DSimpleCanvas (width, height, bgra)
{
LastMS = LastSec = FrameCount = LastCount = LastTic = 0;
Accel2D = false;
}
//==========================================================================
//
// DFrameBuffer :: CopyWithGammaBgra
//
// Copies data to destination buffer while performing gamma and flash.
// This is only needed if a target cannot do this with shaders.
//
//==========================================================================
void DFrameBuffer::CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount)
{
const BYTE *gammatables[3] = { gammared, gammagreen, gammablue };
if (flash_amount > 0)
{
uint16_t inv_flash_amount = 256 - flash_amount;
uint16_t flash_red = flash.r * flash_amount;
uint16_t flash_green = flash.g * flash_amount;
uint16_t flash_blue = flash.b * flash_amount;
for (int y = 0; y < Height; y++)
{
BYTE *dest = (BYTE*)output + y * pitch;
BYTE *src = MemBuffer + y * Pitch * 4;
for (int x = 0; x < Width; x++)
{
uint16_t fg_red = src[2];
uint16_t fg_green = src[1];
uint16_t fg_blue = src[0];
uint16_t red = (fg_red * inv_flash_amount + flash_red) >> 8;
uint16_t green = (fg_green * inv_flash_amount + flash_green) >> 8;
uint16_t blue = (fg_blue * inv_flash_amount + flash_blue) >> 8;
dest[0] = gammatables[2][blue];
dest[1] = gammatables[1][green];
dest[2] = gammatables[0][red];
dest[3] = 0xff;
dest += 4;
src += 4;
}
}
}
else
{
for (int y = 0; y < Height; y++)
{
BYTE *dest = (BYTE*)output + y * pitch;
BYTE *src = MemBuffer + y * Pitch * 4;
for (int x = 0; x < Width; x++)
{
dest[0] = gammatables[2][src[0]];
dest[1] = gammatables[1][src[1]];
dest[2] = gammatables[0][src[2]];
dest[3] = 0xff;
dest += 4;
src += 4;
}
}
}
}
//==========================================================================
//
// DFrameBuffer :: DrawRateStuff
@ -898,10 +1001,21 @@ void DFrameBuffer::DrawRateStuff ()
// Buffer can be NULL if we're doing hardware accelerated 2D
if (buffer != NULL)
{
buffer += (GetHeight()-1) * GetPitch();
for (i = 0; i < tics*2; i += 2) buffer[i] = 0xff;
for ( ; i < 20*2; i += 2) buffer[i] = 0x00;
if (IsBgra())
{
uint32_t *buffer32 = (uint32_t*)buffer;
buffer32 += (GetHeight() - 1) * GetPitch();
for (i = 0; i < tics * 2; i += 2) buffer32[i] = 0xffffffff;
for (; i < 20 * 2; i += 2) buffer32[i] = 0xff000000;
}
else
{
buffer += (GetHeight() - 1) * GetPitch();
for (i = 0; i < tics * 2; i += 2) buffer[i] = 0xff;
for (; i < 20 * 2; i += 2) buffer[i] = 0x00;
}
}
else
{
@ -974,16 +1088,6 @@ void FPaletteTester::SetTranslation(int num)
}
}
//==========================================================================
//
// FPaletteTester :: Unload
//
//==========================================================================
void FPaletteTester::Unload()
{
}
//==========================================================================
//
// FPaletteTester :: GetColumn

View file

@ -185,7 +185,7 @@ class DCanvas : public DObject
{
DECLARE_ABSTRACT_CLASS (DCanvas, DObject)
public:
DCanvas (int width, int height);
DCanvas (int width, int height, bool bgra);
virtual ~DCanvas ();
// Member variable access
@ -193,6 +193,7 @@ public:
inline int GetWidth () const { return Width; }
inline int GetHeight () const { return Height; }
inline int GetPitch () const { return Pitch; }
inline bool IsBgra() const { return Bgra; }
virtual bool IsValid ();
@ -270,6 +271,7 @@ protected:
int Height;
int Pitch;
int LockCount;
bool Bgra;
bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const;
void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete;
@ -292,7 +294,7 @@ class DSimpleCanvas : public DCanvas
{
DECLARE_CLASS (DSimpleCanvas, DCanvas)
public:
DSimpleCanvas (int width, int height);
DSimpleCanvas (int width, int height, bool bgra);
~DSimpleCanvas ();
bool IsValid ();
@ -330,7 +332,7 @@ class DFrameBuffer : public DSimpleCanvas
{
DECLARE_ABSTRACT_CLASS (DFrameBuffer, DSimpleCanvas)
public:
DFrameBuffer (int width, int height);
DFrameBuffer (int width, int height, bool bgra);
// Force the surface to use buffered output if true is passed.
virtual bool Lock (bool buffered) = 0;
@ -421,6 +423,7 @@ public:
protected:
void DrawRateStuff ();
void CopyFromBuff (BYTE *src, int srcPitch, int width, int height, BYTE *dest);
void CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount);
DFrameBuffer () {}

View file

@ -242,8 +242,8 @@ CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
//
//==========================================================================
D3DFB::D3DFB (UINT adapter, int width, int height, bool fullscreen)
: BaseWinFB (width, height)
D3DFB::D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen)
: BaseWinFB (width, height, bgra)
{
D3DPRESENT_PARAMETERS d3dpp;
@ -765,14 +765,16 @@ void D3DFB::KillNativeTexs()
bool D3DFB::CreateFBTexture ()
{
if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL)))
FBFormat = IsBgra() ? D3DFMT_A8R8G8B8 : D3DFMT_L8;
if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL)))
{
int pow2width, pow2height, i;
for (i = 1; i < Width; i <<= 1) {} pow2width = i;
for (i = 1; i < Height; i <<= 1) {} pow2height = i;
if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL)))
if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL)))
{
return false;
}
@ -1304,20 +1306,45 @@ void D3DFB::Draw3DPart(bool copy3d)
SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) ||
SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0)))
{
if (lockrect.Pitch == Pitch && Pitch == Width)
if (IsBgra() && FBFormat == D3DFMT_A8R8G8B8)
{
memcpy (lockrect.pBits, MemBuffer, Width * Height);
if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width)
{
memcpy(lockrect.pBits, MemBuffer, Width * Height * sizeof(uint32_t));
}
else
{
uint32_t *dest = (uint32_t *)lockrect.pBits;
uint32_t *src = (uint32_t*)MemBuffer;
for (int y = 0; y < Height; y++)
{
memcpy(dest, src, Width * sizeof(uint32_t));
dest = reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(dest) + lockrect.Pitch);
src += Pitch;
}
}
}
else if (!IsBgra() && FBFormat == D3DFMT_L8)
{
if (lockrect.Pitch == Pitch && Pitch == Width)
{
memcpy(lockrect.pBits, MemBuffer, Width * Height);
}
else
{
BYTE *dest = (BYTE *)lockrect.pBits;
BYTE *src = (BYTE *)MemBuffer;
for (int y = 0; y < Height; y++)
{
memcpy(dest, src, Width);
dest = reinterpret_cast<BYTE*>(reinterpret_cast<uint8_t*>(dest) + lockrect.Pitch);
src += Pitch;
}
}
}
else
{
BYTE *dest = (BYTE *)lockrect.pBits;
BYTE *src = MemBuffer;
for (int y = 0; y < Height; y++)
{
memcpy (dest, src, Width);
dest += lockrect.Pitch;
src += Pitch;
}
memset(lockrect.pBits, 0, lockrect.Pitch * Height);
}
FBTexture->UnlockRect (0);
}
@ -1349,7 +1376,10 @@ void D3DFB::Draw3DPart(bool copy3d)
memset(Constant, 0, sizeof(Constant));
SetAlphaBlend(D3DBLENDOP(0));
EnableAlphaTest(FALSE);
SetPixelShader(Shaders[SHADER_NormalColorPal]);
if (IsBgra())
SetPixelShader(Shaders[SHADER_NormalColor]);
else
SetPixelShader(Shaders[SHADER_NormalColorPal]);
if (copy3d)
{
FBVERTEX verts[4];
@ -1367,7 +1397,10 @@ void D3DFB::Draw3DPart(bool copy3d)
realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0);
color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2,
realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1);
SetPixelShader(Shaders[SHADER_SpecialColormapPal]);
if (IsBgra())
SetPixelShader(Shaders[SHADER_SpecialColormap]);
else
SetPixelShader(Shaders[SHADER_SpecialColormapPal]);
}
}
else
@ -1378,7 +1411,10 @@ void D3DFB::Draw3DPart(bool copy3d)
CalcFullscreenCoords(verts, Accel2D, false, color0, color1);
D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX));
}
SetPixelShader(Shaders[SHADER_NormalColorPal]);
if (IsBgra())
SetPixelShader(Shaders[SHADER_NormalColor]);
else
SetPixelShader(Shaders[SHADER_NormalColorPal]);
}
//==========================================================================

View file

@ -32,7 +32,6 @@
**
*/
// HEADER FILES ------------------------------------------------------------
#define DIRECTDRAW_VERSION 0x0300
@ -120,7 +119,7 @@ cycle_t BlitCycles;
// CODE --------------------------------------------------------------------
DDrawFB::DDrawFB (int width, int height, bool fullscreen)
: BaseWinFB (width, height)
: BaseWinFB (width, height, false)
{
int i;

View file

@ -51,6 +51,7 @@
EXTERN_CVAR (Bool, ticker)
EXTERN_CVAR (Bool, fullscreen)
EXTERN_CVAR (Bool, swtruecolor)
EXTERN_CVAR (Float, vid_winscale)
CVAR(Int, win_x, -1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
@ -146,7 +147,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old)
}
break;
}
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old);
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old);
/* Right now, CreateFrameBuffer cannot return NULL
if (res == NULL)
@ -312,6 +313,16 @@ void I_RestoreWindowedPos ()
extern int NewWidth, NewHeight, NewBits, DisplayBits;
// swtruecolor: boolean console variable whose value I_SetMode passes to
// CreateFrameBuffer as the `bgra` argument.
// NOTE(review): the braces below are presumably the change handler that
// CUSTOM_CVAR attaches to the variable -- confirm against the macro.
CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL)
{
// Strictly speaking this doesn't require a mode switch, but it is the easiest
// way to force a CreateFrameBuffer call without a lot of refactoring.
// Ask again for the screen's current width, height and bit depth.
NewWidth = screen->GetWidth();
NewHeight = screen->GetHeight();
NewBits = DisplayBits;
// Raise the mode-change flag last, after the requested values are in place.
// NOTE(review): presumably read by the code that ends up calling I_SetMode -- confirm.
setmodeneeded = true;
}
CUSTOM_CVAR (Bool, fullscreen, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL)
{
NewWidth = screen->GetWidth();

View file

@ -45,7 +45,7 @@ class IVideo
virtual EDisplayType GetDisplayType () = 0;
virtual void SetWindowedScale (float scale) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0;
virtual void StartModeIterator (int bits, bool fs) = 0;
virtual bool NextMode (int *width, int *height, bool *letterbox) = 0;

View file

@ -70,7 +70,7 @@ class Win32Video : public IVideo
EDisplayType GetDisplayType () { return DISPLAY_Both; }
void SetWindowedScale (float scale);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old);
void StartModeIterator (int bits, bool fs);
bool NextMode (int *width, int *height, bool *letterbox);
@ -121,7 +121,7 @@ class BaseWinFB : public DFrameBuffer
{
DECLARE_ABSTRACT_CLASS(BaseWinFB, DFrameBuffer)
public:
BaseWinFB (int width, int height) : DFrameBuffer (width, height), Windowed (true) {}
BaseWinFB (int width, int height, bool bgra) : DFrameBuffer (width, height, bgra), Windowed (true) {}
bool IsFullscreen () { return !Windowed; }
virtual void Blank () = 0;
@ -228,7 +228,7 @@ class D3DFB : public BaseWinFB
{
DECLARE_CLASS(D3DFB, BaseWinFB)
public:
D3DFB (UINT adapter, int width, int height, bool fullscreen);
D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen);
~D3DFB ();
bool IsValid ();
@ -422,6 +422,7 @@ private:
bool NeedPalUpdate;
bool NeedGammaUpdate;
int FBWidth, FBHeight;
D3DFORMAT FBFormat;
bool VSync;
RECT BlendingRect;
int In2D;

View file

@ -629,7 +629,7 @@ bool Win32Video::NextMode (int *width, int *height, bool *letterbox)
return false;
}
DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old)
DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old)
{
static int retry = 0;
static int owidth, oheight;
@ -645,7 +645,8 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr
BaseWinFB *fb = static_cast<BaseWinFB *> (old);
if (fb->Width == width &&
fb->Height == height &&
fb->Windowed == !fullscreen)
fb->Windowed == !fullscreen &&
fb->Bgra == bgra)
{
return old;
}
@ -662,12 +663,13 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr
if (D3D != NULL)
{
fb = new D3DFB (m_Adapter, width, height, fullscreen);
fb = new D3DFB (m_Adapter, width, height, bgra, fullscreen);
}
else
{
fb = new DDrawFB (width, height, fullscreen);
}
LOG1 ("New fb created @ %p\n", fb);
// If we could not create the framebuffer, try again with slightly
@ -726,7 +728,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr
}
++retry;
fb = static_cast<DDrawFB *>(CreateFrameBuffer (width, height, fullscreen, NULL));
fb = static_cast<DDrawFB *>(CreateFrameBuffer (width, height, bgra, fullscreen, NULL));
}
retry = 0;

View file

@ -1780,6 +1780,10 @@ DSPLYMNU_BRIGHTNESS = "Brightness";
DSPLYMNU_VSYNC = "Vertical Sync";
DSPLYMNU_CAPFPS = "Rendering Interpolation";
DSPLYMNU_COLUMNMETHOD = "Column render mode";
DSPLYMNU_TRUECOLOR = "True color output";
DSPLYMNU_MINFILTER = "Linear filter when downscaling";
DSPLYMNU_MAGFILTER = "Linear filter when upscaling";
DSPLYMNU_MIPMAP = "Use mipmapped textures";
DSPLYMNU_WIPETYPE = "Screen wipe style";
DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen";
DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used

View file

@ -661,6 +661,10 @@ OptionMenu "VideoOptions"
Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff"
Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn"
Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods"
Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff"
Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff"
Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff"
Option "$DSPLYMNU_MIPMAP", "r_mipmap", "OnOff"
StaticText " "
Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes"