Merge remote-tracking branch 'qzdoom/master' into dpdoom

This commit is contained in:
Magnus Norddahl 2016-09-24 08:17:49 +02:00
commit c21090333f
69 changed files with 8556 additions and 1074 deletions

View File

@ -1,5 +1,5 @@
cmake_minimum_required( VERSION 2.8.7 )
project(GZDoom)
project(QZDoom)
if( COMMAND cmake_policy )
if( POLICY CMP0011 )
@ -73,7 +73,7 @@ IF( NOT CMAKE_BUILD_TYPE )
ENDIF()
set( ZDOOM_OUTPUT_DIR ${CMAKE_BINARY_DIR} CACHE PATH "Directory where zdoom.pk3 and the executable will be created." )
set( ZDOOM_EXE_NAME "gzdoom" CACHE FILEPATH "Name of the executable to create" )
set( ZDOOM_EXE_NAME "qzdoom" CACHE FILEPATH "Name of the executable to create" )
if( MSVC )
# Allow the user to use ZDOOM_OUTPUT_DIR as a single release point.
# Use zdoom, zdoomd, zdoom64, and zdoomd64 for the binary names

View File

@ -974,7 +974,9 @@ set( FASTMATH_PCH_SOURCES
r_3dfloors.cpp
r_bsp.cpp
r_draw.cpp
r_draw_rgba.cpp
r_drawt.cpp
r_drawt_rgba.cpp
r_main.cpp
r_plane.cpp
r_segs.cpp

View File

@ -99,6 +99,11 @@ typedef TMap<int, PClassActor *> FClassMap;
#endif
// Only use SSE intrinsics on Intel architecture
#if !defined(_M_IX86) && !defined(__i386__) && !defined(_M_X64) && !defined(__amd64__)
#define NO_SSE
#endif
#if defined(_MSC_VER)
#define NOVTABLE __declspec(novtable)

View File

@ -382,6 +382,9 @@ static bool (*wipes[])(int) =
// Returns true if the wipe should be performed.
bool wipe_StartScreen (int type)
{
if (screen->IsBgra())
return false;
CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1);
if (CurrentWipeType)
@ -395,11 +398,15 @@ bool wipe_StartScreen (int type)
void wipe_EndScreen (void)
{
if (screen->IsBgra())
return;
if (CurrentWipeType)
{
wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2];
screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end);
screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr.
// Initialize the wipe
(*wipes[(CurrentWipeType-1)*3])(0);
}
@ -410,6 +417,9 @@ bool wipe_ScreenWipe (int ticks)
{
bool rc;
if (screen->IsBgra())
return true;
if (CurrentWipeType == wipe_None)
return true;
@ -423,6 +433,9 @@ bool wipe_ScreenWipe (int ticks)
// Final things for the wipe
void wipe_Cleanup()
{
if (screen->IsBgra())
return;
if (wipe_scr_start != NULL)
{
delete[] wipe_scr_start;

View File

@ -1315,7 +1315,7 @@ void G_InitLevelLocals ()
level_info_t *info;
BaseBlendA = 0.0f; // Remove underwater blend effect, if any
NormalLight.Maps = realcolormaps;
NormalLight.Maps = realcolormaps.Maps;
// [BB] Instead of just setting the color, we also have to reset Desaturate and build the lights.
NormalLight.ChangeColor (PalEntry (255, 255, 255), 0);

View File

@ -743,7 +743,8 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis)
if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0))
{
vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f);
vis->colormap = SpecialColormaps[INVERSECOLORMAP].Colormap;
vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP];
vis->ColormapNum = 0;
}
return -1; // This item is valid so another one shouldn't reset the translucency
}

View File

@ -35,7 +35,6 @@ public:
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
bool CheckModified ();
void Unload ();
void SetVial (int level);
@ -90,10 +89,6 @@ bool FHealthBar::CheckModified ()
return NeedRefresh;
}
void FHealthBar::Unload ()
{
}
const BYTE *FHealthBar::GetColumn (unsigned int column, const Span **spans_out)
{
if (NeedRefresh)

View File

@ -75,5 +75,4 @@ struct FColormap
};
#endif

View File

@ -78,7 +78,6 @@ public:
const BYTE *GetColumn(unsigned int column, const Span **spans_out);
const BYTE *GetPixels();
void Unload();
bool CheckModified();
protected:
@ -212,10 +211,6 @@ bool FBackdropTexture::CheckModified()
return LastRenderTic != gametic;
}
void FBackdropTexture::Unload()
{
}
//=============================================================================
//
//

View File

@ -106,6 +106,17 @@ EXTERN_CVAR(Bool, ticker )
EXTERN_CVAR(Bool, vid_vsync)
EXTERN_CVAR(Bool, vid_hidpi)
CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
{
// Strictly speaking this doesn't require a mode switch, but it is the easiest
// way to force a CreateFramebuffer call without a lot of refactoring.
extern int NewWidth, NewHeight, NewBits, DisplayBits;
NewWidth = screen->GetWidth();
NewHeight = screen->GetHeight();
NewBits = DisplayBits;
setmodeneeded = true;
}
CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
{
extern int NewWidth, NewHeight, NewBits, DisplayBits;
@ -123,7 +134,7 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_
static int s_currentRenderer;
CUSTOM_CVAR(Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
CUSTOM_CVAR(Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
{
// 0: Software renderer
// 1: OpenGL renderer
@ -238,7 +249,7 @@ public:
virtual EDisplayType GetDisplayType() { return DISPLAY_Both; }
virtual void SetWindowedScale(float scale);
virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer* old);
virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer* old);
virtual void StartModeIterator(int bits, bool fullscreen);
virtual bool NextMode(int* width, int* height, bool* letterbox);
@ -280,7 +291,7 @@ private:
class CocoaFrameBuffer : public DFrameBuffer
{
public:
CocoaFrameBuffer(int width, int height, bool fullscreen);
CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen);
~CocoaFrameBuffer();
virtual bool Lock(bool buffer);
@ -590,14 +601,14 @@ bool CocoaVideo::NextMode(int* const width, int* const height, bool* const lette
return false;
}
DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool fullscreen, DFrameBuffer* const old)
DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool bgra, const bool fullscreen, DFrameBuffer* const old)
{
PalEntry flashColor = 0;
int flashAmount = 0;
if (NULL != old)
{
if (width == m_width && height == m_height)
if (width == m_width && height == m_height && bgra == old->IsBgra())
{
SetMode(width, height, fullscreen, vid_hidpi);
return old;
@ -622,7 +633,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c
}
else
{
fb = new CocoaFrameBuffer(width, height, fullscreen);
fb = new CocoaFrameBuffer(width, height, bgra, fullscreen);
}
fb->SetFlash(flashColor, flashAmount);
@ -846,8 +857,8 @@ CocoaVideo* CocoaVideo::GetInstance()
// ---------------------------------------------------------------------------
CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen)
: DFrameBuffer(width, height)
CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen)
: DFrameBuffer(width, height, bgra)
, m_needPaletteUpdate(false)
, m_gamma(0.0f)
, m_needGammaUpdate(false)
@ -949,8 +960,15 @@ void CocoaFrameBuffer::Update()
FlipCycles.Reset();
BlitCycles.Clock();
GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width * BYTES_PER_PIXEL,
Width, Height, FRACUNIT, FRACUNIT, 0, 0);
if (IsBgra())
{
CopyWithGammaBgra(m_pixelBuffer, Width * BYTES_PER_PIXEL, m_gammaTable[0], m_gammaTable[1], m_gammaTable[2], m_flashColor, m_flashAmount);
}
else
{
GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width * BYTES_PER_PIXEL,
Width, Height, FRACUNIT, FRACUNIT, 0, 0);
}
FlipCycles.Clock();
Flip();
@ -1082,8 +1100,10 @@ void CocoaFrameBuffer::Flip()
static const GLenum format = GL_ABGR_EXT;
#endif // __LITTLE_ENDIAN__
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8,
Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer);
if (IsBgra())
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, GL_BGRA_EXT, GL_UNSIGNED_BYTE, m_pixelBuffer);
else
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer);
glBegin(GL_QUADS);
glColor4f(1.0f, 1.0f, 1.0f, 1.0f);
@ -1313,7 +1333,7 @@ void I_CreateRenderer()
DFrameBuffer* I_SetMode(int &width, int &height, DFrameBuffer* old)
{
return Video->CreateFrameBuffer(width, height, fullscreen, old);
return Video->CreateFrameBuffer(width, height, swtruecolor, fullscreen, old);
}
bool I_CheckResolution(const int width, const int height, const int bits)

View File

@ -74,7 +74,7 @@ class IVideo
virtual EDisplayType GetDisplayType () = 0;
virtual void SetWindowedScale (float scale) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0;
virtual void StartModeIterator (int bits, bool fs) = 0;
virtual bool NextMode (int *width, int *height, bool *letterbox) = 0;

View File

@ -53,6 +53,7 @@
EXTERN_CVAR (Bool, ticker)
EXTERN_CVAR (Bool, fullscreen)
EXTERN_CVAR (Bool, swtruecolor)
EXTERN_CVAR (Float, vid_winscale)
IVideo *Video;
@ -64,7 +65,7 @@ void I_RestartRenderer();
int currentrenderer;
// [ZDoomGL]
CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
{
// 0: Software renderer
// 1: OpenGL renderer
@ -166,7 +167,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old)
fs = fullscreen;
break;
}
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old);
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old);
/* Right now, CreateFrameBuffer cannot return NULL
if (res == NULL)
@ -320,6 +321,16 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
extern int NewWidth, NewHeight, NewBits, DisplayBits;
CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL)
{
// Strictly speaking this doesn't require a mode switch, but it is the easiest
// way to force a CreateFramebuffer call without a lot of refactoring.
NewWidth = screen->GetWidth();
NewHeight = screen->GetHeight();
NewBits = DisplayBits;
setmodeneeded = true;
}
CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
{
NewWidth = screen->GetWidth();

View File

@ -163,7 +163,7 @@ bool SDLGLVideo::NextMode (int *width, int *height, bool *letterbox)
return false;
}
DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old)
DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old)
{
static int retry = 0;
static int owidth, oheight;
@ -315,7 +315,7 @@ bool SDLGLVideo::InitHardware (bool allowsoftware, int multisample)
// FrameBuffer implementation -----------------------------------------------
SDLGLFB::SDLGLFB (void *, int width, int height, int, int, bool fullscreen)
: DFrameBuffer (width, height)
: DFrameBuffer (width, height, false)
{
int i;

View File

@ -21,7 +21,7 @@ class SDLGLVideo : public IVideo
EDisplayType GetDisplayType () { return DISPLAY_Both; }
void SetWindowedScale (float scale);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old);
void StartModeIterator (int bits, bool fs);
bool NextMode (int *width, int *height, bool *letterbox);

View File

@ -28,7 +28,7 @@ class SDLFB : public DFrameBuffer
{
DECLARE_CLASS(SDLFB, DFrameBuffer)
public:
SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin);
SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin);
~SDLFB ();
bool Lock (bool buffer);
@ -257,7 +257,7 @@ bool SDLVideo::NextMode (int *width, int *height, bool *letterbox)
return false;
}
DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old)
DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old)
{
static int retry = 0;
static int owidth, oheight;
@ -271,7 +271,8 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree
{ // Reuse the old framebuffer if its attributes are the same
SDLFB *fb = static_cast<SDLFB *> (old);
if (fb->Width == width &&
fb->Height == height)
fb->Height == height &&
fb->Bgra == bgra)
{
bool fsnow = (SDL_GetWindowFlags (fb->Screen) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0;
@ -296,7 +297,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree
flashAmount = 0;
}
SDLFB *fb = new SDLFB (width, height, fullscreen, oldwin);
SDLFB *fb = new SDLFB (width, height, bgra, fullscreen, oldwin);
// If we could not create the framebuffer, try again with slightly
// different parameters in this order:
@ -335,7 +336,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree
}
++retry;
fb = static_cast<SDLFB *>(CreateFrameBuffer (width, height, fullscreen, NULL));
fb = static_cast<SDLFB *>(CreateFrameBuffer (width, height, bgra, fullscreen, NULL));
}
retry = 0;
@ -350,8 +351,8 @@ void SDLVideo::SetWindowedScale (float scale)
// FrameBuffer implementation -----------------------------------------------
SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin)
: DFrameBuffer (width, height)
SDLFB::SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin)
: DFrameBuffer (width, height, bgra)
{
int i;
@ -494,7 +495,11 @@ void SDLFB::Update ()
pitch = Surface->pitch;
}
if (NotPaletted)
if (Bgra)
{
CopyWithGammaBgra(pixels, pitch, GammaTable[0], GammaTable[1], GammaTable[2], Flash, FlashAmount);
}
else if (NotPaletted)
{
GPfx.Convert (MemBuffer, Pitch,
pixels, pitch, Width, Height,
@ -674,13 +679,20 @@ void SDLFB::ResetSDLRenderer ()
SDL_SetRenderDrawColor(Renderer, 0, 0, 0, 255);
Uint32 fmt;
switch(vid_displaybits)
if (Bgra)
{
default: fmt = SDL_PIXELFORMAT_ARGB8888; break;
case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break;
case 24: fmt = SDL_PIXELFORMAT_RGB888; break;
case 16: fmt = SDL_PIXELFORMAT_RGB565; break;
case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break;
fmt = SDL_PIXELFORMAT_ARGB8888;
}
else
{
switch (vid_displaybits)
{
default: fmt = SDL_PIXELFORMAT_ARGB8888; break;
case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break;
case 24: fmt = SDL_PIXELFORMAT_RGB888; break;
case 16: fmt = SDL_PIXELFORMAT_RGB565; break;
case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break;
}
}
Texture = SDL_CreateTexture (Renderer, fmt, SDL_TEXTUREACCESS_STREAMING, Width, Height);

View File

@ -10,7 +10,7 @@ class SDLVideo : public IVideo
EDisplayType GetDisplayType () { return DISPLAY_Both; }
void SetWindowedScale (float scale);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old);
void StartModeIterator (int bits, bool fs);
bool NextMode (int *width, int *height, bool *letterbox);

View File

@ -71,7 +71,7 @@ struct FakeCmap
};
TArray<FakeCmap> fakecmaps;
BYTE *realcolormaps;
FSWColormap realcolormaps;
size_t numfakecmaps;
@ -408,7 +408,7 @@ void R_SetDefaultColormap (const char *name)
foo.Color = 0xFFFFFF;
foo.Fade = 0;
foo.Maps = realcolormaps;
foo.Maps = realcolormaps.Maps;
foo.Desaturate = 0;
foo.Next = NULL;
foo.BuildLights ();
@ -430,7 +430,7 @@ void R_SetDefaultColormap (const char *name)
remap[0] = 0;
for (i = 0; i < NUMCOLORMAPS; ++i)
{
BYTE *map2 = &realcolormaps[i*256];
BYTE *map2 = &realcolormaps.Maps[i*256];
lumpr.Read (map, 256);
for (j = 0; j < 256; ++j)
{
@ -454,11 +454,7 @@ void R_DeinitColormaps ()
{
SpecialColormaps.Clear();
fakecmaps.Clear();
if (realcolormaps != NULL)
{
delete[] realcolormaps;
realcolormaps = NULL;
}
delete[] realcolormaps.Maps;
FreeSpecialLights();
}
@ -501,7 +497,7 @@ void R_InitColormaps ()
}
}
}
realcolormaps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()];
realcolormaps.Maps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()];
R_SetDefaultColormap ("COLORMAP");
if (fakecmaps.Size() > 1)
@ -523,7 +519,7 @@ void R_InitColormaps ()
{
int k, r, g, b;
FWadLump lump = Wads.OpenLumpNum (fakecmaps[j].lump);
BYTE *const map = realcolormaps + NUMCOLORMAPS*256*j;
BYTE *const map = realcolormaps.Maps + NUMCOLORMAPS*256*j;
for (k = 0; k < NUMCOLORMAPS; ++k)
{
@ -550,8 +546,8 @@ void R_InitColormaps ()
}
NormalLight.Color = PalEntry (255, 255, 255);
NormalLight.Fade = 0;
NormalLight.Maps = realcolormaps;
NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps);
NormalLight.Maps = realcolormaps.Maps;
NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps.Maps);
numfakecmaps = fakecmaps.Size();
// build default special maps (e.g. invulnerability)

View File

@ -1,18 +1,26 @@
#ifndef __RES_CMAP_H
#define __RES_CMAP_H
struct FSWColormap;
void R_InitColormaps ();
void R_DeinitColormaps ();
DWORD R_ColormapNumForName(const char *name); // killough 4/4/98
void R_SetDefaultColormap (const char *name); // [RH] change normal fadetable
DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap
extern BYTE *realcolormaps; // [RH] make the colormaps externally visible
extern FSWColormap realcolormaps; // [RH] make the colormaps externally visible
extern size_t numfakecmaps;
struct FSWColormap
{
BYTE *Maps = nullptr;
PalEntry Color = 0xffffffff;
PalEntry Fade = 0xff000000;
int Desaturate = 0;
};
struct FDynamicColormap
struct FDynamicColormap : FSWColormap
{
void ChangeFade (PalEntry fadecolor);
void ChangeColor (PalEntry lightcolor, int desaturate);
@ -20,10 +28,6 @@ struct FDynamicColormap
void BuildLights ();
static void RebuildAllLights();
BYTE *Maps;
PalEntry Color;
PalEntry Fade;
int Desaturate;
FDynamicColormap *Next;
};
@ -43,8 +47,13 @@ enum
};
struct FSpecialColormap
struct FSpecialColormap : FSWColormap
{
FSpecialColormap()
{
Maps = Colormap;
}
float ColorizeStart[3];
float ColorizeEnd[3];
BYTE Colormap[256];

View File

@ -1498,11 +1498,14 @@ struct FMiniBSP
//
typedef BYTE lighttable_t; // This could be wider for >8 bit display.
struct FSWColormap;
// This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite
struct visstyle_t
{
lighttable_t *colormap;
int ColormapNum; // Which colormap is rendered
FSWColormap *BaseColormap; // Base colormap used together with ColormapNum
lighttable_t *colormap; // [SP] Restored from GZDoom - will this work?
float Alpha;
FRenderStyle RenderStyle;
};

View File

@ -38,6 +38,8 @@
#include "r_data/r_translate.h"
#include "v_palette.h"
#include "r_data/colormaps.h"
#include "r_plane.h"
#include "r_draw_rgba.h"
#include "gi.h"
#include "stats.h"
@ -70,6 +72,19 @@ int scaledviewwidth;
// screen depth and asm/no asm.
void (*R_DrawColumnHoriz)(void);
void (*R_DrawColumn)(void);
void (*R_FillColumn)(void);
void (*R_FillAddColumn)(void);
void (*R_FillAddClampColumn)(void);
void (*R_FillSubClampColumn)(void);
void (*R_FillRevSubClampColumn)(void);
void (*R_DrawAddColumn)(void);
void (*R_DrawTlatedAddColumn)(void);
void (*R_DrawAddClampColumn)(void);
void (*R_DrawAddClampTranslatedColumn)(void);
void (*R_DrawSubClampColumn)(void);
void (*R_DrawSubClampTranslatedColumn)(void);
void (*R_DrawRevSubClampColumn)(void);
void (*R_DrawRevSubClampTranslatedColumn)(void);
void (*R_DrawFuzzColumn)(void);
void (*R_DrawTranslatedColumn)(void);
void (*R_DrawShadedColumn)(void);
@ -79,7 +94,48 @@ void (*R_DrawSpanTranslucent)(void);
void (*R_DrawSpanMaskedTranslucent)(void);
void (*R_DrawSpanAddClamp)(void);
void (*R_DrawSpanMaskedAddClamp)(void);
void (*rt_map4cols)(int,int,int);
void (*R_FillSpan)(void);
void (*R_FillColumnHoriz)(void);
void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip);
void (*R_MapTiltedPlane)(int y, int x1);
void (*R_MapColoredPlane)(int y, int x1);
void (*R_DrawParticle)(vissprite_t *);
void (*R_SetupDrawSlab)(FSWColormap *base_colormap, float light, int shade);
void (*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
fixed_t (*tmvline1_add)();
void (*tmvline4_add)();
fixed_t (*tmvline1_addclamp)();
void (*tmvline4_addclamp)();
fixed_t (*tmvline1_subclamp)();
void (*tmvline4_subclamp)();
fixed_t (*tmvline1_revsubclamp)();
void (*tmvline4_revsubclamp)();
void (*rt_copy1col)(int hx, int sx, int yl, int yh);
void (*rt_copy4cols)(int sx, int yl, int yh);
void (*rt_shaded1col)(int hx, int sx, int yl, int yh);
void (*rt_shaded4cols)(int sx, int yl, int yh);
void (*rt_map1col)(int hx, int sx, int yl, int yh);
void (*rt_add1col)(int hx, int sx, int yl, int yh);
void (*rt_addclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_subclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_tlate1col)(int hx, int sx, int yl, int yh);
void (*rt_tlateadd1col)(int hx, int sx, int yl, int yh);
void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh);
void (*rt_map4cols)(int sx, int yl, int yh);
void (*rt_add4cols)(int sx, int yl, int yh);
void (*rt_addclamp4cols)(int sx, int yl, int yh);
void (*rt_subclamp4cols)(int sx, int yl, int yh);
void (*rt_revsubclamp4cols)(int sx, int yl, int yh);
void (*rt_tlate4cols)(int sx, int yl, int yh);
void (*rt_tlateadd4cols)(int sx, int yl, int yh);
void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh);
void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh);
void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh);
void (*rt_initcols)(BYTE *buffer);
void (*rt_span_coverage)(int x, int start, int stop);
//
// R_DrawColumn
@ -90,18 +146,27 @@ extern "C" {
int dc_pitch=0xABadCafe; // [RH] Distance between rows
lighttable_t* dc_colormap;
FSWColormap *dc_fcolormap;
ShadeConstants dc_shade_constants;
fixed_t dc_light;
int dc_x;
int dc_yl;
int dc_yh;
fixed_t dc_iscale;
fixed_t dc_texturefrac;
uint32_t dc_textureheight;
int dc_color; // [RH] Color for column filler
DWORD dc_srccolor;
uint32_t dc_srccolor_bgra;
DWORD *dc_srcblend; // [RH] Source and destination
DWORD *dc_destblend; // blending lookups
fixed_t dc_srcalpha; // Alpha value used by dc_srcblend
fixed_t dc_destalpha; // Alpha value used by dc_destblend
// first pixel in a column (possibly virtual)
const BYTE* dc_source;
const BYTE* dc_source2;
uint32_t dc_texturefracx;
BYTE* dc_dest;
int dc_count;
@ -109,7 +174,11 @@ int dc_count;
DWORD vplce[4];
DWORD vince[4];
BYTE* palookupoffse[4];
fixed_t palookuplight[4];
const BYTE* bufplce[4];
const BYTE* bufplce2[4];
uint32_t buftexturefracx[4];
uint32_t bufheight[4];
// just for profiling
int dccount;
@ -120,10 +189,10 @@ BYTE *dc_translation;
BYTE shadetables[NUMCOLORMAPS*16*256];
FDynamicColormap ShadeFakeColormap[16];
BYTE identitymap[256];
FDynamicColormap identitycolormap;
EXTERN_CVAR (Int, r_columnmethod)
void R_InitShadeMaps()
{
int i,j;
@ -161,6 +230,10 @@ void R_InitShadeMaps()
{
identitymap[i] = i;
}
identitycolormap.Color = ~0u;
identitycolormap.Desaturate = 0;
identitycolormap.Next = NULL;
identitycolormap.Maps = identitymap;
}
/************************************/
@ -223,7 +296,7 @@ void R_DrawColumnP_C (void)
#endif
// [RH] Just fills a column with a color
void R_FillColumnP (void)
void R_FillColumnP_C (void)
{
int count;
BYTE* dest;
@ -247,7 +320,7 @@ void R_FillColumnP (void)
}
}
void R_FillAddColumn (void)
void R_FillAddColumn_C (void)
{
int count;
BYTE *dest;
@ -271,10 +344,9 @@ void R_FillAddColumn (void)
*dest = RGB32k.All[bg & (bg>>15)];
dest += pitch;
} while (--count);
}
void R_FillAddClampColumn (void)
void R_FillAddClampColumn_C (void)
{
int count;
BYTE *dest;
@ -304,10 +376,9 @@ void R_FillAddClampColumn (void)
*dest = RGB32k.All[a & (a>>15)];
dest += pitch;
} while (--count);
}
void R_FillSubClampColumn (void)
void R_FillSubClampColumn_C (void)
{
int count;
BYTE *dest;
@ -336,10 +407,9 @@ void R_FillSubClampColumn (void)
*dest = RGB32k.All[a & (a>>15)];
dest += pitch;
} while (--count);
}
void R_FillRevSubClampColumn (void)
void R_FillRevSubClampColumn_C (void)
{
int count;
BYTE *dest;
@ -368,13 +438,11 @@ void R_FillRevSubClampColumn (void)
*dest = RGB32k.All[a & (a>>15)];
dest += pitch;
} while (--count);
}
//
// Spectre/Invisibility.
//
#define FUZZTABLE 50
extern "C"
{
@ -647,8 +715,8 @@ void R_DrawTlatedAddColumnP_C (void)
fg = fg2rgb[fg];
bg = bg2rgb[bg];
fg = (fg+bg) | 0x1f07c1f;
*dest = RGB32k.All[fg & (fg>>15)];
fg = (fg + bg) | 0x1f07c1f;
*dest = RGB32k.All[fg & (fg >> 15)];
dest += pitch;
frac += fracstep;
} while (--count);
@ -937,8 +1005,6 @@ void R_DrawRevSubClampTranslatedColumnP_C ()
}
}
//
// R_DrawSpan
// With DOOM style restrictions on view orientation,
@ -966,7 +1032,10 @@ int ds_y;
int ds_x1;
int ds_x2;
FSWColormap* ds_fcolormap;
lighttable_t* ds_colormap;
ShadeConstants ds_shade_constants;
dsfixed_t ds_light;
dsfixed_t ds_xfrac;
dsfixed_t ds_yfrac;
@ -977,6 +1046,7 @@ int ds_ybits;
// start of a floor/ceiling tile image
const BYTE* ds_source;
bool ds_source_mipmapped;
// just for profiling
int dscount;
@ -997,13 +1067,14 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource;
//
//==========================================================================
void R_SetSpanSource(const BYTE *pixels)
void R_SetSpanSource(FTexture *tex)
{
ds_source = pixels;
ds_source = r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels();
ds_source_mipmapped = tex->Mipmapped();
#ifdef X86_ASM
if (ds_cursource != ds_source)
if (!r_swtruecolor && ds_cursource != ds_source)
{
R_SetSpanSource_ASM(pixels);
R_SetSpanSource_ASM(ds_source);
}
#endif
}
@ -1016,11 +1087,11 @@ void R_SetSpanSource(const BYTE *pixels)
//
//==========================================================================
void R_SetSpanColormap(BYTE *colormap)
void R_SetSpanColormap(FDynamicColormap *colormap, int shade)
{
ds_colormap = colormap;
R_SetDSColorMapLight(colormap, 0, shade);
#ifdef X86_ASM
if (ds_colormap != ds_curcolormap)
if (!r_swtruecolor && ds_colormap != ds_curcolormap)
{
R_SetSpanColormap_ASM (ds_colormap);
}
@ -1049,7 +1120,8 @@ void R_SetupSpanBits(FTexture *tex)
ds_ybits--;
}
#ifdef X86_ASM
R_SetSpanSize_ASM (ds_xbits, ds_ybits);
if (!r_swtruecolor)
R_SetSpanSize_ASM (ds_xbits, ds_ybits);
#endif
}
@ -1090,6 +1162,7 @@ void R_DrawSpanP_C (void)
if (ds_xbits == 6 && ds_ybits == 6)
{
// 64x64 is the most common case by far, so special case it.
do
{
// Current texture index in u,v.
@ -1471,11 +1544,12 @@ void R_DrawSpanMaskedAddClampP_C (void)
}
// [RH] Just fill a span with a color
void R_FillSpan (void)
void R_FillSpan_C (void)
{
memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1);
}
// Draw a voxel slab
//
// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
@ -1572,17 +1646,19 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v
// wallscan stuff, in C
int vlinebits;
int mvlinebits;
#ifndef X86_ASM
static DWORD vlinec1 ();
static int vlinebits;
DWORD (*dovline1)() = vlinec1;
DWORD (*doprevline1)() = vlinec1;
#ifdef X64_ASM
extern "C" void vlinetallasm4();
#define dovline4 vlinetallasm4
extern "C" void setupvlinetallasm (int);
void (*dovline4)() = vlinetallasm4;
#else
static void vlinec4 ();
void (*dovline4)() = vlinec4;
@ -1590,7 +1666,6 @@ void (*dovline4)() = vlinec4;
static DWORD mvlinec1();
static void mvlinec4();
static int mvlinebits;
DWORD (*domvline1)() = mvlinec1;
void (*domvline4)() = mvlinec4;
@ -1624,6 +1699,12 @@ void (*domvline4)() = mvlineasm4;
void setupvline (int fracbits)
{
if (r_swtruecolor)
{
vlinebits = fracbits;
return;
}
#ifdef X86_ASM
if (CPU.Family <= 5)
{
@ -1679,7 +1760,9 @@ DWORD vlinec1 ()
return frac;
}
#endif
#if !defined(X86_ASM)
void vlinec4 ()
{
BYTE *dest = dc_dest;
@ -1700,13 +1783,20 @@ void vlinec4 ()
void setupmvline (int fracbits)
{
if (!r_swtruecolor)
{
#if defined(X86_ASM)
setupmvlineasm (fracbits);
domvline1 = mvlineasm1;
domvline4 = mvlineasm4;
setupmvlineasm(fracbits);
domvline1 = mvlineasm1;
domvline4 = mvlineasm4;
#else
mvlinebits = fracbits;
mvlinebits = fracbits;
#endif
}
else
{
mvlinebits = fracbits;
}
}
#if !defined(X86_ASM)
@ -1788,7 +1878,7 @@ static void R_DrawFogBoundaryLine (int y, int x)
} while (++x <= x2);
}
void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip)
void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip)
{
// This is essentially the same as R_MapVisPlane but with an extra step
// to create new horizontal spans whenever the light changes enough that
@ -1808,7 +1898,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip)
clearbufshort (spanend+t2, b2-t2, x);
}
dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT);
R_SetColorMapLight(basecolormap, (float)light, wallshade);
for (--x; x >= x1; --x)
{
@ -1833,7 +1923,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip)
clearbufshort (spanend+t2, b2-t2, x);
}
rcolormap = lcolormap;
dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT);
R_SetColorMapLight(basecolormap, (float)light, wallshade);
}
else
{
@ -1884,7 +1974,7 @@ void setuptmvline (int bits)
tmvlinebits = bits;
}
fixed_t tmvline1_add ()
fixed_t tmvline1_add_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -1915,7 +2005,7 @@ fixed_t tmvline1_add ()
return frac;
}
void tmvline4_add ()
void tmvline4_add_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -1942,7 +2032,7 @@ void tmvline4_add ()
} while (--count);
}
fixed_t tmvline1_addclamp ()
fixed_t tmvline1_addclamp_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -1978,7 +2068,7 @@ fixed_t tmvline1_addclamp ()
return frac;
}
void tmvline4_addclamp ()
void tmvline4_addclamp_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -2010,7 +2100,7 @@ void tmvline4_addclamp ()
} while (--count);
}
fixed_t tmvline1_subclamp ()
fixed_t tmvline1_subclamp_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -2045,7 +2135,7 @@ fixed_t tmvline1_subclamp ()
return frac;
}
void tmvline4_subclamp ()
void tmvline4_subclamp_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -2076,7 +2166,7 @@ void tmvline4_subclamp ()
} while (--count);
}
fixed_t tmvline1_revsubclamp ()
fixed_t tmvline1_revsubclamp_C ()
{
DWORD fracstep = dc_iscale;
DWORD frac = dc_texturefrac;
@ -2111,7 +2201,7 @@ fixed_t tmvline1_revsubclamp ()
return frac;
}
void tmvline4_revsubclamp ()
void tmvline4_revsubclamp_C ()
{
BYTE *dest = dc_dest;
int count = dc_count;
@ -2142,7 +2232,6 @@ void tmvline4_revsubclamp ()
} while (--count);
}
//==========================================================================
//
// R_GetColumn
@ -2159,43 +2248,242 @@ const BYTE *R_GetColumn (FTexture *tex, int col)
{
col = width + (col % width);
}
return tex->GetColumn (col, NULL);
}
if (r_swtruecolor)
return (const BYTE *)tex->GetColumnBgra(col, NULL);
else
return tex->GetColumn(col, NULL);
}
// [RH] Initialize the column drawer pointers
void R_InitColumnDrawers ()
{
#ifdef X86_ASM
R_DrawColumn = R_DrawColumnP_ASM;
R_DrawColumnHoriz = R_DrawColumnHorizP_ASM;
R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_ASM;
R_DrawSpanMasked = R_DrawSpanMaskedP_ASM;
if (CPU.Family <= 5)
// Save a copy when switching to true color mode as the assembly palette drawers might change them
static bool pointers_saved = false;
static DWORD(*dovline1_saved)();
static DWORD(*doprevline1_saved)();
static DWORD(*domvline1_saved)();
static void(*dovline4_saved)();
static void(*domvline4_saved)();
if (r_swtruecolor)
{
rt_map4cols = rt_map4cols_asm2;
if (!pointers_saved)
{
pointers_saved = true;
dovline1_saved = dovline1;
doprevline1_saved = doprevline1;
domvline1_saved = domvline1;
dovline4_saved = dovline4;
domvline4_saved = domvline4;
}
R_DrawColumnHoriz = R_DrawColumnHoriz_rgba;
R_DrawColumn = R_DrawColumn_rgba;
R_DrawFuzzColumn = R_DrawFuzzColumn_rgba;
R_DrawTranslatedColumn = R_DrawTranslatedColumn_rgba;
R_DrawShadedColumn = R_DrawShadedColumn_rgba;
R_DrawSpanMasked = R_DrawSpanMasked_rgba;
R_DrawSpan = R_DrawSpan_rgba;
R_DrawSpanTranslucent = R_DrawSpanTranslucent_rgba;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucent_rgba;
R_DrawSpanAddClamp = R_DrawSpanAddClamp_rgba;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClamp_rgba;
R_FillColumn = R_FillColumn_rgba;
R_FillAddColumn = R_FillAddColumn_rgba;
R_FillAddClampColumn = R_FillAddClampColumn_rgba;
R_FillSubClampColumn = R_FillSubClampColumn_rgba;
R_FillRevSubClampColumn = R_FillRevSubClampColumn_rgba;
R_DrawAddColumn = R_DrawAddColumn_rgba;
R_DrawTlatedAddColumn = R_DrawTlatedAddColumn_rgba;
R_DrawAddClampColumn = R_DrawAddClampColumn_rgba;
R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumn_rgba;
R_DrawSubClampColumn = R_DrawSubClampColumn_rgba;
R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumn_rgba;
R_DrawRevSubClampColumn = R_DrawRevSubClampColumn_rgba;
R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumn_rgba;
R_FillSpan = R_FillSpan_rgba;
R_DrawFogBoundary = R_DrawFogBoundary_rgba;
R_FillColumnHoriz = R_FillColumnHoriz_rgba;
R_DrawFogBoundary = R_DrawFogBoundary_rgba;
R_MapTiltedPlane = R_MapTiltedPlane_rgba;
R_MapColoredPlane = R_MapColoredPlane_rgba;
R_DrawParticle = R_DrawParticle_rgba;
R_SetupDrawSlab = R_SetupDrawSlab_rgba;
R_DrawSlab = R_DrawSlab_rgba;
tmvline1_add = tmvline1_add_rgba;
tmvline4_add = tmvline4_add_rgba;
tmvline1_addclamp = tmvline1_addclamp_rgba;
tmvline4_addclamp = tmvline4_addclamp_rgba;
tmvline1_subclamp = tmvline1_subclamp_rgba;
tmvline4_subclamp = tmvline4_subclamp_rgba;
tmvline1_revsubclamp = tmvline1_revsubclamp_rgba;
tmvline4_revsubclamp = tmvline4_revsubclamp_rgba;
rt_copy1col = rt_copy1col_rgba;
rt_copy4cols = rt_copy4cols_rgba;
rt_map1col = rt_map1col_rgba;
rt_map4cols = rt_map4cols_rgba;
rt_shaded1col = rt_shaded1col_rgba;
rt_shaded4cols = rt_shaded4cols_rgba;
rt_add1col = rt_add1col_rgba;
rt_add4cols = rt_add4cols_rgba;
rt_addclamp1col = rt_addclamp1col_rgba;
rt_addclamp4cols = rt_addclamp4cols_rgba;
rt_subclamp1col = rt_subclamp1col_rgba;
rt_revsubclamp1col = rt_revsubclamp1col_rgba;
rt_tlate1col = rt_tlate1col_rgba;
rt_tlateadd1col = rt_tlateadd1col_rgba;
rt_tlateaddclamp1col = rt_tlateaddclamp1col_rgba;
rt_tlatesubclamp1col = rt_tlatesubclamp1col_rgba;
rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_rgba;
rt_subclamp4cols = rt_subclamp4cols_rgba;
rt_revsubclamp4cols = rt_revsubclamp4cols_rgba;
rt_tlate4cols = rt_tlate4cols_rgba;
rt_tlateadd4cols = rt_tlateadd4cols_rgba;
rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_rgba;
rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_rgba;
rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_rgba;
rt_initcols = rt_initcols_rgba;
rt_span_coverage = rt_span_coverage_rgba;
dovline1 = vlinec1_rgba;
doprevline1 = vlinec1_rgba;
domvline1 = mvlinec1_rgba;
dovline4 = vlinec4_rgba;
domvline4 = mvlinec4_rgba;
}
else
{
rt_map4cols = rt_map4cols_asm1;
}
#ifdef X86_ASM
R_DrawColumn = R_DrawColumnP_ASM;
R_DrawColumnHoriz = R_DrawColumnHorizP_ASM;
R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_ASM;
R_DrawSpanMasked = R_DrawSpanMaskedP_ASM;
if (CPU.Family <= 5)
{
rt_map4cols = rt_map4cols_asm2;
}
else
{
rt_map4cols = rt_map4cols_asm1;
}
#else
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawColumn = R_DrawColumnP_C;
R_DrawFuzzColumn = R_DrawFuzzColumnP_C;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_C;
R_DrawSpanMasked = R_DrawSpanMaskedP_C;
rt_map4cols = rt_map4cols_c;
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawColumn = R_DrawColumnP_C;
R_DrawFuzzColumn = R_DrawFuzzColumnP_C;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
R_DrawSpan = R_DrawSpanP_C;
R_DrawSpanMasked = R_DrawSpanMaskedP_C;
rt_map4cols = rt_map4cols_c;
#endif
R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C;
R_DrawSpanAddClamp = R_DrawSpanAddClampP_C;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C;
R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C;
R_DrawSpanAddClamp = R_DrawSpanAddClampP_C;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C;
R_FillColumn = R_FillColumnP_C;
R_FillAddColumn = R_FillAddColumn_C;
R_FillAddClampColumn = R_FillAddClampColumn_C;
R_FillSubClampColumn = R_FillSubClampColumn_C;
R_FillRevSubClampColumn = R_FillRevSubClampColumn_C;
R_DrawAddColumn = R_DrawAddColumnP_C;
R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C;
R_DrawAddClampColumn = R_DrawAddClampColumnP_C;
R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C;
R_DrawSubClampColumn = R_DrawSubClampColumnP_C;
R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C;
R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C;
R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C;
R_FillSpan = R_FillSpan_C;
R_DrawFogBoundary = R_DrawFogBoundary_C;
R_FillColumnHoriz = R_FillColumnHorizP_C;
R_DrawFogBoundary = R_DrawFogBoundary_C;
R_MapTiltedPlane = R_MapTiltedPlane_C;
R_MapColoredPlane = R_MapColoredPlane_C;
R_DrawParticle = R_DrawParticle_C;
#ifdef X86_ASM
R_SetupDrawSlab = [](FSWColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); };
R_DrawSlab = R_DrawSlabA;
#else
R_SetupDrawSlab = [](FSWColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); };
R_DrawSlab = R_DrawSlabC;
#endif
tmvline1_add = tmvline1_add_C;
tmvline4_add = tmvline4_add_C;
tmvline1_addclamp = tmvline1_addclamp_C;
tmvline4_addclamp = tmvline4_addclamp_C;
tmvline1_subclamp = tmvline1_subclamp_C;
tmvline4_subclamp = tmvline4_subclamp_C;
tmvline1_revsubclamp = tmvline1_revsubclamp_C;
tmvline4_revsubclamp = tmvline4_revsubclamp_C;
#ifdef X86_ASM
rt_copy1col = rt_copy1col_asm;
rt_copy4cols = rt_copy4cols_asm;
rt_map1col = rt_map1col_asm;
rt_shaded4cols = rt_shaded4cols_asm;
rt_add4cols = rt_add4cols_asm;
rt_addclamp4cols = rt_addclamp4cols_asm;
#else
rt_copy1col = rt_copy1col_c;
rt_copy4cols = rt_copy4cols_c;
rt_map1col = rt_map1col_c;
rt_shaded4cols = rt_shaded4cols_c;
rt_add4cols = rt_add4cols_c;
rt_addclamp4cols = rt_addclamp4cols_c;
#endif
rt_shaded1col = rt_shaded1col_c;
rt_add1col = rt_add1col_c;
rt_addclamp1col = rt_addclamp1col_c;
rt_subclamp1col = rt_subclamp1col_c;
rt_revsubclamp1col = rt_revsubclamp1col_c;
rt_tlate1col = rt_tlate1col_c;
rt_tlateadd1col = rt_tlateadd1col_c;
rt_tlateaddclamp1col = rt_tlateaddclamp1col_c;
rt_tlatesubclamp1col = rt_tlatesubclamp1col_c;
rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c;
rt_subclamp4cols = rt_subclamp4cols_c;
rt_revsubclamp4cols = rt_revsubclamp4cols_c;
rt_tlate4cols = rt_tlate4cols_c;
rt_tlateadd4cols = rt_tlateadd4cols_c;
rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c;
rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c;
rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c;
rt_initcols = rt_initcols_pal;
rt_span_coverage = rt_span_coverage_pal;
if (pointers_saved)
{
pointers_saved = false;
dovline1 = dovline1_saved;
doprevline1 = doprevline1_saved;
domvline1 = domvline1_saved;
dovline4 = dovline4_saved;
domvline4 = domvline4_saved;
}
}
colfunc = basecolfunc = R_DrawColumn;
fuzzcolfunc = R_DrawFuzzColumn;
transcolfunc = R_DrawTranslatedColumn;
spanfunc = R_DrawSpan;
// [RH] Horizontal column drawers
hcolfunc_pre = R_DrawColumnHoriz;
hcolfunc_post1 = rt_map1col;
hcolfunc_post4 = rt_map4cols;
}
// [RH] Choose column drawers in a single place
@ -2213,7 +2501,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
{
if (flags & STYLEF_ColorIsFixed)
{
colfunc = R_FillColumnP;
colfunc = R_FillColumn;
hcolfunc_post1 = rt_copy1col;
hcolfunc_post4 = rt_copy4cols;
}
@ -2235,16 +2523,22 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
{
dc_srcblend = Col2RGB8_Inverse[fglevel>>10];
dc_destblend = Col2RGB8_LessPrecision[bglevel>>10];
dc_srcalpha = fglevel;
dc_destalpha = bglevel;
}
else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT)
{
dc_srcblend = Col2RGB8[fglevel>>10];
dc_destblend = Col2RGB8[bglevel>>10];
dc_srcalpha = fglevel;
dc_destalpha = bglevel;
}
else
{
dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10];
dc_destblend = Col2RGB8_LessPrecision[bglevel>>10];
dc_srcalpha = fglevel;
dc_destalpha = bglevel;
}
switch (op)
{
@ -2263,13 +2557,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawAddColumnP_C;
colfunc = R_DrawAddColumn;
hcolfunc_post1 = rt_add1col;
hcolfunc_post4 = rt_add4cols;
}
else
{
colfunc = R_DrawTlatedAddColumnP_C;
colfunc = R_DrawTlatedAddColumn;
hcolfunc_post1 = rt_tlateadd1col;
hcolfunc_post4 = rt_tlateadd4cols;
}
@ -2284,13 +2578,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawAddClampColumnP_C;
colfunc = R_DrawAddClampColumn;
hcolfunc_post1 = rt_addclamp1col;
hcolfunc_post4 = rt_addclamp4cols;
}
else
{
colfunc = R_DrawAddClampTranslatedColumnP_C;
colfunc = R_DrawAddClampTranslatedColumn;
hcolfunc_post1 = rt_tlateaddclamp1col;
hcolfunc_post4 = rt_tlateaddclamp4cols;
}
@ -2306,13 +2600,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawSubClampColumnP_C;
colfunc = R_DrawSubClampColumn;
hcolfunc_post1 = rt_subclamp1col;
hcolfunc_post4 = rt_subclamp4cols;
}
else
{
colfunc = R_DrawSubClampTranslatedColumnP_C;
colfunc = R_DrawSubClampTranslatedColumn;
hcolfunc_post1 = rt_tlatesubclamp1col;
hcolfunc_post4 = rt_tlatesubclamp4cols;
}
@ -2331,13 +2625,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
}
else if (dc_translation == NULL)
{
colfunc = R_DrawRevSubClampColumnP_C;
colfunc = R_DrawRevSubClampColumn;
hcolfunc_post1 = rt_revsubclamp1col;
hcolfunc_post4 = rt_revsubclamp4cols;
}
else
{
colfunc = R_DrawRevSubClampTranslatedColumnP_C;
colfunc = R_DrawRevSubClampTranslatedColumn;
hcolfunc_post1 = rt_tlaterevsubclamp1col;
hcolfunc_post4 = rt_tlaterevsubclamp4cols;
}
@ -2412,11 +2706,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
colfunc = R_DrawShadedColumn;
hcolfunc_post1 = rt_shaded1col;
hcolfunc_post4 = rt_shaded4cols;
dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)];
dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps;
dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)];
basecolormap = &ShadeFakeColormap[16-alpha];
if (fixedlightlev >= 0 && fixedcolormap == NULL)
{
dc_colormap += fixedlightlev;
R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
}
else
{
R_SetColorMapLight(basecolormap, 0, 0);
}
return r_columnmethod ? DoDraw1 : DoDraw0;
}
@ -2426,10 +2724,10 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
if (style.Flags & STYLEF_ColorIsFixed)
{
int x = fglevel >> 10;
int r = RPART(color);
int g = GPART(color);
int b = BPART(color);
uint32_t x = fglevel >> 10;
uint32_t r = RPART(color);
uint32_t g = GPART(color);
uint32_t b = BPART(color);
// dc_color is used by the rt_* routines. It is indexed into dc_srcblend.
dc_color = RGB32k.RGB[r>>3][g>>3][b>>3];
if (style.Flags & STYLEF_InvertSource)
@ -2438,11 +2736,13 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
g = 255 - g;
b = 255 - b;
}
uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255);
dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b;
// dc_srccolor is used by the R_Fill* routines. It is premultiplied
// with the alpha.
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff;
hcolfunc_pre = R_FillColumnHorizP;
dc_colormap = identitymap;
hcolfunc_pre = R_FillColumnHoriz;
R_SetColorMapLight(&identitycolormap, 0, 0);
}
if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags))
@ -2459,25 +2759,25 @@ void R_FinishSetPatchStyle ()
bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)())
{
if (colfunc == R_DrawAddColumnP_C)
if (colfunc == R_DrawAddColumn)
{
*tmvline1 = tmvline1_add;
*tmvline4 = tmvline4_add;
return true;
}
if (colfunc == R_DrawAddClampColumnP_C)
if (colfunc == R_DrawAddClampColumn)
{
*tmvline1 = tmvline1_addclamp;
*tmvline4 = tmvline4_addclamp;
return true;
}
if (colfunc == R_DrawSubClampColumnP_C)
if (colfunc == R_DrawSubClampColumn)
{
*tmvline1 = tmvline1_subclamp;
*tmvline4 = tmvline4_subclamp;
return true;
}
if (colfunc == R_DrawRevSubClampColumnP_C)
if (colfunc == R_DrawRevSubClampColumn)
{
*tmvline1 = tmvline1_revsubclamp;
*tmvline4 = tmvline4_revsubclamp;
@ -2486,3 +2786,70 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)())
return false;
}
void R_SetTranslationMap(lighttable_t *translation)
{
dc_fcolormap = nullptr;
dc_colormap = translation;
if (r_swtruecolor)
{
dc_shade_constants.light_red = 256;
dc_shade_constants.light_green = 256;
dc_shade_constants.light_blue = 256;
dc_shade_constants.light_alpha = 256;
dc_shade_constants.fade_red = 0;
dc_shade_constants.fade_green = 0;
dc_shade_constants.fade_blue = 0;
dc_shade_constants.fade_alpha = 256;
dc_shade_constants.desaturate = 0;
dc_shade_constants.simple_shade = true;
dc_light = 0;
}
}
void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade)
{
dc_fcolormap = base_colormap;
if (r_swtruecolor)
{
dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255;
dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255;
dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255;
dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255;
dc_shade_constants.fade_red = dc_fcolormap->Fade.r;
dc_shade_constants.fade_green = dc_fcolormap->Fade.g;
dc_shade_constants.fade_blue = dc_fcolormap->Fade.b;
dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a;
dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256;
dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0);
dc_colormap = base_colormap->Maps;
dc_light = LIGHTSCALE(light, shade);
}
else
{
dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT);
}
}
void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade)
{
ds_fcolormap = base_colormap;
if (r_swtruecolor)
{
ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255;
ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255;
ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255;
ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255;
ds_shade_constants.fade_red = ds_fcolormap->Fade.r;
ds_shade_constants.fade_green = ds_fcolormap->Fade.g;
ds_shade_constants.fade_blue = ds_fcolormap->Fade.b;
ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a;
ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256;
ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0);
ds_colormap = base_colormap->Maps;
ds_light = LIGHTSCALE(light, shade);
}
else
{
ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT);
}
}

View File

@ -25,24 +25,55 @@
#include "r_defs.h"
// Spectre/Invisibility.
#define FUZZTABLE 50
extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine
extern "C" int fuzzpos;
extern "C" int fuzzviewheight;
struct FSWColormap;
struct ShadeConstants
{
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
uint16_t desaturate;
bool simple_shade;
};
extern "C" int ylookup[MAXHEIGHT];
extern "C" int dc_pitch; // [RH] Distance between rows
extern "C" lighttable_t*dc_colormap;
extern "C" FSWColormap *dc_fcolormap;
extern "C" ShadeConstants dc_shade_constants;
extern "C" fixed_t dc_light;
extern "C" int dc_x;
extern "C" int dc_yl;
extern "C" int dc_yh;
extern "C" fixed_t dc_iscale;
extern double dc_texturemid;
extern "C" fixed_t dc_texturefrac;
extern "C" uint32_t dc_textureheight;
extern "C" int dc_color; // [RH] For flat colors (no texturing)
extern "C" DWORD dc_srccolor;
extern "C" uint32_t dc_srccolor_bgra;
extern "C" DWORD *dc_srcblend;
extern "C" DWORD *dc_destblend;
extern "C" fixed_t dc_srcalpha;
extern "C" fixed_t dc_destalpha;
// first pixel in a column
extern "C" const BYTE* dc_source;
extern "C" const BYTE* dc_source2;
extern "C" uint32_t dc_texturefracx;
extern "C" BYTE *dc_dest, *dc_destorg;
extern "C" int dc_count;
@ -50,7 +81,11 @@ extern "C" int dc_count;
extern "C" DWORD vplce[4];
extern "C" DWORD vince[4];
extern "C" BYTE* palookupoffse[4];
extern "C" fixed_t palookuplight[4];
extern "C" const BYTE* bufplce[4];
extern "C" const BYTE* bufplce2[4];
extern "C" uint32_t buftexturefracx[4];
extern "C" uint32_t bufheight[4];
// [RH] Temporary buffer for column drawing
extern "C" BYTE *dc_temp;
@ -58,7 +93,6 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT];
extern "C" unsigned int *dc_ctspan[4];
extern "C" unsigned int horizspans[4];
// [RH] Pointers to the different column and span drawers...
// The span blitting interface.
@ -67,12 +101,7 @@ extern void (*R_DrawColumn)(void);
extern DWORD (*dovline1) ();
extern DWORD (*doprevline1) ();
#ifdef X64_ASM
#define dovline4 vlinetallasm4
extern "C" void vlinetallasm4();
#else
extern void (*dovline4) ();
#endif
extern void setupvline (int);
extern DWORD (*domvline1) ();
@ -94,8 +123,8 @@ extern void (*R_DrawTranslatedColumn)(void);
// Span drawing for rows, floor/ceiling. No Spectre effect needed.
extern void (*R_DrawSpan)(void);
void R_SetupSpanBits(FTexture *tex);
void R_SetSpanColormap(BYTE *colormap);
void R_SetSpanSource(const BYTE *pixels);
void R_SetSpanColormap(FDynamicColormap *colormap, int shade);
void R_SetSpanSource(FTexture *tex);
// Span drawing for masked textures.
extern void (*R_DrawSpanMasked)(void);
@ -125,33 +154,33 @@ extern "C"
void rt_copy1col_c (int hx, int sx, int yl, int yh);
void rt_copy4cols_c (int sx, int yl, int yh);
void rt_shaded1col (int hx, int sx, int yl, int yh);
void rt_shaded1col_c (int hx, int sx, int yl, int yh);
void rt_shaded4cols_c (int sx, int yl, int yh);
void rt_shaded4cols_asm (int sx, int yl, int yh);
void rt_map1col_c (int hx, int sx, int yl, int yh);
void rt_add1col (int hx, int sx, int yl, int yh);
void rt_addclamp1col (int hx, int sx, int yl, int yh);
void rt_subclamp1col (int hx, int sx, int yl, int yh);
void rt_revsubclamp1col (int hx, int sx, int yl, int yh);
void rt_add1col_c (int hx, int sx, int yl, int yh);
void rt_addclamp1col_c (int hx, int sx, int yl, int yh);
void rt_subclamp1col_c (int hx, int sx, int yl, int yh);
void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh);
void rt_tlate1col (int hx, int sx, int yl, int yh);
void rt_tlateadd1col (int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh);
void rt_tlate1col_c (int hx, int sx, int yl, int yh);
void rt_tlateadd1col_c (int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh);
void rt_map4cols_c (int sx, int yl, int yh);
void rt_add4cols_c (int sx, int yl, int yh);
void rt_addclamp4cols_c (int sx, int yl, int yh);
void rt_subclamp4cols (int sx, int yl, int yh);
void rt_revsubclamp4cols (int sx, int yl, int yh);
void rt_subclamp4cols_c (int sx, int yl, int yh);
void rt_revsubclamp4cols_c (int sx, int yl, int yh);
void rt_tlate4cols (int sx, int yl, int yh);
void rt_tlateadd4cols (int sx, int yl, int yh);
void rt_tlateaddclamp4cols (int sx, int yl, int yh);
void rt_tlatesubclamp4cols (int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols (int sx, int yl, int yh);
void rt_tlate4cols_c (int sx, int yl, int yh);
void rt_tlateadd4cols_c (int sx, int yl, int yh);
void rt_tlateaddclamp4cols_c (int sx, int yl, int yh);
void rt_tlatesubclamp4cols_c (int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh);
void rt_copy1col_asm (int hx, int sx, int yl, int yh);
void rt_map1col_asm (int hx, int sx, int yl, int yh);
@ -163,30 +192,49 @@ void rt_add4cols_asm (int sx, int yl, int yh);
void rt_addclamp4cols_asm (int sx, int yl, int yh);
}
extern void (*rt_map4cols)(int sx, int yl, int yh);
extern void (*rt_copy1col)(int hx, int sx, int yl, int yh);
extern void (*rt_copy4cols)(int sx, int yl, int yh);
#ifdef X86_ASM
#define rt_copy1col rt_copy1col_asm
#define rt_copy4cols rt_copy4cols_asm
#define rt_map1col rt_map1col_asm
#define rt_shaded4cols rt_shaded4cols_asm
#define rt_add4cols rt_add4cols_asm
#define rt_addclamp4cols rt_addclamp4cols_asm
#else
#define rt_copy1col rt_copy1col_c
#define rt_copy4cols rt_copy4cols_c
#define rt_map1col rt_map1col_c
#define rt_shaded4cols rt_shaded4cols_c
#define rt_add4cols rt_add4cols_c
#define rt_addclamp4cols rt_addclamp4cols_c
#endif
extern void (*rt_shaded1col)(int hx, int sx, int yl, int yh);
extern void (*rt_shaded4cols)(int sx, int yl, int yh);
extern void (*rt_map1col)(int hx, int sx, int yl, int yh);
extern void (*rt_add1col)(int hx, int sx, int yl, int yh);
extern void (*rt_addclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_subclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlate1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlateadd1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh);
extern void (*rt_map4cols)(int sx, int yl, int yh);
extern void (*rt_add4cols)(int sx, int yl, int yh);
extern void (*rt_addclamp4cols)(int sx, int yl, int yh);
extern void (*rt_subclamp4cols)(int sx, int yl, int yh);
extern void (*rt_revsubclamp4cols)(int sx, int yl, int yh);
extern void (*rt_tlate4cols)(int sx, int yl, int yh);
extern void (*rt_tlateadd4cols)(int sx, int yl, int yh);
extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh);
extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh);
extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh);
extern void (*rt_initcols)(BYTE *buffer);
extern void (*rt_span_coverage)(int x, int start, int stop);
void rt_draw4cols (int sx);
// [RH] Preps the temporary horizontal buffer.
void rt_initcols (BYTE *buffer=NULL);
void rt_initcols_pal (BYTE *buffer);
void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip);
void rt_span_coverage_pal(int x, int start, int stop);
extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip);
void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip);
#ifdef X86_ASM
@ -218,26 +266,47 @@ void R_DrawSpanMaskedTranslucentP_C (void);
void R_DrawTlatedLucentColumnP_C (void);
#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C
void R_FillColumnP (void);
void R_FillColumnHorizP (void);
void R_FillSpan (void);
extern void(*R_FillColumn)(void);
extern void(*R_FillAddColumn)(void);
extern void(*R_FillAddClampColumn)(void);
extern void(*R_FillSubClampColumn)(void);
extern void(*R_FillRevSubClampColumn)(void);
extern void(*R_DrawAddColumn)(void);
extern void(*R_DrawTlatedAddColumn)(void);
extern void(*R_DrawAddClampColumn)(void);
extern void(*R_DrawAddClampTranslatedColumn)(void);
extern void(*R_DrawSubClampColumn)(void);
extern void(*R_DrawSubClampTranslatedColumn)(void);
extern void(*R_DrawRevSubClampColumn)(void);
extern void(*R_DrawRevSubClampTranslatedColumn)(void);
extern void(*R_FillSpan)(void);
extern void(*R_FillColumnHoriz)(void);
void R_FillColumnP_C (void);
void R_FillColumnHorizP_C (void);
void R_FillSpan_C (void);
extern void(*R_SetupDrawSlab)(FSWColormap *base_colormap, float light, int shade);
extern void(*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
#ifdef X86_ASM
#define R_SetupDrawSlab R_SetupDrawSlabA
#define R_DrawSlab R_DrawSlabA
extern "C" void R_SetupDrawSlabA(const BYTE *colormap);
extern "C" void R_DrawSlabA(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
#else
#define R_SetupDrawSlab R_SetupDrawSlabC
#define R_DrawSlab R_DrawSlabC
extern "C" void R_SetupDrawSlabC(const BYTE *colormap);
extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
#endif
extern "C" void R_SetupDrawSlab(const BYTE *colormap);
extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
extern "C" int ds_y;
extern "C" int ds_x1;
extern "C" int ds_x2;
extern "C" FSWColormap* ds_fcolormap;
extern "C" lighttable_t* ds_colormap;
extern "C" ShadeConstants ds_shade_constants;
extern "C" dsfixed_t ds_light;
extern "C" dsfixed_t ds_xfrac;
extern "C" dsfixed_t ds_yfrac;
@ -249,12 +318,14 @@ extern "C" fixed_t ds_alpha;
// start of a 64*64 tile image
extern "C" const BYTE* ds_source;
extern "C" bool ds_source_mipmapped;
extern "C" int ds_color; // [RH] For flat color (no texturing)
extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/];
extern FDynamicColormap ShadeFakeColormap[16];
extern BYTE identitymap[256];
extern FDynamicColormap identitycolormap;
extern BYTE *dc_translation;
// [RH] Added for muliresolution support
@ -278,6 +349,15 @@ inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translati
// style was STYLE_Shade
void R_FinishSetPatchStyle ();
extern fixed_t(*tmvline1_add)();
extern void(*tmvline4_add)();
extern fixed_t(*tmvline1_addclamp)();
extern void(*tmvline4_addclamp)();
extern fixed_t(*tmvline1_subclamp)();
extern void(*tmvline4_subclamp)();
extern fixed_t(*tmvline1_revsubclamp)();
extern void(*tmvline4_revsubclamp)();
// transmaskwallscan calls this to find out what column drawers to use
bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)());
@ -293,4 +373,19 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_
// transmaskwallscan is like maskwallscan, but it can also blend to the background
void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
// Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color)
void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade);
// Same as R_SetColorMapLight, but for ds_colormap and ds_light
void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade);
void R_SetTranslationMap(lighttable_t *translation);
extern bool r_swtruecolor;
EXTERN_CVAR(Bool, r_multithreaded);
EXTERN_CVAR(Bool, r_magfilter);
EXTERN_CVAR(Bool, r_minfilter);
EXTERN_CVAR(Bool, r_mipmap);
#endif

2969
src/r_draw_rgba.cpp Normal file

File diff suppressed because it is too large Load Diff

994
src/r_draw_rgba.h Normal file
View File

@ -0,0 +1,994 @@
// Emacs style mode select -*- C++ -*-
//-----------------------------------------------------------------------------
//
// $Id:$
//
// Copyright (C) 1993-1996 by id Software, Inc.
//
// This source is available for distribution and/or modification
// only under the terms of the DOOM Source Code License as
// published by id Software. All rights reserved.
//
// The source is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License
// for more details.
//
// DESCRIPTION:
// System specific interface stuff.
//
//-----------------------------------------------------------------------------
#ifndef __R_DRAW_RGBA__
#define __R_DRAW_RGBA__
#include "r_draw.h"
#include "v_palette.h"
#include <vector>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#ifndef NO_SSE
#include <immintrin.h>
#endif
/////////////////////////////////////////////////////////////////////////////
// Drawer functions:
void rt_initcols_rgba(BYTE *buffer);
void rt_span_coverage_rgba(int x, int start, int stop);
void rt_copy1col_rgba(int hx, int sx, int yl, int yh);
void rt_copy4cols_rgba(int sx, int yl, int yh);
void rt_shaded1col_rgba(int hx, int sx, int yl, int yh);
void rt_shaded4cols_rgba(int sx, int yl, int yh);
void rt_map1col_rgba(int hx, int sx, int yl, int yh);
void rt_add1col_rgba(int hx, int sx, int yl, int yh);
void rt_addclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_subclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_revsubclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlate1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlateadd1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col_rgba(int hx, int sx, int yl, int yh);
void rt_map4cols_rgba(int sx, int yl, int yh);
void rt_add4cols_rgba(int sx, int yl, int yh);
void rt_addclamp4cols_rgba(int sx, int yl, int yh);
void rt_subclamp4cols_rgba(int sx, int yl, int yh);
void rt_revsubclamp4cols_rgba(int sx, int yl, int yh);
void rt_tlate4cols_rgba(int sx, int yl, int yh);
void rt_tlateadd4cols_rgba(int sx, int yl, int yh);
void rt_tlateaddclamp4cols_rgba(int sx, int yl, int yh);
void rt_tlatesubclamp4cols_rgba(int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols_rgba(int sx, int yl, int yh);
void R_DrawColumnHoriz_rgba();
void R_DrawColumn_rgba();
void R_DrawFuzzColumn_rgba();
void R_DrawTranslatedColumn_rgba();
void R_DrawShadedColumn_rgba();
void R_FillColumn_rgba();
void R_FillAddColumn_rgba();
void R_FillAddClampColumn_rgba();
void R_FillSubClampColumn_rgba();
void R_FillRevSubClampColumn_rgba();
void R_DrawAddColumn_rgba();
void R_DrawTlatedAddColumn_rgba();
void R_DrawAddClampColumn_rgba();
void R_DrawAddClampTranslatedColumn_rgba();
void R_DrawSubClampColumn_rgba();
void R_DrawSubClampTranslatedColumn_rgba();
void R_DrawRevSubClampColumn_rgba();
void R_DrawRevSubClampTranslatedColumn_rgba();
void R_DrawSpan_rgba(void);
void R_DrawSpanMasked_rgba(void);
void R_DrawSpanTranslucent_rgba();
void R_DrawSpanMaskedTranslucent_rgba();
void R_DrawSpanAddClamp_rgba();
void R_DrawSpanMaskedAddClamp_rgba();
void R_FillSpan_rgba();
void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy);
void R_DrawColoredSpan_rgba(int y, int x1, int x2);
void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade);
void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip);
DWORD vlinec1_rgba();
void vlinec4_rgba();
DWORD mvlinec1_rgba();
void mvlinec4_rgba();
fixed_t tmvline1_add_rgba();
void tmvline4_add_rgba();
fixed_t tmvline1_addclamp_rgba();
void tmvline4_addclamp_rgba();
fixed_t tmvline1_subclamp_rgba();
void tmvline4_subclamp_rgba();
fixed_t tmvline1_revsubclamp_rgba();
void tmvline4_revsubclamp_rgba();
void R_FillColumnHoriz_rgba();
void R_FillSpan_rgba();
/////////////////////////////////////////////////////////////////////////////
// Multithreaded rendering infrastructure:
// Redirect drawer commands to worker threads
void R_BeginDrawerCommands();
// Wait until all drawers finished executing
void R_EndDrawerCommands();
struct FSpecialColormap;
class DrawerCommandQueue;
// Worker data for each thread executing drawer commands
class DrawerThread
{
public:
std::thread thread;
// Thread line index of this thread
int core = 0;
// Number of active threads
int num_cores = 1;
// Range of rows processed this pass
int pass_start_y = 0;
int pass_end_y = MAXHEIGHT;
uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
uint32_t *dc_temp_rgba;
// Checks if a line is rendered by this thread
bool line_skipped_by_thread(int line)
{
return line < pass_start_y || line >= pass_end_y || line % num_cores != core;
}
// The number of lines to skip to reach the first line to be rendered by this thread
int skipped_by_thread(int first_line)
{
int pass_skip = MAX(pass_start_y - first_line, 0);
int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores;
return pass_skip + core_skip;
}
// The number of lines to be rendered by this thread
int count_for_thread(int first_line, int count)
{
int lines_until_pass_end = MAX(pass_end_y - first_line, 0);
count = MIN(count, lines_until_pass_end);
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
return MAX(c, 0);
}
// Calculate the dest address for the first line to be rendered by this thread
uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest)
{
return dest + skipped_by_thread(first_line) * pitch;
}
};
// Task to be executed by each worker thread
class DrawerCommand
{
protected:
int _dest_y;
public:
DrawerCommand()
{
_dest_y = static_cast<int>((dc_dest - dc_destorg) / (dc_pitch * 4));
}
virtual void Execute(DrawerThread *thread) = 0;
};
EXTERN_CVAR(Bool, r_multithreaded)
EXTERN_CVAR(Bool, r_mipmap)
// Manages queueing up commands and executing them on worker threads
class DrawerCommandQueue
{
enum { memorypool_size = 16 * 1024 * 1024 };
char memorypool[memorypool_size];
size_t memorypool_pos = 0;
std::vector<DrawerCommand *> commands;
std::vector<DrawerThread> threads;
std::mutex start_mutex;
std::condition_variable start_condition;
std::vector<DrawerCommand *> active_commands;
bool shutdown_flag = false;
int run_id = 0;
std::mutex end_mutex;
std::condition_variable end_condition;
size_t finished_threads = 0;
int threaded_render = 0;
DrawerThread single_core_thread;
int num_passes = 1;
int rows_in_pass = MAXHEIGHT;
void StartThreads();
void StopThreads();
void Finish();
static DrawerCommandQueue *Instance();
DrawerCommandQueue();
~DrawerCommandQueue();
public:
// Allocate memory valid for the duration of a command execution
static void* AllocMemory(size_t size);
// Queue command to be executed by drawer worker threads
template<typename T, typename... Types>
static void QueueCommand(Types &&... args)
{
auto queue = Instance();
if (queue->threaded_render == 0 || !r_multithreaded)
{
T command(std::forward<Types>(args)...);
command.Execute(&queue->single_core_thread);
}
else
{
void *ptr = AllocMemory(sizeof(T));
if (!ptr) // Out of memory - render what we got
{
queue->Finish();
ptr = AllocMemory(sizeof(T));
if (!ptr)
return;
}
T *command = new (ptr)T(std::forward<Types>(args)...);
queue->commands.push_back(command);
}
}
// Redirects all drawing commands to worker threads until End is called
// Begin/End blocks can be nested.
static void Begin();
// End redirection and wait until all worker threads finished executing
static void End();
// Waits until all worker threads finished executing
static void WaitForWorkers();
};
/////////////////////////////////////////////////////////////////////////////
// Drawer commands:
class ApplySpecialColormapRGBACommand : public DrawerCommand
{
BYTE *buffer;
int pitch;
int width;
int height;
int start_red;
int start_green;
int start_blue;
int end_red;
int end_green;
int end_blue;
public:
ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen);
void Execute(DrawerThread *thread) override;
};
template<typename CommandType, typename BlendMode>
class DrawerBlendCommand : public CommandType
{
public:
void Execute(DrawerThread *thread) override
{
typename CommandType::LoopIterator loop(this, thread);
if (!loop) return;
BlendMode blend(*this, loop);
do
{
blend.Blend(*this, loop);
} while (loop.next());
}
};
/////////////////////////////////////////////////////////////////////////////
// Pixel shading inline functions:
// Give the compiler a strong hint we want these functions inlined:
#ifndef FORCEINLINE
#if defined(_MSC_VER)
#define FORCEINLINE __forceinline
#elif defined(__GNUC__)
#define FORCEINLINE __attribute__((always_inline)) inline
#else
#define FORCEINLINE inline
#endif
#endif
// Promise compiler we have no aliasing of this pointer
#ifndef RESTRICT
#if defined(_MSC_VER)
#define RESTRICT __restrict
#elif defined(__GNUC__)
#define RESTRICT __restrict__
#else
#define RESTRICT
#endif
#endif
class LightBgra
{
public:
// calculates the light constant passed to the shade_pal_index function
FORCEINLINE static uint32_t calc_light_multiplier(dsfixed_t light)
{
return 256 - (light >> (FRACBITS - 8));
}
// Calculates a ARGB8 color for the given palette index and light multiplier
FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light)
{
const PalEntry &color = GPalette.BaseColors[index];
uint32_t red = color.r;
uint32_t green = color.g;
uint32_t blue = color.b;
red = red * light / 256;
green = green * light / 256;
blue = blue * light / 256;
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap
FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants)
{
const PalEntry &color = GPalette.BaseColors[index];
uint32_t alpha = color.d & 0xff000000;
uint32_t red = color.r;
uint32_t green = color.g;
uint32_t blue = color.b;
if (constants.simple_shade)
{
red = red * light / 256;
green = green * light / 256;
blue = blue * light / 256;
}
else
{
uint32_t inv_light = 256 - light;
uint32_t inv_desaturate = 256 - constants.desaturate;
uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate;
red = (red * inv_desaturate + intensity) / 256;
green = (green * inv_desaturate + intensity) / 256;
blue = (blue * inv_desaturate + intensity) / 256;
red = (constants.fade_red * inv_light + red * light) / 256;
green = (constants.fade_green * inv_light + green * light) / 256;
blue = (constants.fade_blue * inv_light + blue * light) / 256;
red = (red * constants.light_red) / 256;
green = (green * constants.light_green) / 256;
blue = (blue * constants.light_blue) / 256;
}
return alpha | (red << 16) | (green << 8) | blue;
}
FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light)
{
uint32_t red = RPART(color) * light / 256;
uint32_t green = GPART(color) * light / 256;
uint32_t blue = BPART(color) * light / 256;
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
FORCEINLINE static uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants)
{
uint32_t alpha = color & 0xff000000;
uint32_t red = (color >> 16) & 0xff;
uint32_t green = (color >> 8) & 0xff;
uint32_t blue = color & 0xff;
if (constants.simple_shade)
{
red = red * light / 256;
green = green * light / 256;
blue = blue * light / 256;
}
else
{
uint32_t inv_light = 256 - light;
uint32_t inv_desaturate = 256 - constants.desaturate;
uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate;
red = (red * inv_desaturate + intensity) / 256;
green = (green * inv_desaturate + intensity) / 256;
blue = (blue * inv_desaturate + intensity) / 256;
red = (constants.fade_red * inv_light + red * light) / 256;
green = (constants.fade_green * inv_light + green * light) / 256;
blue = (constants.fade_blue * inv_light + blue * light) / 256;
red = (red * constants.light_red) / 256;
green = (green * constants.light_green) / 256;
blue = (blue * constants.light_blue) / 256;
}
return alpha | (red << 16) | (green << 8) | blue;
}
};
class BlendBgra
{
public:
FORCEINLINE static uint32_t copy(uint32_t fg)
{
return fg;
}
FORCEINLINE static uint32_t add(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha)
{
uint32_t red = MIN<uint32_t>((RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 255);
uint32_t green = MIN<uint32_t>((GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 255);
uint32_t blue = MIN<uint32_t>((BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 255);
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
FORCEINLINE static uint32_t sub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha)
{
uint32_t red = clamp<uint32_t>((0x10000 - RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
uint32_t green = clamp<uint32_t>((0x10000 - GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
uint32_t blue = clamp<uint32_t>((0x10000 - BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
FORCEINLINE static uint32_t revsub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha)
{
uint32_t red = clamp<uint32_t>((0x10000 + RPART(fg) * srcalpha - RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
uint32_t green = clamp<uint32_t>((0x10000 + GPART(fg) * srcalpha - GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
uint32_t blue = clamp<uint32_t>((0x10000 + BPART(fg) * srcalpha - BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256;
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
FORCEINLINE static uint32_t alpha_blend(uint32_t fg, uint32_t bg)
{
uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256
uint32_t inv_alpha = 256 - alpha;
uint32_t red = MIN<uint32_t>(RPART(fg) * alpha + (RPART(bg) * inv_alpha) / 256, 255);
uint32_t green = MIN<uint32_t>(GPART(fg) * alpha + (GPART(bg) * inv_alpha) / 256, 255);
uint32_t blue = MIN<uint32_t>(BPART(fg) * alpha + (BPART(bg) * inv_alpha) / 256, 255);
return 0xff000000 | (red << 16) | (green << 8) | blue;
}
};
class SampleBgra
{
public:
inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped)
{
// Is this a magfilter or minfilter?
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS);
fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS);
fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1));
bool magnifying = (magnitude >> FRACBITS == 0);
if (r_mipmap && mipmapped)
{
int level = magnitude >> (FRACBITS + 1);
while (level != 0)
{
if (xbits <= 2 || ybits <= 2)
break;
source += (1 << (xbits)) * (1 << (ybits));
xbits -= 1;
ybits -= 1;
level >>= 1;
}
}
return (magnifying && r_magfilter) || (!magnifying && r_minfilter);
}
FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height)
{
uint32_t frac_y0 = (texturefracy >> FRACBITS) * height;
uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
uint32_t y0 = frac_y0 >> FRACBITS;
uint32_t y1 = frac_y1 >> FRACBITS;
uint32_t p00 = col0[y0];
uint32_t p01 = col0[y1];
uint32_t p10 = col1[y0];
uint32_t p11 = col1[y1];
uint32_t inv_b = texturefracx;
uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
uint32_t a = 16 - inv_a;
uint32_t b = 16 - inv_b;
uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8;
uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8;
uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8;
uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8;
return (alpha << 24) | (red << 16) | (green << 8) | blue;
}
FORCEINLINE static uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits)
{
int xshift = (32 - xbits);
int yshift = (32 - ybits);
int xmask = (1 << xshift) - 1;
int ymask = (1 << yshift) - 1;
uint32_t x = xfrac >> xbits;
uint32_t y = yfrac >> ybits;
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)];
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)];
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)];
uint32_t inv_b = (xfrac >> (xbits - 4)) & 15;
uint32_t inv_a = (yfrac >> (ybits - 4)) & 15;
uint32_t a = 16 - inv_a;
uint32_t b = 16 - inv_b;
uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8;
uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8;
uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8;
uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8;
return (alpha << 24) | (red << 16) | (green << 8) | blue;
}
#ifndef NO_SSE
static __m128i samplertable[256 * 2];
#endif
};
/////////////////////////////////////////////////////////////////////////////
// SSE/AVX shading macros:
#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \
const uint32_t *baseptr = col0[0]; \
__m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \
__m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \
__m128i mone = _mm_loadu_si128((const __m128i*)one); \
__m128i m127 = _mm_set1_epi16(127); \
__m128i m16 = _mm_set1_epi32(16); \
__m128i m15 = _mm_set1_epi32(15); \
__m128i mheight = _mm_loadu_si128((const __m128i*)height); \
__m128i mtexturefracx = _mm_loadu_si128((const __m128i*)texturefracx);
#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \
__m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \
__m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \
__m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \
__m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \
__m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \
__m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \
__m128i y1 = _mm_srli_epi32(frac_y1, FRACBITS); \
__m128i inv_b = mtexturefracx; \
__m128i inv_a = _mm_and_si128(_mm_srli_epi32(frac_y1, FRACBITS - 4), m15); \
__m128i a = _mm_sub_epi32(m16, inv_a); \
__m128i b = _mm_sub_epi32(m16, inv_b); \
__m128i ab = _mm_mullo_epi16(a, b); \
__m128i invab = _mm_mullo_epi16(inv_a, b); \
__m128i ainvb = _mm_mullo_epi16(a, inv_b); \
__m128i invainvb = _mm_mullo_epi16(inv_a, inv_b); \
__m128i ab_lo = _mm_shuffle_epi32(ab, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i ab_hi = _mm_shuffle_epi32(ab, _MM_SHUFFLE(3, 3, 2, 2)); \
__m128i invab_lo = _mm_shuffle_epi32(invab, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i invab_hi = _mm_shuffle_epi32(invab, _MM_SHUFFLE(3, 3, 2, 2)); \
__m128i ainvb_lo = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i ainvb_hi = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(3, 3, 2, 2)); \
__m128i invainvb_lo = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(1, 1, 0, 0)); \
__m128i invainvb_hi = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(3, 3, 2, 2)); \
ab_lo = _mm_or_si128(ab_lo, _mm_slli_epi32(ab_lo, 16)); \
ab_hi = _mm_or_si128(ab_hi, _mm_slli_epi32(ab_hi, 16)); \
invab_lo = _mm_or_si128(invab_lo, _mm_slli_epi32(invab_lo, 16)); \
invab_hi = _mm_or_si128(invab_hi, _mm_slli_epi32(invab_hi, 16)); \
ainvb_lo = _mm_or_si128(ainvb_lo, _mm_slli_epi32(ainvb_lo, 16)); \
ainvb_hi = _mm_or_si128(ainvb_hi, _mm_slli_epi32(ainvb_hi, 16)); \
invainvb_lo = _mm_or_si128(invainvb_lo, _mm_slli_epi32(invainvb_lo, 16)); \
invainvb_hi = _mm_or_si128(invainvb_hi, _mm_slli_epi32(invainvb_hi, 16)); \
__m128i p00 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets0), 4); \
__m128i p01 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets0), 4); \
__m128i p10 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets1), 4); \
__m128i p11 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets1), 4); \
__m128i p00_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p00, _mm_setzero_si128()), ab_lo); \
__m128i p01_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p01, _mm_setzero_si128()), invab_lo); \
__m128i p10_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p10, _mm_setzero_si128()), ainvb_lo); \
__m128i p11_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p11, _mm_setzero_si128()), invainvb_lo); \
__m128i p00_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p00, _mm_setzero_si128()), ab_hi); \
__m128i p01_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p01, _mm_setzero_si128()), invab_hi); \
__m128i p10_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p10, _mm_setzero_si128()), ainvb_hi); \
__m128i p11_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p11, _mm_setzero_si128()), invainvb_hi); \
__m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_lo, p01_lo), _mm_adds_epu16(p10_lo, p11_lo)), m127), 8); \
__m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_hi, p01_hi), _mm_adds_epu16(p10_hi, p11_hi)), m127), 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \
__m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \
{ \
uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \
uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \
uint32_t y0 = (frac_y0 >> FRACBITS); \
uint32_t y1 = (frac_y1 >> FRACBITS); \
\
uint32_t inv_b = texturefracx[i]; \
uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \
\
__m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \
__m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \
\
__m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); \
__m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \
__m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \
\
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \
\
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \
} \
}
#define VEC_SAMPLE_MIP_NEAREST4_COLUMN(fg, col0, col1, mipfrac, texturefracy, height0, height1) { \
uint32_t y0[4], y1[4]; \
for (int i = 0; i < 4; i++) \
{ \
y0[i] = (texturefracy[i] >> FRACBITS) * height0[i]; \
y1[i] = (texturefracy[i] >> FRACBITS) * height1[i]; \
} \
__m128i p0 = _mm_set_epi32(col0[y0[3]], col0[y0[2]], col0[y0[1]], col0[y0[0]]); \
__m128i p1 = _mm_set_epi32(col1[y1[3]], col1[y1[2]], col1[y1[1]], col1[y1[0]]); \
__m128i t = _mm_loadu_si128((const __m128i*)mipfrac); \
__m128i inv_t = _mm_sub_epi32(_mm_set1_epi32(256), mipfrac); \
__m128i p0_lo = _mm_unpacklo_epi8(p0, _mm_setzero_si128()); \
__m128i p0_hi = _mm_unpackhi_epi8(p0, _mm_setzero_si128()); \
__m128i p1_lo = _mm_unpacklo_epi8(p1, _mm_setzero_si128()); \
__m128i p1_hi = _mm_unpackhi_epi8(p1, _mm_setzero_si128()); \
__m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_lo, t), _mm_mullo_epi16(p1_lo, inv_t)), 8); \
__m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_hi, t), _mm_mullo_epi16(p1_hi, inv_t)), 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
#define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \
int xshift = (32 - xbits); \
int yshift = (32 - ybits); \
int xmask = (1 << xshift) - 1; \
int ymask = (1 << yshift) - 1; \
\
__m128i m127 = _mm_set1_epi16(127); \
fg = _mm_setzero_si128(); \
for (int i = 0; i < 4; i++) \
{ \
uint32_t x = xfrac >> xbits; \
uint32_t y = yfrac >> ybits; \
\
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \
uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \
uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \
\
uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \
uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \
\
__m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \
__m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \
\
__m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \
__m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \
\
__m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \
__m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \
\
fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \
\
xfrac += xstep; \
yfrac += ystep; \
} \
}
// Calculate constants for a simple shade with gamma correction
#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
__m256 mlight_lo = mlight_hi; \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f);
// Calculate constants for a simple shade with different light levels for each pixel and gamma correction
#define AVX_LINEAR_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \
__m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f);
// Simple shade 4 pixels with gamma correction
#define AVX_LINEAR_SHADE_SIMPLE(fg) { \
__m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \
__m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \
__m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \
fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \
fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \
fg_hi = _mm256_mul_ps(fg_hi, mlight_hi); \
fg_hi = _mm256_sqrt_ps(fg_hi); \
fg_hi = _mm256_mul_ps(fg_hi, m255); \
fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \
fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \
fg_lo = _mm256_mul_ps(fg_lo, mlight_lo); \
fg_lo = _mm256_sqrt_ps(fg_lo); \
fg_lo = _mm256_mul_ps(fg_lo, m255); \
fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \
fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \
}
// Calculate constants for a complex shade with gamma correction
#define AVX_LINEAR_SHADE_INIT(light, shade_constants) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
__m256 mlight_lo = mlight_hi; \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f); \
__m256 color = _mm256_set_ps( \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
__m256 fade = _mm256_set_ps( \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
__m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \
__m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f);
// Calculate constants for a complex shade with different light levels for each pixel and gamma correction
#define AVX_LINEAR_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \
__m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \
__m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \
mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \
mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f); \
__m256 color = _mm256_set_ps( \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
__m256 fade = _mm256_set_ps( \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
__m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \
__m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f);
// Complex shade 4 pixels with gamma correction
#define AVX_LINEAR_SHADE(fg, shade_constants) { \
__m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \
__m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \
__m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \
fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \
fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \
fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \
fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \
\
__m128 intensity_hi0 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 0), intensity_weight); \
__m128 intensity_hi1 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 1), intensity_weight); \
intensity_hi0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi0, _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_hi0 = _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(0,0,0,0)); \
intensity_hi1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi1, _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_hi1 = _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(0,0,0,0)); \
__m256 intensity_hi = _mm256_set_m128(intensity_hi1, intensity_hi0); \
\
fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, inv_desaturate), intensity_hi); \
fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, mlight_hi), fade_amount_hi); \
fg_hi = _mm256_mul_ps(fg_hi, color); \
\
__m128 intensity_lo0 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 0), intensity_weight); \
__m128 intensity_lo1 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 1), intensity_weight); \
intensity_lo0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo0, _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_lo0 = _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(0,0,0,0)); \
intensity_lo1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo1, _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \
intensity_lo1 = _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(0,0,0,0)); \
__m256 intensity_lo = _mm256_set_m128(intensity_lo1, intensity_lo0); \
\
fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, inv_desaturate), intensity_lo); \
fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, mlight_lo), fade_amount_lo); \
fg_lo = _mm256_mul_ps(fg_lo, color); \
\
fg_hi = _mm256_sqrt_ps(fg_hi); \
fg_hi = _mm256_mul_ps(fg_hi, m255); \
fg_lo = _mm256_sqrt_ps(fg_lo); \
fg_lo = _mm256_mul_ps(fg_lo, m255); \
fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \
fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \
}
/*
// Complex shade 8 pixels
#define AVX_SHADE(fg, shade_constants) { \
__m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \
__m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \
\
__m256i intensity_hi = _mm256_mullo_epi16(fg_hi, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \
__m256i intensity_lo = _mm256_mullo_epi16(fg_lo, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \
__m256i intensity = _mm256_mullo_epi16(_mm256_srli_epi16(_mm256_hadd_epi16(_mm256_hadd_epi16(intensity_lo, intensity_hi), _mm256_setzero_si256()), 8), desaturate); \
intensity = _mm256_unpacklo_epi16(intensity, intensity); \
intensity_hi = _mm256_unpackhi_epi32(intensity, intensity); \
intensity_lo = _mm256_unpacklo_epi32(intensity, intensity); \
\
fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \
fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, mlight), fade_amount), 8); \
fg_hi = _mm256_srli_epi16(_mm256_mullo_epi16(fg_hi, color), 8); \
\
fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \
fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, mlight), fade_amount), 8); \
fg_lo = _mm256_srli_epi16(_mm256_mullo_epi16(fg_lo, color), 8); \
\
fg = _mm256_packus_epi16(fg_lo, fg_hi); \
}
*/
// Normal premultiplied alpha blend using the alpha from fg
#define VEC_ALPHA_BLEND(fg,bg) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \
__m128i m256 = _mm_set1_epi16(256); \
__m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \
__m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \
alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \
alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \
__m128i inv_alpha_hi = _mm_sub_epi16(m256, alpha_hi); \
__m128i inv_alpha_lo = _mm_sub_epi16(m256, alpha_lo); \
fg_hi = _mm_mullo_epi16(fg_hi, alpha_hi); \
fg_hi = _mm_srli_epi16(fg_hi, 8); \
fg_lo = _mm_mullo_epi16(fg_lo, alpha_lo); \
fg_lo = _mm_srli_epi16(fg_lo, 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \
bg_hi = _mm_srli_epi16(bg_hi, 8); \
bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \
bg_lo = _mm_srli_epi16(bg_lo, 8); \
bg = _mm_packus_epi16(bg_lo, bg_hi); \
fg = _mm_adds_epu8(fg, bg); \
}
// Calculates the final alpha values to be used when combined with the source texture alpha channel
FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha)
{
uint32_t alpha = fg >> 24;
alpha += alpha >> 7;
uint32_t inv_alpha = 256 - alpha;
return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8;
}
#define VEC_CALC_BLEND_ALPHA_VARS() __m128i msrc_alpha, mdest_alpha, m256, m255, m128;
#define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \
msrc_alpha = _mm_set1_epi16(src_alpha); \
mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \
m256 = _mm_set1_epi16(256); \
m255 = _mm_set1_epi16(255); \
m128 = _mm_set1_epi16(128);
// Calculates the final alpha values to be used when combined with the source texture alpha channel
#define VEC_CALC_BLEND_ALPHA(fg) \
__m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \
__m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
__m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \
alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \
bg_alpha_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_hi), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_hi))), m128), 8); \
bg_alpha_hi = _mm_add_epi16(bg_alpha_hi, _mm_srli_epi16(bg_alpha_hi, 7)); \
bg_alpha_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_lo), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_lo))), m128), 8); \
bg_alpha_lo = _mm_add_epi16(bg_alpha_lo, _mm_srli_epi16(bg_alpha_lo, 7)); \
fg_alpha_hi = msrc_alpha; \
fg_alpha_lo = msrc_alpha; \
}
#define SSE_SHADE_VARS() __m128i mlight_hi, mlight_lo, color, fade, fade_amount_hi, fade_amount_lo, inv_desaturate;
// Calculate constants for a simple shade
#define SSE_SHADE_SIMPLE_INIT(light) \
mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
mlight_lo = mlight_hi;
// Calculate constants for a simple shade with different light levels for each pixel
#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \
mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2);
// Simple shade 4 pixels
#define SSE_SHADE_SIMPLE(fg) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \
fg_hi = _mm_srli_epi16(fg_hi, 8); \
fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \
fg_lo = _mm_srli_epi16(fg_lo, 8); \
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
// Calculate constants for a complex shade
#define SSE_SHADE_INIT(light, shade_constants) \
mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
mlight_lo = mlight_hi; \
color = _mm_set_epi16( \
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
fade = _mm_set_epi16( \
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
fade_amount_lo = fade_amount_hi; \
inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
// Calculate constants for a complex shade with different light levels for each pixel
#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \
mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \
color = _mm_set_epi16( \
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
fade = _mm_set_epi16( \
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \
inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
// Complex shade 4 pixels
#define SSE_SHADE(fg, shade_constants) { \
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \
\
__m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \
intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \
\
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \
fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \
\
__m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \
uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \
uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \
intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \
\
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \
fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \
\
fg = _mm_packus_epi16(fg_lo, fg_hi); \
}
#endif

367
src/r_draw_rgba_sse.h Normal file
View File

@ -0,0 +1,367 @@
//
// SSE/AVX intrinsics based drawers for the r_draw family of drawers.
//
// Note: This header file is intentionally not guarded by a __R_DRAW_RGBA_SSE__ define.
// It is because the code is nearly identical for SSE vs AVX. The file is included
// multiple times by r_draw_rgba.cpp with different defines that changes the class
// names outputted and the type of intrinsics used.
#ifdef _MSC_VER
#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
#endif
class VecCommand(DrawSpanRGBA) : public DrawerCommand
{
const uint32_t * RESTRICT _source;
fixed_t _xfrac;
fixed_t _yfrac;
fixed_t _xstep;
fixed_t _ystep;
int _x1;
int _x2;
int _y;
int _xbits;
int _ybits;
BYTE * RESTRICT _destorg;
fixed_t _light;
ShadeConstants _shade_constants;
bool _nearest_filter;
public:
VecCommand(DrawSpanRGBA)()
{
_source = (const uint32_t*)ds_source;
_xfrac = ds_xfrac;
_yfrac = ds_yfrac;
_xstep = ds_xstep;
_ystep = ds_ystep;
_x1 = ds_x1;
_x2 = ds_x2;
_y = ds_y;
_xbits = ds_xbits;
_ybits = ds_ybits;
_destorg = dc_destorg;
_light = ds_light;
_shade_constants = ds_shade_constants;
_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped);
}
void Execute(DrawerThread *thread) override
{
if (thread->line_skipped_by_thread(_y))
return;
dsfixed_t xfrac;
dsfixed_t yfrac;
dsfixed_t xstep;
dsfixed_t ystep;
uint32_t* dest;
const uint32_t* source = _source;
int count;
int spot;
xfrac = _xfrac;
yfrac = _yfrac;
dest = ylookup[_y] + _x1 + (uint32_t*)_destorg;
count = _x2 - _x1 + 1;
xstep = _xstep;
ystep = _ystep;
uint32_t light = LightBgra::calc_light_multiplier(_light);
ShadeConstants shade_constants = _shade_constants;
if (_nearest_filter)
{
if (_xbits == 6 && _ybits == 6)
{
// 64x64 is the most common case by far, so special case it.
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
// Current texture index in u,v.
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile,
// re-index using light/colormap.
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
// Next step in u,v.
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
// Current texture index in u,v.
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile,
// re-index using light/colormap.
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
// Next step in u,v.
dest += 4;
}
}
if (count == 0)
return;
do
{
// Current texture index in u,v.
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
// Lookup pixel from flat texture tile
*dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants);
// Next step in u,v.
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
else
{
BYTE yshift = 32 - _ybits;
BYTE xshift = yshift - _xbits;
int xmask = ((1 << _xbits) - 1) << _ybits;
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
if (count == 0)
return;
do
{
// Current texture index in u,v.
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
// Lookup pixel from flat texture tile
*dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants);
// Next step in u,v.
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
}
else
{
if (_xbits == 6 && _ybits == 6)
{
// 64x64 is the most common case by far, so special case it.
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
__m128i fg;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
__m128i fg;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
if (count == 0)
return;
do
{
*dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants);
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
else
{
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
__m128i fg;
int tmpx = 32 - _xbits;
int tmpy = 32 - _ybits;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
__m128i fg;
int tmpx = 32 - _xbits;
int tmpy = 32 - _ybits;
VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
if (count == 0)
return;
do
{
*dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants);
xfrac += xstep;
yfrac += ystep;
} while (--count);
}
}
}
};

View File

@ -313,21 +313,21 @@ void rt_Translate4cols(const BYTE *translation, int yl, int yh)
}
// Translates one span at hx to the screen at sx.
void rt_tlate1col (int hx, int sx, int yl, int yh)
void rt_tlate1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_map1col(hx, sx, yl, yh);
}
// Translates all four spans to the screen starting at sx.
void rt_tlate4cols (int sx, int yl, int yh)
void rt_tlate4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_map4cols(sx, yl, yh);
}
// Adds one span at hx to the screen at sx without clamping.
void rt_add1col (int hx, int sx, int yl, int yh)
void rt_add1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -417,21 +417,21 @@ void rt_add4cols_c (int sx, int yl, int yh)
}
// Translates and adds one span at hx to the screen at sx without clamping.
void rt_tlateadd1col (int hx, int sx, int yl, int yh)
void rt_tlateadd1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_add1col(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx without clamping.
void rt_tlateadd4cols (int sx, int yl, int yh)
void rt_tlateadd4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_add4cols(sx, yl, yh);
}
// Shades one span at hx to the screen at sx.
void rt_shaded1col (int hx, int sx, int yl, int yh)
void rt_shaded1col_c (int hx, int sx, int yl, int yh)
{
DWORD *fgstart;
BYTE *colormap;
@ -507,7 +507,7 @@ void rt_shaded4cols_c (int sx, int yl, int yh)
}
// Adds one span at hx to the screen at sx with clamping.
void rt_addclamp1col (int hx, int sx, int yl, int yh)
void rt_addclamp1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -556,13 +556,14 @@ void rt_addclamp4cols_c (int sx, int yl, int yh)
return;
count++;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
dest = ylookup[yl] + sx + dc_destorg;
source = &dc_temp[yl*4];
pitch = dc_pitch;
colormap = dc_colormap;
DWORD *fg2rgb = dc_srcblend;
DWORD *bg2rgb = dc_destblend;
do {
DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]];
DWORD b = a;
@ -607,21 +608,21 @@ void rt_addclamp4cols_c (int sx, int yl, int yh)
}
// Translates and adds one span at hx to the screen at sx with clamping.
void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh)
void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_addclamp1col(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx with clamping.
void rt_tlateaddclamp4cols (int sx, int yl, int yh)
void rt_tlateaddclamp4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_addclamp4cols(sx, yl, yh);
}
// Subtracts one span at hx to the screen at sx with clamping.
void rt_subclamp1col (int hx, int sx, int yl, int yh)
void rt_subclamp1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -656,7 +657,7 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh)
}
// Subtracts all four spans to the screen starting at sx with clamping.
void rt_subclamp4cols (int sx, int yl, int yh)
void rt_subclamp4cols_c (int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -716,21 +717,21 @@ void rt_subclamp4cols (int sx, int yl, int yh)
}
// Translates and subtracts one span at hx to the screen at sx with clamping.
void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh)
void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_subclamp1col(hx, sx, yl, yh);
}
// Translates and subtracts all four spans to the screen starting at sx with clamping.
void rt_tlatesubclamp4cols (int sx, int yl, int yh)
void rt_tlatesubclamp4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_subclamp4cols(sx, yl, yh);
}
// Subtracts one span at hx from the screen at sx with clamping.
void rt_revsubclamp1col (int hx, int sx, int yl, int yh)
void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -765,7 +766,7 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh)
}
// Subtracts all four spans from the screen starting at sx with clamping.
void rt_revsubclamp4cols (int sx, int yl, int yh)
void rt_revsubclamp4cols_c (int sx, int yl, int yh)
{
BYTE *colormap;
BYTE *source;
@ -825,14 +826,14 @@ void rt_revsubclamp4cols (int sx, int yl, int yh)
}
// Translates and subtracts one span at hx from the screen at sx with clamping.
void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh)
void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh)
{
rt_Translate1col(dc_translation, hx, yl, yh);
rt_revsubclamp1col(hx, sx, yl, yh);
}
// Translates and subtracts all four spans from the screen starting at sx with clamping.
void rt_tlaterevsubclamp4cols (int sx, int yl, int yh)
void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh)
{
rt_Translate4cols(dc_translation, yl, yh);
rt_revsubclamp4cols(sx, yl, yh);
@ -855,18 +856,21 @@ void rt_draw4cols (int sx)
}
#ifdef X86_ASM
// Setup assembly routines for changed colormaps or other parameters.
if (hcolfunc_post4 == rt_shaded4cols)
if (!r_swtruecolor)
{
R_SetupShadedCol();
}
else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols)
{
R_SetupAddClampCol();
}
else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols)
{
R_SetupAddCol();
// Setup assembly routines for changed colormaps or other parameters.
if (hcolfunc_post4 == rt_shaded4cols)
{
R_SetupShadedCol();
}
else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols)
{
R_SetupAddClampCol();
}
else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols)
{
R_SetupAddCol();
}
}
#endif
@ -1002,7 +1006,7 @@ void rt_draw4cols (int sx)
// Before each pass through a rendering loop that uses these routines,
// call this function to set up the span pointers.
void rt_initcols (BYTE *buff)
void rt_initcols_pal (BYTE *buff)
{
int y;
@ -1011,6 +1015,14 @@ void rt_initcols (BYTE *buff)
horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0];
}
void rt_span_coverage_pal(int x, int start, int stop)
{
unsigned int **tspan = &dc_ctspan[x & 3];
(*tspan)[0] = start;
(*tspan)[1] = stop;
*tspan += 2;
}
// Stretches a column into a temporary buffer which is later
// drawn to the screen along with up to three other columns.
void R_DrawColumnHorizP_C (void)
@ -1073,7 +1085,7 @@ void R_DrawColumnHorizP_C (void)
}
// [RH] Just fills a column with a given color
void R_FillColumnHorizP (void)
void R_FillColumnHorizP_C (void)
{
int count = dc_count;
BYTE color = dc_color;
@ -1108,6 +1120,7 @@ void R_FillColumnHorizP (void)
void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span)
{
int pixelsize = r_swtruecolor ? 4 : 1;
const fixed_t texturemid = FLOAT2FIXED(dc_texturemid);
while (span->Length != 0)
{
@ -1177,7 +1190,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span)
}
}
dc_source = column + top;
dc_dest = ylookup[dc_yl] + dc_x + dc_destorg;
dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg;
dc_count = dc_yh - dc_yl + 1;
hcolfunc_pre ();
}

995
src/r_drawt_rgba.cpp Normal file
View File

@ -0,0 +1,995 @@
/*
** r_drawt_rgba.cpp
** Faster column drawers for modern processors, true color edition
**
**---------------------------------------------------------------------------
** Copyright 1998-2006 Randy Heit
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
**
** 1. Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** 3. The name of the author may not be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**---------------------------------------------------------------------------
**
** True color versions of the similar functions in r_drawt.cpp
** Please see r_drawt.cpp for a description of the globals used.
*/
#include "templates.h"
#include "doomtype.h"
#include "doomdef.h"
#include "r_defs.h"
#include "r_draw.h"
#include "r_main.h"
#include "r_things.h"
#include "v_video.h"
#include "r_draw_rgba.h"
#ifndef NO_SSE
#include <emmintrin.h>
#endif
extern unsigned int dc_tspans[4][MAXHEIGHT];
extern unsigned int *dc_ctspan[4];
extern unsigned int *horizspan[4];
#ifndef NO_SSE
#ifdef _MSC_VER
#pragma warning(disable: 4101) // warning C4101: unreferenced local variable
#endif
// Generate SSE drawers:
#define VecCommand(name) name##_SSE_Command
#define VEC_SHADE_VARS SSE_SHADE_VARS
#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT
#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4
#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE
#define VEC_SHADE_INIT SSE_SHADE_INIT
#define VEC_SHADE_INIT4 SSE_SHADE_INIT4
#define VEC_SHADE SSE_SHADE
#include "r_drawt_rgba_sse.h"
/*
// Generate AVX drawers:
#undef VecCommand
#undef VEC_SHADE_SIMPLE_INIT
#undef VEC_SHADE_SIMPLE_INIT4
#undef VEC_SHADE_SIMPLE
#undef VEC_SHADE_INIT
#undef VEC_SHADE_INIT4
#undef VEC_SHADE
#define VecCommand(name) name##_AVX_Command
#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT
#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4
#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE
#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT
#define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4
#define VEC_SHADE AVX_LINEAR_SHADE
#include "r_drawt_rgba_sse.h"
*/
#endif
/////////////////////////////////////////////////////////////////////////////
class DrawerRt1colCommand : public DrawerCommand
{
public:
int hx;
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
uint32_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _colormap;
uint32_t _srcalpha;
uint32_t _destalpha;
DrawerRt1colCommand(int hx, int sx, int yl, int yh)
{
this->hx = hx;
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_colormap = dc_colormap;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
class LoopIterator
{
public:
uint32_t *source;
uint32_t *dest;
int count;
int pitch, sincr;
LoopIterator(DrawerRt1colCommand *command, DrawerThread *thread)
{
count = thread->count_for_thread(command->yl, (command->yh - command->yl + 1));
if (count <= 0)
return;
dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg);
source = &thread->dc_temp_rgba[command->yl * 4 + command->hx] + thread->skipped_by_thread(command->yl) * 4;
pitch = command->_pitch * thread->num_cores;
sincr = thread->num_cores * 4;
}
explicit operator bool()
{
return count > 0;
}
bool next()
{
dest += pitch;
source += sincr;
return (--count) != 0;
}
};
};
class DrawerRt4colsCommand : public DrawerCommand
{
public:
int sx;
int yl;
int yh;
uint32_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _destorg;
int _pitch;
BYTE * RESTRICT _colormap;
uint32_t _srcalpha;
uint32_t _destalpha;
DrawerRt4colsCommand(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_destorg = dc_destorg;
_pitch = dc_pitch;
_colormap = dc_colormap;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
class LoopIterator
{
public:
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
LoopIterator(DrawerRt4colsCommand *command, DrawerThread *thread)
{
count = thread->count_for_thread(command->yl, command->yh - command->yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg);
source = &thread->dc_temp_rgba[command->yl * 4] + thread->skipped_by_thread(command->yl) * 4;
pitch = command->_pitch * thread->num_cores;
sincr = thread->num_cores * 4;
}
explicit operator bool()
{
return count > 0;
}
bool next()
{
dest += pitch;
source += sincr;
return (--count) != 0;
}
};
};
class RtCopy1colRGBACommand : public DrawerRt1colCommand
{
public:
RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = GPalette.BaseColors[*loop.source];
*loop.dest = BlendBgra::copy(fg);
} while (loop.next());
}
};
class RtMap1colRGBACommand : public DrawerRt1colCommand
{
public:
RtMap1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants);
*loop.dest = BlendBgra::copy(fg);
} while (loop.next());
}
};
class RtMap4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtMap4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants);
loop.dest[i] = BlendBgra::copy(fg);
}
} while (loop.next());
}
};
class RtAdd1colRGBACommand : public DrawerRt1colCommand
{
public:
RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtAdd4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtAdd4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants);
loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
class RtShaded1colRGBACommand : public DrawerRt1colCommand
{
uint32_t _color;
public:
RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
_color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t alpha = _colormap[*loop.source] * 4;
uint32_t inv_alpha = 256 - alpha;
*loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha);
} while (loop.next());
}
};
class RtShaded4colsRGBACommand : public DrawerRt4colsCommand
{
uint32_t _color;
public:
RtShaded4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
_color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t alpha = _colormap[loop.source[i]] * 4;
uint32_t inv_alpha = 256 - alpha;
loop.dest[i] = BlendBgra::add(_color, loop.dest[i], alpha, inv_alpha);
}
} while (loop.next());
}
};
class RtAddClamp1colRGBACommand : public DrawerRt1colCommand
{
public:
RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtAddClamp4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtAddClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants);
loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
class RtSubClamp1colRGBACommand : public DrawerRt1colCommand
{
public:
RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants);
*loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtSubClamp4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants);
loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
class RtRevSubClamp1colRGBACommand : public DrawerRt1colCommand
{
public:
RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants);
*loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class RtRevSubClamp4colsRGBACommand : public DrawerRt4colsCommand
{
public:
RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh)
{
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
for (int i = 0; i < 4; i++)
{
uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants);
loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, _destalpha);
}
} while (loop.next());
}
};
class RtTranslate1colRGBACommand : public DrawerCommand
{
const BYTE * RESTRICT translation;
int hx;
int yl;
int yh;
public:
RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh)
{
this->translation = translation;
this->hx = hx;
this->yl = yl;
this->yh = yh;
}
void Execute(DrawerThread *thread) override
{
int count = yh - yl + 1;
uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx];
// Things we do to hit the compiler's optimizer with a clue bat:
// 1. Parallelism is explicitly spelled out by using a separate
// C instruction for each assembly instruction. GCC lets me
// have four temporaries, but VC++ spills to the stack with
// more than two. Two is probably optimal, anyway.
// 2. The results of the translation lookups are explicitly
// stored in byte-sized variables. This causes the VC++ code
// to use byte mov instructions in most cases; for apparently
// random reasons, it will use movzx for some places. GCC
// ignores this and uses movzx always.
// Do 8 rows at a time.
for (int count8 = count >> 3; count8; --count8)
{
int c0, c1;
BYTE b0, b1;
c0 = source[0]; c1 = source[4];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[4] = b1;
c0 = source[8]; c1 = source[12];
b0 = translation[c0]; b1 = translation[c1];
source[8] = b0; source[12] = b1;
c0 = source[16]; c1 = source[20];
b0 = translation[c0]; b1 = translation[c1];
source[16] = b0; source[20] = b1;
c0 = source[24]; c1 = source[28];
b0 = translation[c0]; b1 = translation[c1];
source[24] = b0; source[28] = b1;
source += 32;
}
// Finish by doing 1 row at a time.
for (count &= 7; count; --count, source += 4)
{
source[0] = translation[source[0]];
}
}
};
class RtTranslate4colsRGBACommand : public DrawerCommand
{
const BYTE * RESTRICT translation;
int yl;
int yh;
public:
RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh)
{
this->translation = translation;
this->yl = yl;
this->yh = yh;
}
void Execute(DrawerThread *thread) override
{
int count = yh - yl + 1;
uint32_t *source = &thread->dc_temp_rgba[yl*4];
int c0, c1;
BYTE b0, b1;
// Do 2 rows at a time.
for (int count8 = count >> 1; count8; --count8)
{
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
c0 = source[4]; c1 = source[5];
b0 = translation[c0]; b1 = translation[c1];
source[4] = b0; source[5] = b1;
c0 = source[6]; c1 = source[7];
b0 = translation[c0]; b1 = translation[c1];
source[6] = b0; source[7] = b1;
source += 8;
}
// Do the final row if count was odd.
if (count & 1)
{
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
}
}
};
class RtInitColsRGBACommand : public DrawerCommand
{
BYTE * RESTRICT buff;
public:
RtInitColsRGBACommand(BYTE *buff)
{
this->buff = buff;
}
void Execute(DrawerThread *thread) override
{
thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff;
}
};
class DrawColumnHorizRGBACommand : public DrawerCommand
{
int _count;
fixed_t _iscale;
fixed_t _texturefrac;
const BYTE * RESTRICT _source;
int _x;
int _yl;
int _yh;
public:
DrawColumnHorizRGBACommand()
{
_count = dc_count;
_iscale = dc_iscale;
_texturefrac = dc_texturefrac;
_source = dc_source;
_x = dc_x;
_yl = dc_yl;
_yh = dc_yh;
}
void Execute(DrawerThread *thread) override
{
int count = _count;
uint32_t *dest;
fixed_t fracstep;
fixed_t frac;
if (count <= 0)
return;
{
int x = _x & 3;
dest = &thread->dc_temp_rgba[x + 4 * _yl];
}
fracstep = _iscale;
frac = _texturefrac;
const BYTE *source = _source;
if (count & 1) {
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
}
if (count & 2) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest += 8;
}
if (count & 4) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest += 16;
}
count >>= 3;
if (!count) return;
do
{
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest[16] = source[frac >> FRACBITS]; frac += fracstep;
dest[20] = source[frac >> FRACBITS]; frac += fracstep;
dest[24] = source[frac >> FRACBITS]; frac += fracstep;
dest[28] = source[frac >> FRACBITS]; frac += fracstep;
dest += 32;
} while (--count);
}
};
class FillColumnHorizRGBACommand : public DrawerCommand
{
int _x;
int _yl;
int _yh;
int _count;
int _color;
public:
FillColumnHorizRGBACommand()
{
_x = dc_x;
_count = dc_count;
_color = dc_color;
_yl = dc_yl;
_yh = dc_yh;
}
void Execute(DrawerThread *thread) override
{
int count = _count;
int color = _color;
uint32_t *dest;
if (count <= 0)
return;
{
int x = _x & 3;
dest = &thread->dc_temp_rgba[x + 4 * _yl];
}
if (count & 1) {
*dest = color;
dest += 4;
}
if (!(count >>= 1))
return;
do {
dest[0] = color; dest[4] = color;
dest += 8;
} while (--count);
}
};
/////////////////////////////////////////////////////////////////////////////
// Copies one span at hx to the screen at sx.
void rt_copy1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtCopy1colRGBACommand>(hx, sx, yl, yh);
}
// Copies all four spans to the screen starting at sx.
void rt_copy4cols_rgba (int sx, int yl, int yh)
{
// To do: we could do this with SSE using __m128i
rt_copy1col_rgba(0, sx, yl, yh);
rt_copy1col_rgba(1, sx + 1, yl, yh);
rt_copy1col_rgba(2, sx + 2, yl, yh);
rt_copy1col_rgba(3, sx + 3, yl, yh);
}
// Maps one span at hx to the screen at sx.
void rt_map1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtMap1colRGBACommand>(hx, sx, yl, yh);
}
// Maps all four spans to the screen starting at sx.
void rt_map4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtMap4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtMap4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtTranslate1colRGBACommand>(translation, hx, yl, yh);
}
void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtTranslate4colsRGBACommand>(translation, yl, yh);
}
// Translates one span at hx to the screen at sx.
void rt_tlate1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_map1col(hx, sx, yl, yh);
}
// Translates all four spans to the screen starting at sx.
void rt_tlate4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_map4cols(sx, yl, yh);
}
// Adds one span at hx to the screen at sx without clamping.
void rt_add1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtAdd1colRGBACommand>(hx, sx, yl, yh);
}
// Adds all four spans to the screen starting at sx without clamping.
void rt_add4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtAdd4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtAdd4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and adds one span at hx to the screen at sx without clamping.
void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_add1col(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx without clamping.
void rt_tlateadd4cols_rgba(int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_add4cols(sx, yl, yh);
}
// Shades one span at hx to the screen at sx.
void rt_shaded1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtShaded1colRGBACommand>(hx, sx, yl, yh);
}
// Shades all four spans to the screen starting at sx.
void rt_shaded4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtShaded4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtShaded4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Adds one span at hx to the screen at sx with clamping.
void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtAddClamp1colRGBACommand>(hx, sx, yl, yh);
}
// Adds all four spans to the screen starting at sx with clamping.
void rt_addclamp4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and adds one span at hx to the screen at sx with clamping.
void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_addclamp1col_rgba(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx with clamping.
void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_addclamp4cols(sx, yl, yh);
}
// Subtracts one span at hx to the screen at sx with clamping.
void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtSubClamp1colRGBACommand>(hx, sx, yl, yh);
}
// Subtracts all four spans to the screen starting at sx with clamping.
void rt_subclamp4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtSubClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtSubClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and subtracts one span at hx to the screen at sx with clamping.
void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_subclamp1col_rgba(hx, sx, yl, yh);
}
// Translates and subtracts all four spans to the screen starting at sx with clamping.
void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_subclamp4cols_rgba(sx, yl, yh);
}
// Subtracts one span at hx from the screen at sx with clamping.
void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtRevSubClamp1colRGBACommand>(hx, sx, yl, yh);
}
// Subtracts all four spans from the screen starting at sx with clamping.
void rt_revsubclamp4cols_rgba (int sx, int yl, int yh)
{
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<RtRevSubClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtRevSubClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
}
// Translates and subtracts one span at hx from the screen at sx with clamping.
void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_revsubclamp1col_rgba(hx, sx, yl, yh);
}
// Translates and subtracts all four spans from the screen starting at sx with clamping.
void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_revsubclamp4cols_rgba(sx, yl, yh);
}
// Before each pass through a rendering loop that uses these routines,
// call this function to set up the span pointers.
void rt_initcols_rgba (BYTE *buff)
{
for (int y = 3; y >= 0; y--)
horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0];
DrawerCommandQueue::QueueCommand<RtInitColsRGBACommand>(buff);
}
void rt_span_coverage_rgba(int x, int start, int stop)
{
unsigned int **tspan = &dc_ctspan[x & 3];
(*tspan)[0] = start;
(*tspan)[1] = stop;
*tspan += 2;
}
// Stretches a column into a temporary buffer which is later
// drawn to the screen along with up to three other columns.
void R_DrawColumnHoriz_rgba (void)
{
if (dc_count <= 0)
return;
int x = dc_x & 3;
unsigned int **span = &dc_ctspan[x];
(*span)[0] = dc_yl;
(*span)[1] = dc_yh;
*span += 2;
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand>();
}
// [RH] Just fills a column with a given color
void R_FillColumnHoriz_rgba (void)
{
if (dc_count <= 0)
return;
int x = dc_x & 3;
unsigned int **span = &dc_ctspan[x];
(*span)[0] = dc_yl;
(*span)[1] = dc_yh;
*span += 2;
DrawerCommandQueue::QueueCommand<FillColumnHorizRGBACommand>();
}

757
src/r_drawt_rgba_sse.h Normal file
View File

@ -0,0 +1,757 @@
//
// SSE/AVX intrinsics based drawers for the r_drawt family of drawers.
//
// Note: This header file is intentionally not guarded by a __R_DRAWT_RGBA_SSE__ define.
// It is because the code is nearly identical for SSE vs AVX. The file is included
// multiple times by r_drawt_rgba.cpp with different defines that changes the class
// names outputted and the type of intrinsics used.
#ifdef _MSC_VER
#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
#endif
class VecCommand(RtMap4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
fixed_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _destorg;
int _pitch;
BYTE * RESTRICT _colormap;
public:
VecCommand(RtMap4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_light = dc_light;
_shade_constants = dc_shade_constants;
_destorg = dc_destorg;
_pitch = dc_pitch;
_colormap = dc_colormap;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
ShadeConstants shade_constants = _shade_constants;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
pitch = _pitch * thread->num_cores;
sincr = thread->num_cores * 4;
BYTE *colormap = _colormap;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
if (count & 1) {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
source += sincr;
dest += pitch;
}
if (!(count >>= 1))
return;
do {
// shade_pal_index 0-3
{
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
}
// shade_pal_index 4-7 (pitch)
{
uint32_t p0 = colormap[source[sincr]];
uint32_t p1 = colormap[source[sincr + 1]];
uint32_t p2 = colormap[source[sincr + 2]];
uint32_t p3 = colormap[source[sincr + 3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)(dest + pitch), fg);
}
source += sincr * 2;
dest += pitch * 2;
} while (--count);
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
if (count & 1) {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
source += sincr;
dest += pitch;
}
if (!(count >>= 1))
return;
do {
// shade_pal_index 0-3
{
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
}
// shade_pal_index 4-7 (pitch)
{
uint32_t p0 = colormap[source[sincr]];
uint32_t p1 = colormap[source[sincr + 1]];
uint32_t p2 = colormap[source[sincr + 2]];
uint32_t p3 = colormap[source[sincr + 3]];
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)(dest + pitch), fg);
}
source += sincr * 2;
dest += pitch * 2;
} while (--count);
}
}
};
class VecCommand(RtAdd4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _colormap;
fixed_t _srcalpha;
fixed_t _destalpha;
public:
VecCommand(RtAdd4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_shade_constants = dc_shade_constants;
_colormap = dc_colormap;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
BYTE *colormap = _colormap;
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
ShadeConstants shade_constants = _shade_constants;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};
class VecCommand(RtShaded4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
lighttable_t * RESTRICT _colormap;
int _color;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
public:
VecCommand(RtShaded4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_colormap = dc_colormap;
_color = dc_color;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
}
void Execute(DrawerThread *thread) override
{
BYTE *colormap;
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
colormap = _colormap;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
__m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light))), _mm_setzero_si128());
__m128i alpha_one = _mm_set1_epi16(64);
do {
uint32_t p0 = colormap[source[0]];
uint32_t p1 = colormap[source[1]];
uint32_t p2 = colormap[source[2]];
uint32_t p3 = colormap[source[3]];
__m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2);
__m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0);
__m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi);
__m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo);
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * alpha + bg_red * inv_alpha) / 64:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
};
class VecCommand(RtAddClamp4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
fixed_t _srcalpha;
fixed_t _destalpha;
ShadeConstants _shade_constants;
public:
VecCommand(RtAddClamp4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
_shade_constants = dc_shade_constants;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
ShadeConstants shade_constants = _shade_constants;
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha + bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};
class VecCommand(RtSubClamp4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
fixed_t _srcalpha;
fixed_t _destalpha;
ShadeConstants _shade_constants;
public:
VecCommand(RtSubClamp4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
_shade_constants = dc_shade_constants;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
ShadeConstants shade_constants = _shade_constants;
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (bg_red * bg_alpha - fg_red * fg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (bg_red * bg_alpha - fg_red * fg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};
class VecCommand(RtRevSubClamp4colsRGBA) : public DrawerCommand
{
int sx;
int yl;
int yh;
BYTE * RESTRICT _destorg;
int _pitch;
fixed_t _light;
fixed_t _srcalpha;
fixed_t _destalpha;
ShadeConstants _shade_constants;
public:
VecCommand(RtRevSubClamp4colsRGBA)(int sx, int yl, int yh)
{
this->sx = sx;
this->yl = yl;
this->yh = yh;
_destorg = dc_destorg;
_pitch = dc_pitch;
_light = dc_light;
_srcalpha = dc_srcalpha;
_destalpha = dc_destalpha;
_shade_constants = dc_shade_constants;
}
void Execute(DrawerThread *thread) override
{
uint32_t *source;
uint32_t *dest;
int count;
int pitch;
int sincr;
count = thread->count_for_thread(yl, yh - yl + 1);
if (count <= 0)
return;
dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg);
source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4;
pitch = _pitch * thread->num_cores;
sincr = 4 * thread->num_cores;
uint32_t light = LightBgra::calc_light_multiplier(_light);
uint32_t *palette = (uint32_t*)GPalette.BaseColors;
ShadeConstants shade_constants = _shade_constants;
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8);
uint32_t bg_alpha = _destalpha >> (FRACBITS - 8);
if (shade_constants.simple_shade)
{
VEC_SHADE_VARS();
VEC_SHADE_SIMPLE_INIT(light);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE_SIMPLE(fg);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha - bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
else
{
VEC_SHADE_VARS();
VEC_SHADE_INIT(light, shade_constants);
__m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha);
__m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha);
do {
uint32_t p0 = source[0];
uint32_t p1 = source[1];
uint32_t p2 = source[2];
uint32_t p3 = source[3];
// shade_pal_index:
__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
VEC_SHADE(fg, shade_constants);
__m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128());
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
// unpack bg:
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
__m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128());
// (fg_red * fg_alpha - bg_red * bg_alpha) / 256:
__m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8);
__m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8);
__m128i color = _mm_packus_epi16(color_lo, color_hi);
_mm_storeu_si128((__m128i*)dest, color);
source += sincr;
dest += pitch;
} while (--count);
}
}
};

View File

@ -40,6 +40,7 @@
#include "r_segs.h"
#include "r_3dfloors.h"
#include "r_sky.h"
#include "r_draw_rgba.h"
#include "st_stuff.h"
#include "c_cvars.h"
#include "c_dispatch.h"
@ -103,6 +104,8 @@ bool r_dontmaplines;
CVAR (String, r_viewsize, "", CVAR_NOSET)
CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE)
bool r_swtruecolor;
double r_BaseVisibility;
double r_WallVisibility;
double r_FloorVisibility;
@ -116,7 +119,7 @@ double FocalLengthX;
double FocalLengthY;
FDynamicColormap*basecolormap; // [RH] colormap currently drawing with
int fixedlightlev;
lighttable_t *fixedcolormap;
FSWColormap *fixedcolormap;
FSpecialColormap *realfixedcolormap;
double WallTMapScale2;
@ -396,16 +399,6 @@ void R_InitRenderer()
R_InitPlanes ();
R_InitShadeMaps();
R_InitColumnDrawers ();
colfunc = basecolfunc = R_DrawColumn;
fuzzcolfunc = R_DrawFuzzColumn;
transcolfunc = R_DrawTranslatedColumn;
spanfunc = R_DrawSpan;
// [RH] Horizontal column drawers
hcolfunc_pre = R_DrawColumnHoriz;
hcolfunc_post1 = rt_map1col;
hcolfunc_post4 = rt_map4cols;
}
//==========================================================================
@ -466,16 +459,16 @@ void R_SetupColormap(player_t *player)
if (player->fixedcolormap >= 0 && player->fixedcolormap < (int)SpecialColormaps.Size())
{
realfixedcolormap = &SpecialColormaps[player->fixedcolormap];
if (RenderTarget == screen && (DFrameBuffer *)screen->Accel2D && r_shadercolormaps)
if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps)))
{
// Render everything fullbright. The copy to video memory will
// apply the special colormap, so it won't be restricted to the
// palette.
fixedcolormap = realcolormaps;
fixedcolormap = &realcolormaps;
}
else
{
fixedcolormap = SpecialColormaps[player->fixedcolormap].Colormap;
fixedcolormap = &SpecialColormaps[player->fixedcolormap];
}
}
else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS)
@ -486,7 +479,7 @@ void R_SetupColormap(player_t *player)
// [RH] Inverse light for shooting the Sigil
if (fixedcolormap == NULL && extralight == INT_MIN)
{
fixedcolormap = SpecialColormaps[INVERSECOLORMAP].Colormap;
fixedcolormap = &SpecialColormaps[INVERSECOLORMAP];
extralight = 0;
}
}
@ -575,6 +568,9 @@ void R_HighlightPortal (PortalDrawseg* pds)
// [ZZ] NO OVERFLOW CHECKS HERE
// I believe it won't break. if it does, blame me. :(
if (r_swtruecolor) // Assuming this is just a debug function
return;
BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255);
BYTE* pixels = RenderTarget->GetBuffer();
@ -622,12 +618,26 @@ void R_EnterPortal (PortalDrawseg* pds, int depth)
int Ytop = pds->ceilingclip[x-pds->x1];
int Ybottom = pds->floorclip[x-pds->x1];
BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing;
for (int y = Ytop; y <= Ybottom; y++)
if (r_swtruecolor)
{
*dest = color;
dest += spacing;
uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing;
uint32_t c = GPalette.BaseColors[color].d;
for (int y = Ytop; y <= Ybottom; y++)
{
*dest = c;
dest += spacing;
}
}
else
{
BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing;
for (int y = Ytop; y <= Ybottom; y++)
{
*dest = color;
dest += spacing;
}
}
}
@ -796,7 +806,8 @@ void R_SetupBuffer ()
static BYTE *lastbuff = NULL;
int pitch = RenderTarget->GetPitch();
BYTE *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx;
int pixelsize = r_swtruecolor ? 4 : 1;
BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize;
if (dc_pitch != pitch || lineptr != lastbuff)
{
@ -846,10 +857,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
// [RH] Show off segs if r_drawflat is 1
if (r_drawflat)
{
hcolfunc_pre = R_FillColumnHorizP;
hcolfunc_pre = R_FillColumnHoriz;
hcolfunc_post1 = rt_copy1col;
hcolfunc_post4 = rt_copy4cols;
colfunc = R_FillColumnP;
colfunc = R_FillColumn;
spanfunc = R_FillSpan;
}
else
@ -924,7 +935,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
// If we don't want shadered colormaps, NULL it now so that the
// copy to the screen does not use a special colormap shader.
if (!r_shadercolormaps)
if (!r_shadercolormaps && !r_swtruecolor)
{
realfixedcolormap = NULL;
}
@ -942,6 +953,15 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
int x, int y, int width, int height, bool dontmaplines)
{
const bool savedviewactive = viewactive;
const bool savedoutputformat = r_swtruecolor;
if (r_swtruecolor != canvas->IsBgra())
{
r_swtruecolor = canvas->IsBgra();
R_InitColumnDrawers();
}
R_BeginDrawerCommands();
viewwidth = width;
RenderTarget = canvas;
@ -954,13 +974,22 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
R_RenderActorView (actor, dontmaplines);
R_EndDrawerCommands();
RenderTarget = screen;
bRenderingToCanvas = false;
R_ExecuteSetViewSize ();
screen->Lock (true);
R_SetupBuffer ();
screen->Unlock ();
viewactive = savedviewactive;
r_swtruecolor = savedoutputformat;
if (r_swtruecolor != canvas->IsBgra())
{
R_InitColumnDrawers();
}
}
//==========================================================================

View File

@ -82,6 +82,16 @@ extern bool r_dontmaplines;
// Change R_CalcTiltedLighting() when this changes.
#define GETPALOOKUP(vis,shade) (clamp<int> (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1))
// Calculate the light multiplier for dc_light/ds_light
// This is used instead of GETPALOOKUP when ds_colormap/dc_colormap is set to the base colormap
// Returns a value between 0 and 1 in fixed point
#define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS))
// Converts fixedlightlev into a shade value
#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS)
extern bool r_swtruecolor;
extern double GlobVis;
void R_SetVisibility(double visibility);
@ -96,7 +106,7 @@ extern double r_SpriteVisibility;
extern int r_actualextralight;
extern bool foggy;
extern int fixedlightlev;
extern lighttable_t* fixedcolormap;
extern FSWColormap* fixedcolormap;
extern FSpecialColormap*realfixedcolormap;

View File

@ -58,6 +58,7 @@
#include "r_3dfloors.h"
#include "v_palette.h"
#include "r_data/colormaps.h"
#include "r_draw_rgba.h"
#ifdef _MSC_VER
#pragma warning(disable:4244)
@ -227,12 +228,11 @@ void R_MapPlane (int y, int x1)
if (plane_shade)
{
// Determine lighting based on the span's distance from the viewer.
ds_colormap = basecolormap->Maps + (GETPALOOKUP (
GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT);
R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade);
}
#ifdef X86_ASM
if (ds_colormap != ds_curcolormap)
if (!r_swtruecolor && ds_colormap != ds_curcolormap)
R_SetSpanColormap_ASM (ds_colormap);
#endif
@ -355,7 +355,7 @@ void R_CalcTiltedLighting (double lval, double lend, int width)
//
//==========================================================================
void R_MapTiltedPlane (int y, int x1)
void R_MapTiltedPlane_C (int y, int x1)
{
int x2 = spanend[y];
int width = x2 - x1;
@ -392,7 +392,7 @@ void R_MapTiltedPlane (int y, int x1)
u = SQWORD(uz*z) + pviewx;
v = SQWORD(vz*z) + pviewy;
ds_colormap = tiltlighting[i];
R_SetDSColorMapLight(tiltlighting[i], 0, 0);
fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]];
iz += plane_sz[0];
uz += plane_su[0];
@ -478,17 +478,27 @@ void R_MapTiltedPlane (int y, int x1)
#endif
}
void R_MapTiltedPlane_rgba (int y, int x1)
{
R_DrawTiltedSpan_rgba(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy);
}
//==========================================================================
//
// R_MapColoredPlane
//
//==========================================================================
void R_MapColoredPlane (int y, int x1)
void R_MapColoredPlane_C (int y, int x1)
{
memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1);
}
void R_MapColoredPlane_rgba(int y, int x1)
{
R_DrawColoredSpan_rgba(y, x1, spanend[y]);
}
//==========================================================================
//
// R_ClearPlanes
@ -841,15 +851,24 @@ extern FTexture *rw_pic;
// Allow for layer skies up to 512 pixels tall. This is overkill,
// since the most anyone can ever see of the sky is 500 pixels.
// We need 4 skybufs because wallscan can draw up to 4 columns at a time.
// Need two versions - one for true color and one for palette
static BYTE skybuf[4][512];
static uint32_t skybuf_bgra[4][512];
static DWORD lastskycol[4];
static DWORD lastskycol_bgra[4];
static int skycolplace;
static int skycolplace_bgra;
// Get a column of sky when there is only one sky texture.
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x)
{
angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip;
return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL);
int tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS;
if (!r_swtruecolor)
return fronttex->GetColumn(tx, NULL);
else
return (const BYTE *)fronttex->GetColumnBgra(tx, NULL);
}
// Get a column of sky when there are two overlapping sky textures
@ -864,38 +883,77 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x)
DWORD skycol = (angle1 << 16) | angle2;
int i;
for (i = 0; i < 4; ++i)
if (!r_swtruecolor)
{
if (lastskycol[i] == skycol)
for (i = 0; i < 4; ++i)
{
return skybuf[i];
if (lastskycol[i] == skycol)
{
return skybuf[i];
}
}
lastskycol[skycolplace] = skycol;
BYTE *composite = skybuf[skycolplace];
skycolplace = (skycolplace + 1) & 3;
// The ordering of the following code has been tuned to allow VC++ to optimize
// it well. In particular, this arrangement lets it keep count in a register
// instead of on the stack.
const BYTE *front = fronttex->GetColumn(angle1, NULL);
const BYTE *back = backskytex->GetColumn(angle2, NULL);
int count = MIN<int>(512, MIN(backskytex->GetHeight(), fronttex->GetHeight()));
i = 0;
do
{
if (front[i])
{
composite[i] = front[i];
}
else
{
composite[i] = back[i];
}
} while (++i, --count);
return composite;
}
lastskycol[skycolplace] = skycol;
BYTE *composite = skybuf[skycolplace];
skycolplace = (skycolplace + 1) & 3;
// The ordering of the following code has been tuned to allow VC++ to optimize
// it well. In particular, this arrangement lets it keep count in a register
// instead of on the stack.
const BYTE *front = fronttex->GetColumn (angle1, NULL);
const BYTE *back = backskytex->GetColumn (angle2, NULL);
int count = MIN<int> (512, MIN (backskytex->GetHeight(), fronttex->GetHeight()));
i = 0;
do
else
{
if (front[i])
return R_GetOneSkyColumn(fronttex, x);
for (i = 0; i < 4; ++i)
{
composite[i] = front[i];
if (lastskycol_bgra[i] == skycol)
{
return (BYTE*)(skybuf_bgra[i]);
}
}
else
lastskycol_bgra[skycolplace_bgra] = skycol;
uint32_t *composite = skybuf_bgra[skycolplace_bgra];
skycolplace_bgra = (skycolplace_bgra + 1) & 3;
// The ordering of the following code has been tuned to allow VC++ to optimize
// it well. In particular, this arrangement lets it keep count in a register
// instead of on the stack.
const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL);
const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL);
int count = MIN<int>(512, MIN(backskytex->GetHeight(), fronttex->GetHeight()));
i = 0;
do
{
composite[i] = back[i];
}
} while (++i, --count);
return composite;
if (front[i])
{
composite[i] = front[i];
}
else
{
composite[i] = back[i];
}
} while (++i, --count);
return (BYTE*)composite;
}
}
static void R_DrawSky (visplane_t *pl)
@ -930,6 +988,7 @@ static void R_DrawSky (visplane_t *pl)
for (x = 0; x < 4; ++x)
{
lastskycol[x] = 0xffffffff;
lastskycol_bgra[x] = 0xffffffff;
}
rw_pic = frontskytex;
@ -943,6 +1002,7 @@ static void R_DrawSky (visplane_t *pl)
for (x = 0; x < 4; ++x)
{
lastskycol[x] = 0xffffffff;
lastskycol_bgra[x] = 0xffffffff;
}
wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall,
frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns);
@ -951,7 +1011,7 @@ static void R_DrawSky (visplane_t *pl)
{ // The texture does not tile nicely
frontyScale *= skyscale;
frontiScale = 1 / frontyScale;
R_DrawSkyStriped (pl);
//R_DrawSkyStriped (pl);
}
}
@ -980,6 +1040,7 @@ static void R_DrawSkyStriped (visplane_t *pl)
for (x = 0; x < 4; ++x)
{
lastskycol[x] = 0xffffffff;
lastskycol_bgra[x] = 0xffffffff;
}
wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y,
backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns);
@ -1098,7 +1159,7 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske
R_SetupSpanBits(tex);
double xscale = pl->xform.xScale * tex->Scale.X;
double yscale = pl->xform.yScale * tex->Scale.Y;
ds_source = tex->GetPixels ();
R_SetSpanSource(tex);
basecolormap = pl->colormap;
planeshade = LIGHT2SHADE(pl->lightlevel);
@ -1461,12 +1522,13 @@ void R_DrawSkyPlane (visplane_t *pl)
bool fakefixed = false;
if (fixedcolormap)
{
dc_colormap = fixedcolormap;
R_SetColorMapLight(fixedcolormap, 0, 0);
}
else
{
fakefixed = true;
fixedcolormap = dc_colormap = NormalLight.Maps;
fixedcolormap = &NormalLight;
R_SetColorMapLight(fixedcolormap, 0, 0);
}
R_DrawSky (pl);
@ -1484,7 +1546,7 @@ void R_DrawSkyPlane (visplane_t *pl)
void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked)
{
#ifdef X86_ASM
if (ds_source != ds_cursource)
if (!r_swtruecolor && ds_source != ds_cursource)
{
R_SetSpanSource_ASM (ds_source);
}
@ -1547,12 +1609,21 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
planeheight = fabs(pl->height.Zat0() - ViewPos.Z);
GlobVis = r_FloorVisibility / planeheight;
ds_light = 0;
if (fixedlightlev >= 0)
ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false;
{
R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
plane_shade = false;
}
else if (fixedcolormap)
ds_colormap = fixedcolormap, plane_shade = false;
{
R_SetDSColorMapLight(fixedcolormap, 0, 0);
plane_shade = false;
}
else
{
plane_shade = true;
}
if (spanfunc != R_FillSpan)
{
@ -1565,12 +1636,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
spanfunc = R_DrawSpanMaskedTranslucent;
dc_srcblend = Col2RGB8[alpha>>10];
dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
else
{
spanfunc = R_DrawSpanMaskedAddClamp;
dc_srcblend = Col2RGB8_LessPrecision[alpha>>10];
dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
}
else
@ -1587,12 +1662,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
spanfunc = R_DrawSpanTranslucent;
dc_srcblend = Col2RGB8[alpha>>10];
dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
else
{
spanfunc = R_DrawSpanAddClamp;
dc_srcblend = Col2RGB8_LessPrecision[alpha>>10];
dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10];
dc_srcalpha = alpha;
dc_destalpha = OPAQUE - alpha;
}
}
else
@ -1708,11 +1787,20 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
planelightfloat = -planelightfloat;
if (fixedlightlev >= 0)
ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false;
{
R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
plane_shade = false;
}
else if (fixedcolormap)
ds_colormap = fixedcolormap, plane_shade = false;
{
R_SetDSColorMapLight(fixedcolormap, 0, 0);
plane_shade = false;
}
else
ds_colormap = basecolormap->Maps, plane_shade = true;
{
R_SetDSColorMapLight(basecolormap, 0, 0);
plane_shade = true;
}
if (!plane_shade)
{
@ -1723,9 +1811,16 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
}
#if defined(X86_ASM)
if (ds_source != ds_curtiltedsource)
R_SetTiltedSpanSource_ASM (ds_source);
R_MapVisPlane (pl, R_DrawTiltedPlane_ASM);
if (!r_swtruecolor)
{
if (ds_source != ds_curtiltedsource)
R_SetTiltedSpanSource_ASM(ds_source);
R_MapVisPlane(pl, R_DrawTiltedPlane_ASM);
}
else
{
R_MapVisPlane(pl, R_MapTiltedPlane);
}
#else
R_MapVisPlane (pl, R_MapTiltedPlane);
#endif

View File

@ -93,6 +93,14 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al
void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked);
void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1));
extern void(*R_MapColoredPlane)(int y, int x1);
extern void(*R_MapTiltedPlane)(int y, int x1);
void R_MapTiltedPlane_C(int y, int x1);
void R_MapTiltedPlane_rgba(int y, int x);
void R_MapColoredPlane_C(int y, int x1);
void R_MapColoredPlane_rgba(int y, int x1);
visplane_t *R_FindPlane
( const secplane_t &height,
FTextureID picnum,

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,9 @@
#include "r_3dfloors.h"
#include "textures/textures.h"
#include "r_data/voxels.h"
#include "r_draw_rgba.h"
EXTERN_CVAR(Bool, r_shadercolormaps)
void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio);
void R_SetupColormap(player_t *);
@ -57,6 +59,7 @@ void R_InitRenderer();
void FSoftwareRenderer::Init()
{
r_swtruecolor = screen->IsBgra();
R_InitRenderer();
}
@ -84,11 +87,17 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache)
if (cache & FTextureManager::HIT_Columnmode)
{
const FTexture::Span *spanp;
tex->GetColumn(0, &spanp);
if (r_swtruecolor)
tex->GetColumnBgra(0, &spanp);
else
tex->GetColumn(0, &spanp);
}
else if (cache != 0)
{
tex->GetPixels ();
if (r_swtruecolor)
tex->GetPixelsBgra();
else
tex->GetPixels ();
}
else
{
@ -154,9 +163,24 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap<PClassActor*, bool> &act
void FSoftwareRenderer::RenderView(player_t *player)
{
if (r_swtruecolor != screen->IsBgra())
{
r_swtruecolor = screen->IsBgra();
R_InitColumnDrawers();
}
R_BeginDrawerCommands();
R_RenderActorView (player->mo);
// [RH] Let cameras draw onto textures that were visible this frame.
FCanvasTextureInfo::UpdateAll ();
// Apply special colormap if the target cannot do it
if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D))
{
DrawerCommandQueue::QueueCommand<ApplySpecialColormapRGBACommand>(realfixedcolormap, screen);
}
R_EndDrawerCommands();
}
//==========================================================================
@ -181,7 +205,7 @@ void FSoftwareRenderer::RemapVoxels()
void FSoftwareRenderer::WriteSavePic (player_t *player, FileWriter *file, int width, int height)
{
DCanvas *pic = new DSimpleCanvas (width, height);
DCanvas *pic = new DSimpleCanvas (width, height, false);
PalEntry palette[256];
// Take a snapshot of the player's view
@ -310,27 +334,67 @@ void FSoftwareRenderer::CopyStackedViewParameters()
void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov)
{
BYTE *Pixels = const_cast<BYTE*>(tex->GetPixels());
DSimpleCanvas *Canvas = tex->GetCanvas();
BYTE *Pixels = r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels();
DSimpleCanvas *Canvas = r_swtruecolor ? tex->GetCanvasBgra() : tex->GetCanvas();
// curse Doom's overuse of global variables in the renderer.
// These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette.
unsigned char *savecolormap = fixedcolormap;
FSWColormap *savecolormap = fixedcolormap;
FSpecialColormap *savecm = realfixedcolormap;
DAngle savedfov = FieldOfView;
R_SetFOV ((double)fov);
R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate);
R_SetFOV (savedfov);
if (Pixels == Canvas->GetBuffer())
if (Canvas->IsBgra())
{
FTexture::FlipSquareBlockRemap (Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap);
if (Pixels == Canvas->GetBuffer())
{
FTexture::FlipSquareBlockBgra((uint32_t*)Pixels, tex->GetWidth(), tex->GetHeight());
}
else
{
FTexture::FlipNonSquareBlockBgra((uint32_t*)Pixels, (const uint32_t*)Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch());
}
}
else
{
FTexture::FlipNonSquareBlockRemap (Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap);
if (Pixels == Canvas->GetBuffer())
{
FTexture::FlipSquareBlockRemap(Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap);
}
else
{
FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap);
}
}
if (r_swtruecolor)
{
// True color render still sometimes uses palette textures (for sprites, mostly).
// We need to make sure that both pixel buffers contain data:
int width = tex->GetWidth();
int height = tex->GetHeight();
BYTE *palbuffer = (BYTE *)tex->GetPixels();
uint32_t *bgrabuffer = (uint32_t*)tex->GetPixelsBgra();
for (int x = 0; x < width; x++)
{
for (int y = 0; y < height; y++)
{
uint32_t color = bgrabuffer[y];
int r = RPART(color);
int g = GPART(color);
int b = BPART(color);
palbuffer[y] = RGB32k.RGB[r >> 3][g >> 3][b >> 3];
}
palbuffer += height;
bgrabuffer += height;
}
}
tex->SetUpdated();
fixedcolormap = savecolormap;
realfixedcolormap = savecm;
}

View File

@ -58,6 +58,7 @@
#include "r_plane.h"
#include "r_segs.h"
#include "r_3dfloors.h"
#include "r_draw_rgba.h"
#include "v_palette.h"
#include "r_data/r_translate.h"
#include "r_data/colormaps.h"
@ -251,6 +252,7 @@ bool sprflipvert;
void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span)
{
int pixelsize = r_swtruecolor ? 4 : 1;
const fixed_t centeryfrac = FLOAT2FIXED(CenterY);
const fixed_t texturemid = FLOAT2FIXED(dc_texturemid);
while (span->Length != 0)
@ -321,7 +323,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span)
}
}
dc_source = column + top;
dc_dest = ylookup[dc_yl] + dc_x + dc_destorg;
dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg;
dc_count = dc_yh - dc_yl + 1;
colfunc ();
}
@ -414,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis)
}
fixed_t centeryfrac = FLOAT2FIXED(CenterY);
dc_colormap = vis->Style.colormap;
R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS);
mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor);
@ -422,7 +424,7 @@ void R_DrawVisSprite (vissprite_t *vis)
{ // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but
// it is the brightest one. We need to get back to the proper light level for
// this sprite.
dc_colormap += vis->ColormapNum << COLORMAPSHIFT;
R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS);
}
if (mode != DontDraw)
@ -476,7 +478,7 @@ void R_DrawVisSprite (vissprite_t *vis)
while (dc_x < stop4)
{
rt_initcols();
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
@ -544,11 +546,11 @@ void R_DrawWallSprite(vissprite_t *spr)
rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1));
rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep;
if (fixedlightlev >= 0)
dc_colormap = usecolormap->Maps + fixedlightlev;
R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev));
else if (fixedcolormap != NULL)
dc_colormap = fixedcolormap;
R_SetColorMapLight(fixedcolormap, 0, 0);
else if (!foggy && (spr->renderflags & RF_FULLBRIGHT))
dc_colormap = usecolormap->Maps;
R_SetColorMapLight(usecolormap, 0, 0);
else
calclighting = true;
@ -599,7 +601,7 @@ void R_DrawWallSprite(vissprite_t *spr)
{
if (calclighting)
{ // calculate lighting
dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT);
R_SetColorMapLight(usecolormap, rw_light, shade);
}
if (!R_ClipSpriteColumnWithPortals(spr))
R_WallSpriteColumn(R_DrawMaskedColumn);
@ -610,9 +612,9 @@ void R_DrawWallSprite(vissprite_t *spr)
{
if (calclighting)
{ // calculate lighting
dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT);
R_SetColorMapLight(usecolormap, rw_light, shade);
}
rt_initcols();
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
if (!R_ClipSpriteColumnWithPortals(spr))
@ -626,7 +628,7 @@ void R_DrawWallSprite(vissprite_t *spr)
{
if (calclighting)
{ // calculate lighting
dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT);
R_SetColorMapLight(usecolormap, rw_light, shade);
}
if (!R_ClipSpriteColumnWithPortals(spr))
R_WallSpriteColumn(R_DrawMaskedColumn);
@ -660,14 +662,14 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
int flags = 0;
// Do setup for blending.
dc_colormap = spr->Style.colormap;
R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS);
mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor);
if (mode == DontDraw)
{
return;
}
if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP)
if (colfunc == fuzzcolfunc || colfunc == R_FillColumn)
{
flags = DVF_OFFSCREEN | DVF_SPANSONLY;
}
@ -686,12 +688,13 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
// Render the voxel, either directly to the screen or offscreen.
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle,
spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.colormap, cliptop, clipbot,
spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot,
minslabz, maxslabz, flags);
// Blend the voxel, if that's what we need to do.
if ((flags & ~DVF_MIRRORED) != 0)
{
int pixelsize = r_swtruecolor ? 4 : 1;
for (int x = 0; x < viewwidth; ++x)
{
if (!(flags & DVF_SPANSONLY) && (x & 3) == 0)
@ -706,15 +709,12 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
dc_yl = span->Start;
dc_yh = span->Stop - 1;
dc_count = span->Stop - span->Start;
dc_dest = ylookup[span->Start] + x + dc_destorg;
dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg;
colfunc();
}
else
{
unsigned int **tspan = &dc_ctspan[x & 3];
(*tspan)[0] = span->Start;
(*tspan)[1] = span->Stop - 1;
*tspan += 2;
rt_span_coverage(x, span->Start, span->Stop - 1);
}
}
if (!(flags & DVF_SPANSONLY) && (x & 3) == 3)
@ -1073,7 +1073,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor
vis->Style.Alpha = float(thing->Alpha);
vis->fakefloor = fakefloor;
vis->fakeceiling = fakeceiling;
vis->ColormapNum = 0;
vis->Style.ColormapNum = 0;
vis->bInMirror = MirrorFlags & RF_XFLIP;
vis->bSplitSprite = false;
@ -1125,7 +1125,8 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor
// get light level
if (fixedcolormap != NULL)
{ // fixed map
vis->Style.colormap = fixedcolormap;
vis->Style.BaseColormap = fixedcolormap;
vis->Style.ColormapNum = 0;
}
else
{
@ -1135,17 +1136,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor
}
if (fixedlightlev >= 0)
{
vis->Style.colormap = mybasecolormap->Maps + fixedlightlev;
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT)))
{ // full bright
vis->Style.colormap = mybasecolormap->Maps;
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = 0;
}
else
{ // diminished light
vis->ColormapNum = GETPALOOKUP(
vis->Style.ColormapNum = GETPALOOKUP(
r_SpriteVisibility / MAX(tz, MINZ), spriteshade);
vis->Style.colormap = mybasecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT);
vis->Style.BaseColormap = mybasecolormap;
}
}
}
@ -1214,14 +1217,13 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p
vis->Style.Alpha = float(thing->Alpha);
vis->fakefloor = NULL;
vis->fakeceiling = NULL;
vis->ColormapNum = 0;
vis->bInMirror = MirrorFlags & RF_XFLIP;
vis->pic = pic;
vis->bIsVoxel = false;
vis->bWallSprite = true;
vis->ColormapNum = GETPALOOKUP(
vis->Style.ColormapNum = GETPALOOKUP(
r_SpriteVisibility / MAX(tz, MINZ), spriteshade);
vis->Style.colormap = basecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT);
vis->Style.BaseColormap = basecolormap;
vis->wallc = wallc;
}
@ -1401,7 +1403,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
vis->yscale = float(pspriteyscale / tex->Scale.Y);
vis->Translation = 0; // [RH] Use default colors
vis->pic = tex;
vis->ColormapNum = 0;
vis->Style.ColormapNum = 0;
if (flip)
{
@ -1449,9 +1451,10 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
}
}
if (realfixedcolormap != nullptr)
if (realfixedcolormap != nullptr && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D)))
{ // fixed color
vis->Style.colormap = realfixedcolormap->Colormap;
vis->Style.BaseColormap = realfixedcolormap;
vis->Style.ColormapNum = 0;
}
else
{
@ -1461,35 +1464,38 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
}
if (fixedlightlev >= 0)
{
vis->Style.colormap = mybasecolormap->Maps + fixedlightlev;
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (!foggy && pspr->GetState()->GetFullbright())
{ // full bright
vis->Style.colormap = mybasecolormap->Maps; // [RH] use basecolormap
vis->Style.BaseColormap = mybasecolormap; // [RH] use basecolormap
vis->Style.ColormapNum = 0;
}
else
{ // local light
vis->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP(0, spriteshade) << COLORMAPSHIFT);
vis->Style.BaseColormap = mybasecolormap;
vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade);
}
}
if (camera->Inventory != nullptr)
{
lighttable_t *oldcolormap = vis->Style.colormap;
camera->Inventory->AlterWeaponSprite(&vis->Style);
if (vis->Style.colormap != oldcolormap)
BYTE oldcolormapnum = vis->Style.ColormapNum;
FSWColormap *oldcolormap = vis->Style.BaseColormap;
camera->Inventory->AlterWeaponSprite (&vis->Style);
if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum)
{
// The colormap has changed. Is it one we can easily identify?
// If not, then don't bother trying to identify it for
// hardware accelerated drawing.
if (vis->Style.colormap < SpecialColormaps[0].Colormap ||
vis->Style.colormap > SpecialColormaps.Last().Colormap)
if (vis->Style.BaseColormap < &SpecialColormaps[0] ||
vis->Style.BaseColormap > &SpecialColormaps.Last())
{
noaccel = true;
}
// Has the basecolormap changed? If so, we can't hardware accelerate it,
// since we don't know what it is anymore.
else if (vis->Style.colormap < mybasecolormap->Maps ||
vis->Style.colormap >= mybasecolormap->Maps + NUMCOLORMAPS * 256)
else if (vis->Style.BaseColormap != mybasecolormap)
{
noaccel = true;
}
@ -1497,13 +1503,13 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
}
// If we're drawing with a special colormap, but shaders for them are disabled, do
// not accelerate.
if (!r_shadercolormaps && (vis->Style.colormap >= SpecialColormaps[0].Colormap &&
vis->Style.colormap <= SpecialColormaps.Last().Colormap))
if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] &&
vis->Style.BaseColormap <= &SpecialColormaps.Last()))
{
noaccel = true;
}
// If drawing with a BOOM colormap, disable acceleration.
if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps)
if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps)
{
noaccel = true;
}
@ -1520,7 +1526,8 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double
else
{
colormap_to_use = basecolormap;
vis->Style.colormap = basecolormap->Maps;
vis->Style.BaseColormap = basecolormap;
vis->Style.ColormapNum = 0;
vis->Style.RenderStyle = STYLE_Normal;
}
@ -1691,18 +1698,16 @@ void R_DrawRemainingPlayerSprites()
FColormapStyle colormapstyle;
bool usecolormapstyle = false;
if (vis->Style.colormap >= SpecialColormaps[0].Colormap &&
vis->Style.colormap < SpecialColormaps[SpecialColormaps.Size()].Colormap)
if (vis->Style.BaseColormap >= &SpecialColormaps[0] &&
vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()])
{
// Yuck! There needs to be a better way to store colormaps in the vissprite... :(
ptrdiff_t specialmap = (vis->Style.colormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap);
special = &SpecialColormaps[specialmap];
special = static_cast<FSpecialColormap*>(vis->Style.BaseColormap);
}
else if (colormap->Color == PalEntry(255,255,255) &&
colormap->Desaturate == 0)
{
overlay = colormap->Fade;
overlay.a = BYTE(((vis->Style.colormap - colormap->Maps) >> 8) * 255 / NUMCOLORMAPS);
overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS);
}
else
{
@ -1710,7 +1715,7 @@ void R_DrawRemainingPlayerSprites()
colormapstyle.Color = colormap->Color;
colormapstyle.Fade = colormap->Fade;
colormapstyle.Desaturate = colormap->Desaturate;
colormapstyle.FadeLevel = ((vis->Style.colormap - colormap->Maps) >> 8) / float(NUMCOLORMAPS);
colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS);
}
screen->DrawTexture(vis->pic,
viewwindowx + vispsprites[i].x1,
@ -1955,7 +1960,8 @@ void R_DrawSprite (vissprite_t *spr)
int r1, r2;
short topclip, botclip;
short *clip1, *clip2;
lighttable_t *colormap = spr->Style.colormap;
FSWColormap *colormap = spr->Style.BaseColormap;
int colormapnum = spr->Style.ColormapNum;
F3DFloor *rover;
FDynamicColormap *mybasecolormap;
@ -2052,17 +2058,19 @@ void R_DrawSprite (vissprite_t *spr)
}
if (fixedlightlev >= 0)
{
spr->Style.colormap = mybasecolormap->Maps + fixedlightlev;
spr->Style.BaseColormap = mybasecolormap;
spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (!foggy && (spr->renderflags & RF_FULLBRIGHT))
{ // full bright
spr->Style.colormap = mybasecolormap->Maps;
spr->Style.BaseColormap = mybasecolormap;
spr->Style.ColormapNum = 0;
}
else
{ // diminished light
spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight);
spr->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP (
r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade) << COLORMAPSHIFT);
spr->Style.BaseColormap = mybasecolormap;
spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade);
}
}
}
@ -2210,7 +2218,8 @@ void R_DrawSprite (vissprite_t *spr)
if (topclip >= botclip)
{
spr->Style.colormap = colormap;
spr->Style.BaseColormap = colormap;
spr->Style.ColormapNum = colormapnum;
return;
}
@ -2340,7 +2349,8 @@ void R_DrawSprite (vissprite_t *spr)
}
if (i == x2)
{
spr->Style.colormap = colormap;
spr->Style.BaseColormap = colormap;
spr->Style.ColormapNum = colormapnum;
return;
}
}
@ -2358,7 +2368,8 @@ void R_DrawSprite (vissprite_t *spr)
int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale);
R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot);
}
spr->Style.colormap = colormap;
spr->Style.BaseColormap = colormap;
spr->Style.ColormapNum = colormapnum;
}
// kg3D:
@ -2475,7 +2486,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
int x1, x2, y1, y2;
vissprite_t* vis;
sector_t* heightsec = NULL;
BYTE* map;
FSWColormap* map;
// [ZZ] Particle not visible through the portal plane
if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst))
@ -2548,7 +2559,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &heightsec->ceilingplane;
toppic = sector->GetTexture(sector_t::ceiling);
botpic = heightsec->GetTexture(sector_t::ceiling);
map = heightsec->ColorMap->Maps;
map = heightsec->ColorMap;
}
else if (fakeside == FAKED_BelowFloor)
{
@ -2556,7 +2567,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &sector->floorplane;
toppic = heightsec->GetTexture(sector_t::floor);
botpic = sector->GetTexture(sector_t::floor);
map = heightsec->ColorMap->Maps;
map = heightsec->ColorMap;
}
else
{
@ -2564,7 +2575,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &heightsec->floorplane;
toppic = heightsec->GetTexture(sector_t::ceiling);
botpic = heightsec->GetTexture(sector_t::floor);
map = sector->ColorMap->Maps;
map = sector->ColorMap;
}
}
else
@ -2573,7 +2584,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
botplane = &sector->floorplane;
toppic = sector->GetTexture(sector_t::ceiling);
botpic = sector->GetTexture(sector_t::floor);
map = sector->ColorMap->Maps;
map = sector->ColorMap;
}
if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint (particle->Pos))
@ -2602,25 +2613,28 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade,
vis->renderflags = particle->trans;
vis->FakeFlatStat = fakeside;
vis->floorclip = 0;
vis->ColormapNum = 0;
vis->Style.ColormapNum = 0;
if (fixedlightlev >= 0)
{
vis->Style.colormap = map + fixedlightlev;
vis->Style.BaseColormap = map;
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT;
}
else if (fixedcolormap)
{
vis->Style.colormap = fixedcolormap;
vis->Style.BaseColormap = fixedcolormap;
vis->Style.ColormapNum = 0;
}
else if (particle->bright)
{
vis->Style.colormap = map;
vis->Style.BaseColormap = map;
vis->Style.ColormapNum = 0;
}
else
{
// Particles are slightly more visible than regular sprites.
vis->ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade);
vis->Style.colormap = map + (vis->ColormapNum << COLORMAPSHIFT);
vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade);
vis->Style.BaseColormap = map;
}
}
@ -2649,13 +2663,13 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis)
}
}
void R_DrawParticle (vissprite_t *vis)
void R_DrawParticle_C (vissprite_t *vis)
{
DWORD *bg2rgb;
int spacing;
BYTE *dest;
DWORD fg;
BYTE color = vis->Style.colormap[vis->startfrac];
BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac];
int yl = vis->y1;
int ycount = vis->y2 - yl + 1;
int x1 = vis->x1;
@ -2714,12 +2728,64 @@ void R_DrawParticle (vissprite_t *vis)
}
}
void R_DrawParticle_rgba(vissprite_t *vis)
{
int spacing;
uint32_t *dest;
BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac];
int yl = vis->y1;
int ycount = vis->y2 - yl + 1;
int x1 = vis->x1;
int countbase = vis->x2 - x1;
R_DrawMaskedSegsBehindParticle(vis);
DrawerCommandQueue::WaitForWorkers();
uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS)));
uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff;
// vis->renderflags holds translucency level (0-255)
fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff;
uint32_t alpha = fglevel * 256 / FRACUNIT;
uint32_t inv_alpha = 256 - alpha;
fg_red *= alpha;
fg_green *= alpha;
fg_blue *= alpha;
spacing = RenderTarget->GetPitch();
for (int x = x1; x < (x1 + countbase); x++)
{
dc_x = x;
if (R_ClipSpriteColumnWithPortals(vis))
continue;
dest = ylookup[yl] + x + (uint32_t*)dc_destorg;
for (int y = 0; y < ycount; y++)
{
uint32_t bg_red = (*dest >> 16) & 0xff;
uint32_t bg_green = (*dest >> 8) & 0xff;
uint32_t bg_blue = (*dest) & 0xff;
uint32_t red = (fg_red + bg_red * inv_alpha) / 256;
uint32_t green = (fg_green + bg_green * inv_alpha) / 256;
uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256;
*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
dest += spacing;
}
}
}
extern double BaseYaspectMul;;
void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle,
const FVector3 &dasprpos, DAngle dasprang,
fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj,
lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags)
FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags)
{
int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff;
fixed_t cosang, sinang, sprcosang, sprsinang;
@ -2761,7 +2827,9 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle,
sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2;
sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2;
R_SetupDrawSlab(colormap);
R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS);
int pixelsize = r_swtruecolor ? 4 : 1;
// Select mip level
i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang));
@ -3016,7 +3084,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle,
if (!(flags & DVF_OFFSCREEN))
{
// Draw directly to the screen.
R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, ylookup[z1] + lxt + xxl + dc_destorg);
R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg);
}
else
{
@ -3247,12 +3315,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly)
{
if (OffscreenColorBuffer == NULL)
{
OffscreenColorBuffer = new BYTE[width * height];
OffscreenColorBuffer = new BYTE[width * height * 4];
}
else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height)
{
delete[] OffscreenColorBuffer;
OffscreenColorBuffer = new BYTE[width * height];
OffscreenColorBuffer = new BYTE[width * height * 4];
}
}
OffscreenBufferWidth = width;

View File

@ -86,7 +86,6 @@ struct vissprite_t
BYTE bSplitSprite:1; // [RH] Sprite was split by a drawseg
BYTE bInMirror:1; // [RH] Sprite is "inside" a mirror
BYTE FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on
BYTE ColormapNum; // Which colormap is rendered (needed for shaded drawer)
short renderflags;
DWORD Translation; // [RH] for color translation
visstyle_t Style;
@ -97,7 +96,10 @@ struct vissprite_t
struct particle_t;
void R_DrawParticle (vissprite_t *);
extern void(*R_DrawParticle)(vissprite_t *);
void R_DrawParticle_C (vissprite_t *);
void R_DrawParticle_rgba (vissprite_t *);
void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside);
extern int MaxVisSprites;
@ -142,7 +144,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 };
void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle,
const FVector3 &sprpos, DAngle dasprang,
fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj,
lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags);
FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags);
void R_ClipVisSprite (vissprite_t *vis, int xl, int xh);

View File

@ -896,11 +896,11 @@ void R_SetupFrame (AActor *actor)
BaseBlendG = GPART(newblend);
BaseBlendB = BPART(newblend);
BaseBlendA = APART(newblend) / 255.f;
NormalLight.Maps = realcolormaps;
NormalLight.Maps = realcolormaps.Maps;
}
else
{
NormalLight.Maps = realcolormaps + NUMCOLORMAPS*256*newblend;
NormalLight.Maps = realcolormaps.Maps + NUMCOLORMAPS*256*newblend;
BaseBlendR = BaseBlendG = BaseBlendB = 0;
BaseBlendA = 0.f;
}

View File

@ -122,6 +122,7 @@ void FAutomapTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -56,7 +56,6 @@ public:
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
void Unload ();
protected:
const BYTE *Pixels;
@ -103,17 +102,6 @@ FBuildTexture::~FBuildTexture ()
//
//==========================================================================
void FBuildTexture::Unload ()
{
// Nothing to do, since the pixels are accessed from memory-mapped files directly
}
//==========================================================================
//
//
//
//==========================================================================
const BYTE *FBuildTexture::GetPixels ()
{
return Pixels;

View File

@ -53,7 +53,6 @@ FCanvasTexture::FCanvasTexture (const char *name, int width, int height)
DummySpans[1].TopOffset = 0;
DummySpans[1].Length = 0;
UseType = TEX_Wall;
Canvas = NULL;
bNeedsUpdate = true;
bDidUpdate = false;
bHasCanvas = true;
@ -101,11 +100,22 @@ const BYTE *FCanvasTexture::GetPixels ()
return Pixels;
}
const uint32_t *FCanvasTexture::GetPixelsBgra()
{
bNeedsUpdate = true;
if (CanvasBgra == NULL)
{
MakeTextureBgra();
}
return PixelsBgra;
}
void FCanvasTexture::MakeTexture ()
{
Canvas = new DSimpleCanvas (Width, Height);
Canvas = new DSimpleCanvas (Width, Height, false);
Canvas->Lock ();
GC::AddSoftRoot(Canvas);
if (Width != Height || Width != Canvas->GetPitch())
{
Pixels = new BYTE[Width*Height];
@ -113,29 +123,68 @@ void FCanvasTexture::MakeTexture ()
}
else
{
Pixels = Canvas->GetBuffer();
Pixels = (BYTE*)Canvas->GetBuffer();
bPixelsAllocated = false;
}
// Draw a special "unrendered" initial texture into the buffer.
memset (Pixels, 0, Width*Height/2);
memset (Pixels+Width*Height/2, 255, Width*Height/2);
}
void FCanvasTexture::MakeTextureBgra()
{
CanvasBgra = new DSimpleCanvas(Width, Height, true);
CanvasBgra->Lock();
GC::AddSoftRoot(CanvasBgra);
if (Width != Height || Width != CanvasBgra->GetPitch())
{
PixelsBgra = new uint32_t[Width*Height];
bPixelsAllocatedBgra = true;
}
else
{
PixelsBgra = (uint32_t*)CanvasBgra->GetBuffer();
bPixelsAllocatedBgra = false;
}
// Draw a special "unrendered" initial texture into the buffer.
memset(PixelsBgra, 0, Width*Height / 2 * 4);
memset(PixelsBgra + Width*Height / 2, 255, Width*Height / 2 * 4);
}
void FCanvasTexture::Unload ()
{
if (bPixelsAllocated)
{
if (Pixels != NULL) delete [] Pixels;
if (Pixels != NULL) delete[] Pixels;
bPixelsAllocated = false;
Pixels = NULL;
}
if (bPixelsAllocatedBgra)
{
if (PixelsBgra != NULL) delete[] PixelsBgra;
bPixelsAllocatedBgra = false;
PixelsBgra = NULL;
}
if (Canvas != NULL)
{
GC::DelSoftRoot(Canvas);
Canvas->Destroy();
Canvas = NULL;
}
if (CanvasBgra != NULL)
{
GC::DelSoftRoot(CanvasBgra);
CanvasBgra->Destroy();
CanvasBgra = NULL;
}
FTexture::Unload();
}
bool FCanvasTexture::CheckModified ()

View File

@ -401,6 +401,7 @@ void FDDSTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -138,6 +138,7 @@ void FFlatTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -142,6 +142,7 @@ void FIMGZTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -295,11 +295,9 @@ FJPEGTexture::~FJPEGTexture ()
void FJPEGTexture::Unload ()
{
if (Pixels != NULL)
{
delete[] Pixels;
Pixels = NULL;
}
delete[] Pixels;
Pixels = NULL;
FTexture::Unload();
}
//==========================================================================

View File

@ -362,6 +362,7 @@ void FMultiPatchTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -184,6 +184,7 @@ void FPatchTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -191,6 +191,7 @@ void FPCXTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -372,11 +372,9 @@ FPNGTexture::~FPNGTexture ()
void FPNGTexture::Unload ()
{
if (Pixels != NULL)
{
delete[] Pixels;
Pixels = NULL;
}
delete[] Pixels;
Pixels = NULL;
FTexture::Unload();
}
//==========================================================================
@ -449,6 +447,7 @@ const BYTE *FPNGTexture::GetPixels ()
return Pixels;
}
//==========================================================================
//
//

View File

@ -206,6 +206,7 @@ void FRawPageTexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -45,6 +45,7 @@
#include "v_video.h"
#include "m_fixed.h"
#include "textures/textures.h"
#include "v_palette.h"
typedef bool (*CheckFunc)(FileReader & file);
typedef FTexture * (*CreateFunc)(FileReader & file, int lumpnum);
@ -175,6 +176,37 @@ FTexture::~FTexture ()
KillNative();
}
void FTexture::Unload()
{
PixelsBgra = std::vector<uint32_t>();
}
const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out)
{
const uint32_t *pixels = GetPixelsBgra();
column %= Width;
if (spans_out != nullptr)
GetColumn(column, spans_out);
return pixels + column * Height;
}
const uint32_t *FTexture::GetPixelsBgra()
{
if (PixelsBgra.empty() || CheckModified())
{
if (!GetColumn(0, nullptr))
return nullptr;
FBitmap bitmap;
bitmap.Create(GetWidth(), GetHeight());
CopyTrueColorPixels(&bitmap, 0, 0);
GenerateBgraFromBitmap(bitmap);
}
return PixelsBgra.data();
}
bool FTexture::CheckModified ()
{
return false;
@ -318,6 +350,210 @@ void FTexture::FreeSpans (Span **spans) const
M_Free (spans);
}
void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap)
{
CreatePixelsBgraWithMipmaps();
// Transpose
const uint32_t *src = (const uint32_t *)bitmap.GetPixels();
uint32_t *dest = PixelsBgra.data();
for (int x = 0; x < Width; x++)
{
for (int y = 0; y < Height; y++)
{
dest[y + x * Height] = src[x + y * Width];
}
}
GenerateBgraMipmaps();
}
void FTexture::CreatePixelsBgraWithMipmaps()
{
int levels = MipmapLevels();
int buffersize = 0;
for (int i = 0; i < levels; i++)
{
int w = MAX(Width >> i, 1);
int h = MAX(Height >> i, 1);
buffersize += w * h;
}
PixelsBgra.resize(buffersize, 0xffff0000);
}
int FTexture::MipmapLevels() const
{
int widthbits = 0;
while ((Width >> widthbits) != 0) widthbits++;
int heightbits = 0;
while ((Height >> heightbits) != 0) heightbits++;
return MAX(widthbits, heightbits);
}
void FTexture::GenerateBgraMipmaps()
{
struct Color4f
{
float a, r, g, b;
Color4f operator*(const Color4f &v) const { return Color4f{ a * v.a, r * v.r, g * v.g, b * v.b }; }
Color4f operator/(const Color4f &v) const { return Color4f{ a / v.a, r / v.r, g / v.g, b / v.b }; }
Color4f operator+(const Color4f &v) const { return Color4f{ a + v.a, r + v.r, g + v.g, b + v.b }; }
Color4f operator-(const Color4f &v) const { return Color4f{ a - v.a, r - v.r, g - v.g, b - v.b }; }
Color4f operator*(float s) const { return Color4f{ a * s, r * s, g * s, b * s }; }
Color4f operator/(float s) const { return Color4f{ a / s, r / s, g / s, b / s }; }
Color4f operator+(float s) const { return Color4f{ a + s, r + s, g + s, b + s }; }
Color4f operator-(float s) const { return Color4f{ a - s, r - s, g - s, b - s }; }
};
int levels = MipmapLevels();
std::vector<Color4f> image(PixelsBgra.size());
// Convert to normalized linear colorspace
{
for (int x = 0; x < Width; x++)
{
for (int y = 0; y < Height; y++)
{
uint32_t c8 = PixelsBgra[x * Height + y];
Color4f c;
c.a = powf(APART(c8) * (1.0f / 255.0f), 2.2f);
c.r = powf(RPART(c8) * (1.0f / 255.0f), 2.2f);
c.g = powf(GPART(c8) * (1.0f / 255.0f), 2.2f);
c.b = powf(BPART(c8) * (1.0f / 255.0f), 2.2f);
image[x * Height + y] = c;
}
}
}
// Generate mipmaps
{
std::vector<Color4f> smoothed(Width * Height);
Color4f *src = image.data();
Color4f *dest = src + Width * Height;
for (int i = 1; i < levels; i++)
{
int srcw = MAX(Width >> (i - 1), 1);
int srch = MAX(Height >> (i - 1), 1);
int w = MAX(Width >> i, 1);
int h = MAX(Height >> i, 1);
// Downscale
for (int x = 0; x < w; x++)
{
int sx0 = x * 2;
int sx1 = MIN((x + 1) * 2, srcw - 1);
for (int y = 0; y < h; y++)
{
int sy0 = y * 2;
int sy1 = MIN((y + 1) * 2, srch - 1);
Color4f src00 = src[sy0 + sx0 * srch];
Color4f src01 = src[sy1 + sx0 * srch];
Color4f src10 = src[sy0 + sx1 * srch];
Color4f src11 = src[sy1 + sx1 * srch];
Color4f c = (src00 + src01 + src10 + src11) * 0.25f;
dest[y + x * h] = src00;
}
}
// Sharpen filter with a 3x3 kernel:
for (int x = 0; x < w; x++)
{
for (int y = 0; y < h; y++)
{
Color4f c = { 0.0f, 0.0f, 0.0f, 0.0f };
for (int kx = -1; kx < 2; kx++)
{
for (int ky = -1; ky < 2; ky++)
{
int a = y + ky;
int b = x + kx;
if (a < 0) a = h - 1;
if (a == h) a = 0;
if (b < 0) b = w - 1;
if (b == w) b = 0;
c = c + dest[a + b * h];
}
}
c = c * (1.0f / 9.0f);
smoothed[y + x * h] = c;
}
}
float k = 0.04f;
for (int j = 0; j < w * h; j++)
dest[j] = dest[j] + (dest[j] - smoothed[j]) * k;
src = dest;
dest += w * h;
}
}
// Convert to bgra8 sRGB colorspace
{
Color4f *src = image.data() + Width * Height;
uint32_t *dest = PixelsBgra.data() + Width * Height;
for (int i = 1; i < levels; i++)
{
int w = MAX(Width >> i, 1);
int h = MAX(Height >> i, 1);
for (int j = 0; j < w * h; j++)
{
uint32_t a = (uint32_t)clamp(powf(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
uint32_t r = (uint32_t)clamp(powf(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
uint32_t g = (uint32_t)clamp(powf(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
uint32_t b = (uint32_t)clamp(powf(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f);
dest[j] = (a << 24) | (r << 16) | (g << 8) | b;
}
src += w * h;
dest += w * h;
}
}
}
void FTexture::GenerateBgraMipmapsFast()
{
uint32_t *src = PixelsBgra.data();
uint32_t *dest = src + Width * Height;
int levels = MipmapLevels();
for (int i = 1; i < levels; i++)
{
int srcw = MAX(Width >> (i - 1), 1);
int srch = MAX(Height >> (i - 1), 1);
int w = MAX(Width >> i, 1);
int h = MAX(Height >> i, 1);
for (int x = 0; x < w; x++)
{
int sx0 = x * 2;
int sx1 = MIN((x + 1) * 2, srcw - 1);
for (int y = 0; y < h; y++)
{
int sy0 = y * 2;
int sy1 = MIN((y + 1) * 2, srch - 1);
uint32_t src00 = src[sy0 + sx0 * srch];
uint32_t src01 = src[sy1 + sx0 * srch];
uint32_t src10 = src[sy0 + sx1 * srch];
uint32_t src11 = src[sy1 + sx1 * srch];
uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4;
uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4;
uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4;
uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4;
dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue;
}
}
src = dest;
dest += w * h;
}
}
void FTexture::CopyToBlock (BYTE *dest, int dwidth, int dheight, int xpos, int ypos, int rotate, const BYTE *translation)
{
const BYTE *pixels = GetPixels();
@ -384,6 +620,29 @@ void FTexture::FlipSquareBlock (BYTE *block, int x, int y)
}
}
void FTexture::FlipSquareBlockBgra(uint32_t *block, int x, int y)
{
int i, j;
if (x != y) return;
for (i = 0; i < x; ++i)
{
uint32_t *corner = block + x*i + i;
int count = x - i;
if (count & 1)
{
count--;
swapvalues<uint32_t>(corner[count], corner[count*x]);
}
for (j = 0; j < count; j += 2)
{
swapvalues<uint32_t>(corner[j], corner[j*x]);
swapvalues<uint32_t>(corner[j + 1], corner[(j + 1)*x]);
}
}
}
void FTexture::FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap)
{
int i, j;
@ -427,6 +686,19 @@ void FTexture::FlipNonSquareBlock (BYTE *dst, const BYTE *src, int x, int y, int
}
}
void FTexture::FlipNonSquareBlockBgra(uint32_t *dst, const uint32_t *src, int x, int y, int srcpitch)
{
int i, j;
for (i = 0; i < x; ++i)
{
for (j = 0; j < y; ++j)
{
dst[i*y + j] = src[i + j*srcpitch];
}
}
}
void FTexture::FlipNonSquareBlockRemap (BYTE *dst, const BYTE *src, int x, int y, int srcpitch, const BYTE *remap)
{
int i, j;
@ -580,10 +852,6 @@ FDummyTexture::FDummyTexture ()
UseType = TEX_Null;
}
void FDummyTexture::Unload ()
{
}
void FDummyTexture::SetSize (int width, int height)
{
Width = width;

View File

@ -3,6 +3,7 @@
#include "doomtype.h"
#include "vectors.h"
#include <vector>
struct FloatRect
{
@ -195,9 +196,18 @@ public:
// Returns a single column of the texture
virtual const BYTE *GetColumn (unsigned int column, const Span **spans_out) = 0;
// Returns a single column of the texture, in BGRA8 format
virtual const uint32_t *GetColumnBgra(unsigned int column, const Span **spans_out);
// Returns the whole texture, stored in column-major order
virtual const BYTE *GetPixels () = 0;
// Returns the whole texture, stored in column-major order, in BGRA8 format
virtual const uint32_t *GetPixelsBgra();
// Returns true if GetPixelsBgra includes mipmaps
virtual bool Mipmapped() { return true; }
virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL);
int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL);
virtual bool UseBasePalette();
@ -205,7 +215,7 @@ public:
virtual FTexture *GetRedirect(bool wantwarped);
virtual FTexture *GetRawTexture(); // for FMultiPatchTexture to override
virtual void Unload () = 0;
virtual void Unload ();
// Returns the native pixel format for this image
virtual FTextureFormat GetFormat();
@ -285,10 +295,20 @@ protected:
gl_info.areas = NULL;
}
std::vector<uint32_t> PixelsBgra;
void GenerateBgraFromBitmap(const FBitmap &bitmap);
void CreatePixelsBgraWithMipmaps();
void GenerateBgraMipmaps();
void GenerateBgraMipmapsFast();
int MipmapLevels() const;
public:
static void FlipSquareBlock (BYTE *block, int x, int y);
static void FlipSquareBlockBgra (uint32_t *block, int x, int y);
static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap);
static void FlipNonSquareBlock (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch);
static void FlipNonSquareBlockBgra (uint32_t *blockto, const uint32_t *blockfrom, int x, int y, int srcpitch);
static void FlipNonSquareBlockRemap (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch, const BYTE *remap);
friend class D3DTex;
@ -520,7 +540,6 @@ public:
FDummyTexture ();
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
void Unload ();
void SetSize (int width, int height);
};
@ -534,6 +553,7 @@ public:
virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL);
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
const uint32_t *GetPixelsBgra() override;
void Unload ();
bool CheckModified ();
@ -567,21 +587,28 @@ public:
const BYTE *GetColumn (unsigned int column, const Span **spans_out);
const BYTE *GetPixels ();
const uint32_t *GetPixelsBgra() override;
void Unload ();
bool CheckModified ();
void NeedUpdate() { bNeedsUpdate=true; }
void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; }
DSimpleCanvas *GetCanvas() { return Canvas; }
DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; }
bool Mipmapped() override { return false; }
void MakeTexture ();
void MakeTextureBgra ();
protected:
DSimpleCanvas *Canvas;
BYTE *Pixels;
DSimpleCanvas *Canvas = nullptr;
DSimpleCanvas *CanvasBgra = nullptr;
BYTE *Pixels = nullptr;
uint32_t *PixelsBgra = nullptr;
Span DummySpans[2];
bool bNeedsUpdate;
bool bDidUpdate;
bool bPixelsAllocated;
bool bNeedsUpdate = true;
bool bDidUpdate = false;
bool bPixelsAllocated = false;
bool bPixelsAllocatedBgra = false;
public:
bool bFirstUpdate;

View File

@ -181,6 +181,7 @@ void FTGATexture::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -39,6 +39,7 @@
#include "r_utility.h"
#include "textures/textures.h"
#include "warpbuffer.h"
#include "v_palette.h"
FWarpTexture::FWarpTexture (FTexture *source, int warptype)
@ -74,6 +75,7 @@ void FWarpTexture::Unload ()
Spans = NULL;
}
SourcePic->Unload ();
FTexture::Unload();
}
bool FWarpTexture::CheckModified ()
@ -92,6 +94,25 @@ const BYTE *FWarpTexture::GetPixels ()
return Pixels;
}
const uint32_t *FWarpTexture::GetPixelsBgra()
{
DWORD time = r_FrameTime;
if (Pixels == NULL || time != GenTime)
{
MakeTexture(time);
CreatePixelsBgraWithMipmaps();
for (int i = 0; i < Width * Height; i++)
{
if (Pixels[i] != 0)
PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[Pixels[i]].d;
else
PixelsBgra[i] = 0;
}
GenerateBgraMipmapsFast();
}
return PixelsBgra.data();
}
const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out)
{
DWORD time = r_FrameTime;

View File

@ -44,6 +44,7 @@
#include "r_utility.h"
#ifndef NO_SWRENDER
#include "r_draw.h"
#include "r_draw_rgba.h"
#include "r_main.h"
#include "r_things.h"
#endif
@ -137,6 +138,12 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH];
const BYTE *translation = NULL;
if (r_swtruecolor != IsBgra())
{
r_swtruecolor = IsBgra();
R_InitColumnDrawers();
}
if (parms.masked)
{
spanptr = &spans;
@ -173,14 +180,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
if (translation != NULL)
{
dc_colormap = (lighttable_t *)translation;
R_SetTranslationMap((lighttable_t *)translation);
}
else
{
dc_colormap = identitymap;
R_SetTranslationMap(identitymap);
}
fixedcolormap = dc_colormap;
fixedcolormap = dc_fcolormap;
ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor);
BYTE *destorgsave = dc_destorg;
@ -306,7 +313,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
while (dc_x < stop4)
{
rt_initcols();
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
@ -1023,13 +1030,35 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level)
oldyyshifted = yy * GetPitch();
}
BYTE *spot = GetBuffer() + oldyyshifted + xx;
DWORD *bg2rgb = Col2RGB8[1+level];
DWORD *fg2rgb = Col2RGB8[63-level];
DWORD fg = fg2rgb[basecolor];
DWORD bg = bg2rgb[*spot];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
if (IsBgra())
{
uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx;
uint32_t fg = LightBgra::shade_pal_index_simple(basecolor, LightBgra::calc_light_multiplier(0));
uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff;
uint32_t bg_red = (*spot >> 16) & 0xff;
uint32_t bg_green = (*spot >> 8) & 0xff;
uint32_t bg_blue = (*spot) & 0xff;
uint32_t red = (fg_red + bg_red + 1) / 2;
uint32_t green = (fg_green + bg_green + 1) / 2;
uint32_t blue = (fg_blue + bg_blue + 1) / 2;
*spot = 0xff000000 | (red << 16) | (green << 8) | blue;
}
else
{
BYTE *spot = GetBuffer() + oldyyshifted + xx;
DWORD *bg2rgb = Col2RGB8[1+level];
DWORD *fg2rgb = Col2RGB8[63-level];
DWORD fg = fg2rgb[basecolor];
DWORD bg = bg2rgb[*spot];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
}
}
void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor)
@ -1073,27 +1102,65 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real
{
swapvalues (x0, x1);
}
memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1);
if (IsBgra())
{
uint32_t fillColor = GPalette.BaseColors[palColor].d;
uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0;
for (int i = 0; i <= deltaX; i++)
spot[i] = fillColor;
}
else
{
memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1);
}
}
else if (deltaX == 0)
{ // vertical line
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int pitch = GetPitch ();
do
if (IsBgra())
{
*spot = palColor;
spot += pitch;
} while (--deltaY != 0);
uint32_t fillColor = GPalette.BaseColors[palColor].d;
uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0;
int pitch = GetPitch();
do
{
*spot = fillColor;
spot += pitch;
} while (--deltaY != 0);
}
else
{
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int pitch = GetPitch();
do
{
*spot = palColor;
spot += pitch;
} while (--deltaY != 0);
}
}
else if (deltaX == deltaY)
{ // diagonal line.
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int advance = GetPitch() + xDir;
do
if (IsBgra())
{
*spot = palColor;
spot += advance;
} while (--deltaY != 0);
uint32_t fillColor = GPalette.BaseColors[palColor].d;
uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0;
int advance = GetPitch() + xDir;
do
{
*spot = fillColor;
spot += advance;
} while (--deltaY != 0);
}
else
{
BYTE *spot = GetBuffer() + y0*GetPitch() + x0;
int advance = GetPitch() + xDir;
do
{
*spot = palColor;
spot += advance;
} while (--deltaY != 0);
}
}
else
{
@ -1213,7 +1280,6 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor)
void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color)
{
int x, y;
BYTE *dest;
if (left == right || top == bottom)
{
@ -1243,12 +1309,28 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin
palcolor = PalFromRGB(color);
}
dest = Buffer + top * Pitch + left;
x = right - left;
for (y = top; y < bottom; y++)
if (IsBgra())
{
memset(dest, palcolor, x);
dest += Pitch;
uint32_t fill_color = GPalette.BaseColors[palcolor];
uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left;
x = right - left;
for (y = top; y < bottom; y++)
{
for (int i = 0; i < x; i++)
dest[i] = fill_color;
dest += Pitch;
}
}
else
{
BYTE *dest = Buffer + top * Pitch + left;
x = right - left;
for (y = top; y < bottom; y++)
{
memset(dest, palcolor, x);
dest += Pitch;
}
}
}
@ -1339,8 +1421,11 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
// Setup constant texture mapping parameters.
R_SetupSpanBits(tex);
R_SetSpanColormap(colormap != NULL ? &colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap);
R_SetSpanSource(tex->GetPixels());
if (colormap)
R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1));
else
R_SetSpanColormap(&identitycolormap, 0);
R_SetSpanSource(tex);
scalex = double(1u << (32 - ds_xbits)) / scalex;
scaley = double(1u << (32 - ds_ybits)) / scaley;
ds_xstep = xs_RoundToInt(cosrot * scalex);
@ -1449,6 +1534,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
//
void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const
{
if (IsBgra())
return;
int srcpitch = _width;
int destpitch;
BYTE *dest;
@ -1475,6 +1563,9 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src)
//
void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const
{
if (IsBgra())
return;
const BYTE *src;
#ifdef RANGECHECK

View File

@ -1634,6 +1634,7 @@ void FFontChar1::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================
@ -1695,6 +1696,7 @@ void FFontChar2::Unload ()
delete[] Pixels;
Pixels = NULL;
}
FTexture::Unload();
}
//==========================================================================

View File

@ -71,6 +71,7 @@ FRenderer *Renderer;
IMPLEMENT_ABSTRACT_CLASS (DCanvas)
IMPLEMENT_ABSTRACT_CLASS (DFrameBuffer)
EXTERN_CVAR (Bool, swtruecolor)
#if defined(_DEBUG) && defined(_M_IX86)
#define DBGBREAK { __asm int 3 }
@ -83,7 +84,7 @@ class DDummyFrameBuffer : public DFrameBuffer
DECLARE_CLASS (DDummyFrameBuffer, DFrameBuffer);
public:
DDummyFrameBuffer (int width, int height)
: DFrameBuffer (0, 0)
: DFrameBuffer (0, 0, false)
{
Width = width;
Height = height;
@ -119,7 +120,6 @@ public:
const BYTE *GetColumn(unsigned int column, const Span **spans_out);
const BYTE *GetPixels();
void Unload();
bool CheckModified();
void SetTranslation(int num);
@ -208,13 +208,14 @@ DCanvas *DCanvas::CanvasChain = NULL;
//
//==========================================================================
DCanvas::DCanvas (int _width, int _height)
DCanvas::DCanvas (int _width, int _height, bool _bgra)
{
// Init member vars
Buffer = NULL;
LockCount = 0;
Width = _width;
Height = _height;
Bgra = _bgra;
// Add to list of active canvases
Next = CanvasChain;
@ -344,10 +345,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h)
if (damount == 0.f)
return;
DWORD *bg2rgb;
DWORD fg;
int gap;
BYTE *spot;
int x, y;
if (x1 >= Width || y1 >= Height)
@ -367,31 +365,73 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h)
return;
}
{
int amount;
amount = (int)(damount * 64);
bg2rgb = Col2RGB8[64-amount];
fg = (((color.r * amount) >> 4) << 20) |
((color.g * amount) >> 4) |
(((color.b * amount) >> 4) << 10);
}
spot = Buffer + x1 + y1*Pitch;
gap = Pitch - w;
for (y = h; y != 0; y--)
{
for (x = w; x != 0; x--)
{
DWORD bg;
bg = bg2rgb[(*spot)&0xff];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
spot++;
if (IsBgra())
{
uint32_t *spot = (uint32_t*)Buffer + x1 + y1*Pitch;
uint32_t fg = color.d;
uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff;
uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f);
uint32_t inv_alpha = 256 - alpha;
fg_red *= alpha;
fg_green *= alpha;
fg_blue *= alpha;
for (y = h; y != 0; y--)
{
for (x = w; x != 0; x--)
{
uint32_t bg_red = (*spot >> 16) & 0xff;
uint32_t bg_green = (*spot >> 8) & 0xff;
uint32_t bg_blue = (*spot) & 0xff;
uint32_t red = (fg_red + bg_red * inv_alpha) / 256;
uint32_t green = (fg_green + bg_green * inv_alpha) / 256;
uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256;
*spot = 0xff000000 | (red << 16) | (green << 8) | blue;
spot++;
}
spot += gap;
}
}
else
{
BYTE *spot = Buffer + x1 + y1*Pitch;
DWORD *bg2rgb;
DWORD fg;
{
int amount;
amount = (int)(damount * 64);
bg2rgb = Col2RGB8[64-amount];
fg = (((color.r * amount) >> 4) << 20) |
((color.g * amount) >> 4) |
(((color.b * amount) >> 4) << 10);
}
for (y = h; y != 0; y--)
{
for (x = w; x != 0; x--)
{
DWORD bg;
bg = bg2rgb[(*spot)&0xff];
bg = (fg+bg) | 0x1f07c1f;
*spot = RGB32k.All[bg&(bg>>15)];
spot++;
}
spot += gap;
}
spot += gap;
}
}
@ -408,8 +448,8 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo
{
Lock(true);
buffer = GetBuffer();
pitch = GetPitch();
color_type = SS_PAL;
pitch = IsBgra() ? GetPitch() * 4 : GetPitch();
color_type = IsBgra() ? SS_BGRA : SS_PAL;
}
//==========================================================================
@ -704,13 +744,12 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256])
// I found this formula on the web at
// <http://panda.mostang.com/sane/sane-gamma.html>,
// but that page no longer exits.
double invgamma = 1.f / gamma;
int i;
for (i = 0; i < 256; i++)
{
gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma));
gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma) + 0.5);
}
}
@ -722,8 +761,8 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256])
//
//==========================================================================
DSimpleCanvas::DSimpleCanvas (int width, int height)
: DCanvas (width, height)
DSimpleCanvas::DSimpleCanvas (int width, int height, bool bgra)
: DCanvas (width, height, bgra)
{
MemBuffer = nullptr;
Resize(width, height);
@ -775,8 +814,9 @@ void DSimpleCanvas::Resize(int width, int height)
Pitch = width + MAX(0, CPU.DataL1LineSize - 8);
}
}
MemBuffer = new BYTE[Pitch * height];
memset(MemBuffer, 0, Pitch * height);
int bytes_per_pixel = swtruecolor ? 4 : 1;
MemBuffer = new BYTE[Pitch * height * bytes_per_pixel];
memset (MemBuffer, 0, Pitch * height * bytes_per_pixel);
}
//==========================================================================
@ -845,8 +885,8 @@ void DSimpleCanvas::Unlock ()
//
//==========================================================================
DFrameBuffer::DFrameBuffer (int width, int height)
: DSimpleCanvas (width, height)
DFrameBuffer::DFrameBuffer (int width, int height, bool bgra)
: DSimpleCanvas (width, height, bgra)
{
LastMS = LastSec = FrameCount = LastCount = LastTic = 0;
Accel2D = false;
@ -855,6 +895,70 @@ DFrameBuffer::DFrameBuffer (int width, int height)
VideoHeight = height;
}
//==========================================================================
//
// DFrameBuffer :: PostprocessBgra
//
// Copies data to destination buffer while performing gamma and flash.
// This is only needed if a target cannot do this with shaders.
//
//==========================================================================
void DFrameBuffer::CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount)
{
const BYTE *gammatables[3] = { gammared, gammagreen, gammablue };
if (flash_amount > 0)
{
uint16_t inv_flash_amount = 256 - flash_amount;
uint16_t flash_red = flash.r * flash_amount;
uint16_t flash_green = flash.g * flash_amount;
uint16_t flash_blue = flash.b * flash_amount;
for (int y = 0; y < Height; y++)
{
BYTE *dest = (BYTE*)output + y * pitch;
BYTE *src = MemBuffer + y * Pitch * 4;
for (int x = 0; x < Width; x++)
{
uint16_t fg_red = src[2];
uint16_t fg_green = src[1];
uint16_t fg_blue = src[0];
uint16_t red = (fg_red * inv_flash_amount + flash_red) >> 8;
uint16_t green = (fg_green * inv_flash_amount + flash_green) >> 8;
uint16_t blue = (fg_blue * inv_flash_amount + flash_blue) >> 8;
dest[0] = gammatables[2][blue];
dest[1] = gammatables[1][green];
dest[2] = gammatables[0][red];
dest[3] = 0xff;
dest += 4;
src += 4;
}
}
}
else
{
for (int y = 0; y < Height; y++)
{
BYTE *dest = (BYTE*)output + y * pitch;
BYTE *src = MemBuffer + y * Pitch * 4;
for (int x = 0; x < Width; x++)
{
dest[0] = gammatables[2][src[0]];
dest[1] = gammatables[1][src[1]];
dest[2] = gammatables[0][src[2]];
dest[3] = 0xff;
dest += 4;
src += 4;
}
}
}
}
//==========================================================================
//
// DFrameBuffer :: DrawRateStuff
@ -916,10 +1020,21 @@ void DFrameBuffer::DrawRateStuff ()
// Buffer can be NULL if we're doing hardware accelerated 2D
if (buffer != NULL)
{
buffer += (GetHeight()-1) * GetPitch();
if (IsBgra())
{
uint32_t *buffer32 = (uint32_t*)buffer;
buffer32 += (GetHeight() - 1) * GetPitch();
for (i = 0; i < tics*2; i += 2) buffer[i] = 0xff;
for ( ; i < 20*2; i += 2) buffer[i] = 0x00;
for (i = 0; i < tics * 2; i += 2) buffer32[i] = 0xffffffff;
for (; i < 20 * 2; i += 2) buffer32[i] = 0xff000000;
}
else
{
buffer += (GetHeight() - 1) * GetPitch();
for (i = 0; i < tics * 2; i += 2) buffer[i] = 0xff;
for (; i < 20 * 2; i += 2) buffer[i] = 0x00;
}
}
else
{
@ -992,16 +1107,6 @@ void FPaletteTester::SetTranslation(int num)
}
}
//==========================================================================
//
// FPaletteTester :: Unload
//
//==========================================================================
void FPaletteTester::Unload()
{
}
//==========================================================================
//
// FPaletteTester :: GetColumn

View File

@ -187,7 +187,7 @@ class DCanvas : public DObject
{
DECLARE_ABSTRACT_CLASS (DCanvas, DObject)
public:
DCanvas (int width, int height);
DCanvas (int width, int height, bool bgra);
virtual ~DCanvas ();
// Member variable access
@ -195,6 +195,7 @@ public:
inline int GetWidth () const { return Width; }
inline int GetHeight () const { return Height; }
inline int GetPitch () const { return Pitch; }
inline bool IsBgra() const { return Bgra; }
virtual bool IsValid ();
@ -272,6 +273,7 @@ protected:
int Height;
int Pitch;
int LockCount;
bool Bgra;
bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const;
void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete;
@ -294,7 +296,7 @@ class DSimpleCanvas : public DCanvas
{
DECLARE_CLASS (DSimpleCanvas, DCanvas)
public:
DSimpleCanvas (int width, int height);
DSimpleCanvas (int width, int height, bool bgra);
~DSimpleCanvas ();
bool IsValid ();
@ -334,7 +336,7 @@ class DFrameBuffer : public DSimpleCanvas
{
DECLARE_ABSTRACT_CLASS (DFrameBuffer, DSimpleCanvas)
public:
DFrameBuffer (int width, int height);
DFrameBuffer (int width, int height, bool bgra);
// Force the surface to use buffered output if true is passed.
virtual bool Lock (bool buffered) = 0;
@ -429,6 +431,7 @@ public:
protected:
void DrawRateStuff ();
void CopyFromBuff (BYTE *src, int srcPitch, int width, int height, BYTE *dest);
void CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount);
DFrameBuffer () {}

View File

@ -41,12 +41,12 @@ const char *GetVersionString();
/** Lots of different version numbers **/
#define VERSIONSTR "2.3pre"
#define VERSIONSTR "0.0pre"
// The version as seen in the Windows resource
#define RC_FILEVERSION 2,3,9999,0
#define RC_PRODUCTVERSION 2,3,9999,0
#define RC_PRODUCTVERSION2 "2.3pre"
#define RC_FILEVERSION 0,0,9999,0
#define RC_PRODUCTVERSION 0,0,9999,0
#define RC_PRODUCTVERSION2 "0.0pre"
// Version identifier for network games.
// Bump it every time you do a release unless you're certain you
@ -84,12 +84,12 @@ const char *GetVersionString();
#define DYNLIGHT
// This is so that derivates can use the same savegame versions without worrying about engine compatibility
#define GAMESIG "GZDOOM"
#define BASEWAD "gzdoom.pk3"
#define GAMESIG "QZDOOM"
#define BASEWAD "qzdoom.pk3"
// More stuff that needs to be different for derivatives.
#define GAMENAME "GZDoom"
#define GAMENAMELOWERCASE "gzdoom"
#define GAMENAME "QZDoom"
#define GAMENAMELOWERCASE "qzdoom"
#define FORUM_URL "http://forum.drdteam.org"
#define BUGS_FORUM_URL "http://forum.drdteam.org/viewforum.php?f=24"

View File

@ -242,8 +242,8 @@ CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
//
//==========================================================================
D3DFB::D3DFB (UINT adapter, int width, int height, bool fullscreen)
: BaseWinFB (width, height)
D3DFB::D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen)
: BaseWinFB (width, height, bgra)
{
D3DPRESENT_PARAMETERS d3dpp;
@ -765,14 +765,16 @@ void D3DFB::KillNativeTexs()
bool D3DFB::CreateFBTexture ()
{
if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL)))
FBFormat = IsBgra() ? D3DFMT_A8R8G8B8 : D3DFMT_L8;
if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL)))
{
int pow2width, pow2height, i;
for (i = 1; i < Width; i <<= 1) {} pow2width = i;
for (i = 1; i < Height; i <<= 1) {} pow2height = i;
if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL)))
if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL)))
{
return false;
}
@ -1322,20 +1324,45 @@ void D3DFB::Draw3DPart(bool copy3d)
SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) ||
SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0)))
{
if (lockrect.Pitch == Pitch && Pitch == Width)
if (IsBgra() && FBFormat == D3DFMT_A8R8G8B8)
{
memcpy (lockrect.pBits, MemBuffer, Width * Height);
if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width)
{
memcpy(lockrect.pBits, MemBuffer, Width * Height * sizeof(uint32_t));
}
else
{
uint32_t *dest = (uint32_t *)lockrect.pBits;
uint32_t *src = (uint32_t*)MemBuffer;
for (int y = 0; y < Height; y++)
{
memcpy(dest, src, Width * sizeof(uint32_t));
dest = reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(dest) + lockrect.Pitch);
src += Pitch;
}
}
}
else if (!IsBgra() && FBFormat == D3DFMT_L8)
{
if (lockrect.Pitch == Pitch && Pitch == Width)
{
memcpy(lockrect.pBits, MemBuffer, Width * Height);
}
else
{
BYTE *dest = (BYTE *)lockrect.pBits;
BYTE *src = (BYTE *)MemBuffer;
for (int y = 0; y < Height; y++)
{
memcpy(dest, src, Width);
dest = reinterpret_cast<BYTE*>(reinterpret_cast<uint8_t*>(dest) + lockrect.Pitch);
src += Pitch;
}
}
}
else
{
BYTE *dest = (BYTE *)lockrect.pBits;
BYTE *src = MemBuffer;
for (int y = 0; y < Height; y++)
{
memcpy (dest, src, Width);
dest += lockrect.Pitch;
src += Pitch;
}
memset(lockrect.pBits, 0, lockrect.Pitch * Height);
}
FBTexture->UnlockRect (0);
}
@ -1367,7 +1394,10 @@ void D3DFB::Draw3DPart(bool copy3d)
memset(Constant, 0, sizeof(Constant));
SetAlphaBlend(D3DBLENDOP(0));
EnableAlphaTest(FALSE);
SetPixelShader(Shaders[SHADER_NormalColorPal]);
if (IsBgra())
SetPixelShader(Shaders[SHADER_NormalColor]);
else
SetPixelShader(Shaders[SHADER_NormalColorPal]);
if (copy3d)
{
FBVERTEX verts[4];
@ -1385,7 +1415,10 @@ void D3DFB::Draw3DPart(bool copy3d)
realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0);
color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2,
realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1);
SetPixelShader(Shaders[SHADER_SpecialColormapPal]);
if (IsBgra())
SetPixelShader(Shaders[SHADER_SpecialColormap]);
else
SetPixelShader(Shaders[SHADER_SpecialColormapPal]);
}
}
else
@ -1396,7 +1429,10 @@ void D3DFB::Draw3DPart(bool copy3d)
CalcFullscreenCoords(verts, Accel2D, false, color0, color1);
D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX));
}
SetPixelShader(Shaders[SHADER_NormalColorPal]);
if (IsBgra())
SetPixelShader(Shaders[SHADER_NormalColor]);
else
SetPixelShader(Shaders[SHADER_NormalColorPal]);
}
//==========================================================================

View File

@ -32,7 +32,6 @@
**
*/
// HEADER FILES ------------------------------------------------------------
#define DIRECTDRAW_VERSION 0x0300
@ -120,7 +119,7 @@ cycle_t BlitCycles;
// CODE --------------------------------------------------------------------
DDrawFB::DDrawFB (int width, int height, bool fullscreen)
: BaseWinFB (width, height)
: BaseWinFB (width, height, false)
{
int i;

View File

@ -51,6 +51,7 @@
EXTERN_CVAR (Bool, ticker)
EXTERN_CVAR (Bool, fullscreen)
EXTERN_CVAR (Bool, swtruecolor)
EXTERN_CVAR (Float, vid_winscale)
CVAR(Int, win_x, -1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
@ -71,7 +72,7 @@ int currentrenderer = -1;
bool changerenderer;
// [ZDoomGL]
CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
{
// 0: Software renderer
// 1: OpenGL renderer
@ -190,7 +191,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old)
}
break;
}
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old);
DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old);
//* Right now, CreateFrameBuffer cannot return NULL
if (res == NULL)
@ -357,6 +358,16 @@ void I_RestoreWindowedPos ()
extern int NewWidth, NewHeight, NewBits, DisplayBits;
CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL)
{
// Strictly speaking this doesn't require a mode switch, but it is the easiest
// way to force a CreateFramebuffer call without a lot of refactoring.
NewWidth = screen->GetWidth();
NewHeight = screen->GetHeight();
NewBits = DisplayBits;
setmodeneeded = true;
}
CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL)
{
NewWidth = screen->GetWidth();

View File

@ -45,7 +45,7 @@ class IVideo
virtual EDisplayType GetDisplayType () = 0;
virtual void SetWindowedScale (float scale) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0;
virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0;
virtual void StartModeIterator (int bits, bool fs) = 0;
virtual bool NextMode (int *width, int *height, bool *letterbox) = 0;

View File

@ -346,7 +346,8 @@ bool Win32GLVideo::GoFullscreen(bool yes)
//
//==========================================================================
DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer *old)
DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer *old)
{
Win32GLFrameBuffer *fb;
@ -860,7 +861,7 @@ IMPLEMENT_ABSTRACT_CLASS(Win32GLFrameBuffer)
//
//==========================================================================
Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : BaseWinFB(width, height)
Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : BaseWinFB(width, height, false)
{
m_Width = width;
m_Height = height;

View File

@ -38,7 +38,7 @@ public:
void StartModeIterator (int bits, bool fs);
bool NextMode (int *width, int *height, bool *letterbox);
bool GoFullscreen(bool yes);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old);
virtual bool SetResolution (int width, int height, int bits);
void DumpAdapters();
bool InitHardware (HWND Window, int multisample);

View File

@ -70,7 +70,7 @@ class Win32Video : public IVideo
EDisplayType GetDisplayType () { return DISPLAY_Both; }
void SetWindowedScale (float scale);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old);
DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old);
void StartModeIterator (int bits, bool fs);
bool NextMode (int *width, int *height, bool *letterbox);
@ -121,7 +121,7 @@ class BaseWinFB : public DFrameBuffer
{
DECLARE_ABSTRACT_CLASS(BaseWinFB, DFrameBuffer)
public:
BaseWinFB (int width, int height) : DFrameBuffer (width, height), Windowed (true) {}
BaseWinFB (int width, int height, bool bgra) : DFrameBuffer (width, height, bgra), Windowed (true) {}
bool IsFullscreen () { return !Windowed; }
virtual void Blank () = 0;
@ -228,7 +228,7 @@ class D3DFB : public BaseWinFB
{
DECLARE_CLASS(D3DFB, BaseWinFB)
public:
D3DFB (UINT adapter, int width, int height, bool fullscreen);
D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen);
~D3DFB ();
bool IsValid ();
@ -422,6 +422,7 @@ private:
bool NeedPalUpdate;
bool NeedGammaUpdate;
int FBWidth, FBHeight;
D3DFORMAT FBFormat;
bool VSync;
RECT BlendingRect;
int In2D;

View File

@ -629,7 +629,7 @@ bool Win32Video::NextMode (int *width, int *height, bool *letterbox)
return false;
}
DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old)
DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old)
{
static int retry = 0;
static int owidth, oheight;
@ -645,7 +645,8 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr
BaseWinFB *fb = static_cast<BaseWinFB *> (old);
if (fb->Width == width &&
fb->Height == height &&
fb->Windowed == !fullscreen)
fb->Windowed == !fullscreen &&
fb->Bgra == bgra)
{
return old;
}
@ -662,12 +663,13 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr
if (D3D != NULL)
{
fb = new D3DFB (m_Adapter, width, height, fullscreen);
fb = new D3DFB (m_Adapter, width, height, bgra, fullscreen);
}
else
{
fb = new DDrawFB (width, height, fullscreen);
}
LOG1 ("New fb created @ %p\n", fb);
// If we could not create the framebuffer, try again with slightly
@ -726,7 +728,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr
}
++retry;
fb = static_cast<DDrawFB *>(CreateFrameBuffer (width, height, fullscreen, NULL));
fb = static_cast<DDrawFB *>(CreateFrameBuffer (width, height, bgra, fullscreen, NULL));
}
retry = 0;

View File

@ -72,13 +72,13 @@ BEGIN
" BEGIN\r\n"
" VALUE ""Comments"", ""Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. Includes code based on the Cajun Bot 0.97 by Martin Collberg.""\r\n"
" VALUE ""CompanyName"", "" ""\r\n"
" VALUE ""FileDescription"", ""GZDoom""\r\n"
" VALUE ""FileDescription"", ""QZDoom""\r\n"
" VALUE ""FileVersion"", RC_FILEVERSION2\r\n"
" VALUE ""InternalName"", ""GZDoom""\r\n"
" VALUE ""InternalName"", ""QZDoom""\r\n"
" VALUE ""LegalCopyright"", ""Copyright \\u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al.""\r\n"
" VALUE ""LegalTrademarks"", ""DoomR is a Registered Trademark of id Software, Inc.""\r\n"
" VALUE ""OriginalFilename"", ""gzdoom.exe""\r\n"
" VALUE ""ProductName"", ""GZDoom""\r\n"
" VALUE ""OriginalFilename"", ""qzdoom.exe""\r\n"
" VALUE ""ProductName"", ""QZDoom""\r\n"
" VALUE ""ProductVersion"", RC_PRODUCTVERSION2\r\n"
" END\r\n"
" END\r\n"
@ -492,13 +492,13 @@ BEGIN
BEGIN
VALUE "Comments", "Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. Includes code based on the Cajun Bot 0.97 by Martin Collberg."
VALUE "CompanyName", " "
VALUE "FileDescription", "GZDoom"
VALUE "FileDescription", "QZDoom"
VALUE "FileVersion", RC_FILEVERSION2
VALUE "InternalName", "GZDoom"
VALUE "InternalName", "QZDoom"
VALUE "LegalCopyright", "Copyright \u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al."
VALUE "LegalTrademarks", "DoomR is a Registered Trademark of id Software, Inc."
VALUE "OriginalFilename", "gzdoom.exe"
VALUE "ProductName", "GZDoom"
VALUE "OriginalFilename", "qzdoom.exe"
VALUE "ProductName", "QZDoom"
VALUE "ProductVersion", RC_PRODUCTVERSION2
END
END

View File

@ -1,3 +1,3 @@
cmake_minimum_required( VERSION 2.8.7 )
add_pk3(gzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static)
add_pk3(qzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static)

View File

@ -1780,6 +1780,10 @@ DSPLYMNU_BRIGHTNESS = "Brightness";
DSPLYMNU_VSYNC = "Vertical Sync";
DSPLYMNU_CAPFPS = "Rendering Interpolation";
DSPLYMNU_COLUMNMETHOD = "Column render mode";
DSPLYMNU_TRUECOLOR = "True color output";
DSPLYMNU_MINFILTER = "Linear filter when downscaling";
DSPLYMNU_MAGFILTER = "Linear filter when upscaling";
DSPLYMNU_MIPMAP = "Use mipmapped textures";
DSPLYMNU_WIPETYPE = "Screen wipe style";
DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen";
DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used

View File

@ -674,6 +674,10 @@ OptionMenu "VideoOptions"
Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff"
Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn"
Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff"
Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff"
Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff"
Option "$DSPLYMNU_MIPMAP", "r_mipmap", "OnOff"
StaticText " "
Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes"