From 0c5a47ac9035bf7968bcd6cf112ef97df0a3f771 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 2 Jun 2018 12:10:06 +0200 Subject: [PATCH 01/17] - fixed: MF3_DONTSPLASH disabled all terrain effects, not just the splash. Untested for now because the report came without any example to check it with. --- src/p_mobj.cpp | 69 ++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index a00cb7f10..2f8b89a84 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -6504,9 +6504,6 @@ DEFINE_ACTION_FUNCTION(AActor, GetFloorTerrain) bool P_HitWater (AActor * thing, sector_t * sec, const DVector3 &pos, bool checkabove, bool alert, bool force) { - if (thing->flags3 & MF3_DONTSPLASH) - return false; - if (thing->player && (thing->player->cheats & CF_PREDICTING)) return false; @@ -6595,48 +6592,51 @@ foundone: if (thing->Mass < 10) smallsplash = true; - if (smallsplash && splash->SmallSplash) + if (!(thing->flags3 & MF3_DONTSPLASH)) { - mo = Spawn (splash->SmallSplash, pos, ALLOW_REPLACE); - if (mo) mo->Floorclip += splash->SmallSplashClip; - } - else - { - if (splash->SplashChunk) + if (smallsplash && splash->SmallSplash) { - mo = Spawn (splash->SplashChunk, pos, ALLOW_REPLACE); - mo->target = thing; - if (splash->ChunkXVelShift != 255) + mo = Spawn(splash->SmallSplash, pos, ALLOW_REPLACE); + if (mo) mo->Floorclip += splash->SmallSplashClip; + } + else + { + if (splash->SplashChunk) { - mo->Vel.X = (pr_chunk.Random2() << splash->ChunkXVelShift) / 65536.; + mo = Spawn(splash->SplashChunk, pos, ALLOW_REPLACE); + mo->target = thing; + if (splash->ChunkXVelShift != 255) + { + mo->Vel.X = (pr_chunk.Random2() << splash->ChunkXVelShift) / 65536.; + } + if (splash->ChunkYVelShift != 255) + { + mo->Vel.Y = (pr_chunk.Random2() << splash->ChunkYVelShift) / 65536.; + } + mo->Vel.Z = splash->ChunkBaseZVel + (pr_chunk() << splash->ChunkZVelShift) / 65536.; } - if (splash->ChunkYVelShift != 255) + 
if (splash->SplashBase) { - mo->Vel.Y = (pr_chunk.Random2() << splash->ChunkYVelShift) / 65536.; + mo = Spawn(splash->SplashBase, pos, ALLOW_REPLACE); + } + if (thing->player && !splash->NoAlert && alert) + { + P_NoiseAlert(thing, thing, true); } - mo->Vel.Z = splash->ChunkBaseZVel + (pr_chunk() << splash->ChunkZVelShift) / 65536.; } - if (splash->SplashBase) + if (mo) { - mo = Spawn (splash->SplashBase, pos, ALLOW_REPLACE); + S_Sound(mo, CHAN_ITEM, smallsplash ? + splash->SmallSplashSound : splash->NormalSplashSound, + 1, ATTN_IDLE); } - if (thing->player && !splash->NoAlert && alert) + else { - P_NoiseAlert (thing, thing, true); + S_Sound(pos, CHAN_ITEM, smallsplash ? + splash->SmallSplashSound : splash->NormalSplashSound, + 1, ATTN_IDLE); } } - if (mo) - { - S_Sound (mo, CHAN_ITEM, smallsplash ? - splash->SmallSplashSound : splash->NormalSplashSound, - 1, ATTN_IDLE); - } - else - { - S_Sound (pos, CHAN_ITEM, smallsplash ? - splash->SmallSplashSound : splash->NormalSplashSound, - 1, ATTN_IDLE); - } // Don't let deep water eat missiles return plane == &sec->floorplane ? Terrains[terrainnum].IsLiquid : false; @@ -6676,9 +6676,6 @@ bool P_HitFloor (AActor *thing) return false; } - if (thing->flags3 & MF3_DONTSPLASH) - return false; - // don't splash if landing on the edge above water/lava/etc.... DVector3 pos; for (m = thing->touching_sectorlist; m; m = m->m_tnext) From decea97a343a27904bab313dc09aac794811d8b5 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 2 Jun 2018 12:34:21 +0200 Subject: [PATCH 02/17] - use a linear light ramp on the textured automap for light modes 0 and 1. 
--- src/v_2ddrawer.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/v_2ddrawer.cpp b/src/v_2ddrawer.cpp index ada1b5878..bbf9105fe 100644 --- a/src/v_2ddrawer.cpp +++ b/src/v_2ddrawer.cpp @@ -323,12 +323,16 @@ void F2DDrawer::AddPoly(FTexture *texture, FVector2 *points, int npoints, // Convert a light level into an unbounded colormap index (shade). // Why the +12? I wish I knew, but experimentation indicates it // is necessary in order to best reproduce Doom's original lighting. - double map = (NUMCOLORMAPS * 2.) - ((lightlevel + 12) * (NUMCOLORMAPS / 128.)); - double fadelevel = clamp((map - 12) / NUMCOLORMAPS, 0.0, 1.0); - // handle the brighter light modes of the hardware renderer. - if (vid_rendermode == 4 && (level.lightmode < 2 || level.lightmode == 4)) + double fadelevel; + + if (vid_rendermode != 4 || (level.lightmode >= 2 && level.lightmode != 4)) { - fadelevel = pow(fadelevel, 1.3); + double map = (NUMCOLORMAPS * 2.) - ((lightlevel + 12) * (NUMCOLORMAPS / 128.)); + fadelevel = clamp((map - 12) / NUMCOLORMAPS, 0.0, 1.0); + } + else + { + fadelevel = 1. - clamp(lightlevel, 0, 255) / 255.f; } RenderCommand poly; From 94d1a73ae806f3363bda9ccfb7b72ffdd6a2e40f Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 2 Jun 2018 13:07:47 +0200 Subject: [PATCH 03/17] - pass mirroring info to model drawers. Actual evaluation only implemented in OpenGL. Not tested yet because the bug report had no testable example. 
--- src/gl/models/gl_models.cpp | 8 ++++---- src/gl/models/gl_models.h | 4 ++-- src/polyrenderer/scene/poly_model.cpp | 4 ++-- src/polyrenderer/scene/poly_model.h | 4 ++-- src/r_data/models/models.cpp | 8 ++++++-- src/r_data/models/models.h | 4 ++-- src/resourcefiles/file_zip.cpp | 1 + src/swrenderer/things/r_model.cpp | 4 ++-- src/swrenderer/things/r_model.h | 4 ++-- 9 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/gl/models/gl_models.cpp b/src/gl/models/gl_models.cpp index 31b7208bc..5c2521b41 100644 --- a/src/gl/models/gl_models.cpp +++ b/src/gl/models/gl_models.cpp @@ -54,7 +54,7 @@ VSMatrix FGLModelRenderer::GetViewToWorldMatrix() return objectToWorldMatrix; } -void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) +void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) { glDepthFunc(GL_LEQUAL); gl_RenderState.EnableTexture(true); @@ -65,7 +65,7 @@ void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, con if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) && !(smf->flags & MDL_DONTCULLBACKFACES)) { glEnable(GL_CULL_FACE); - glFrontFace(GL_CW); + glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CW : GL_CCW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; @@ -81,7 +81,7 @@ void FGLModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) glDisable(GL_CULL_FACE); } -void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) +void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) { glDepthFunc(GL_LEQUAL); @@ -91,7 +91,7 @@ void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectTo if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])) { glEnable(GL_CULL_FACE); - glFrontFace(GLPortal::isMirrored()? 
GL_CW : GL_CCW); + glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CW : GL_CCW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; diff --git a/src/gl/models/gl_models.h b/src/gl/models/gl_models.h index 821705c34..0fb82f9f4 100644 --- a/src/gl/models/gl_models.h +++ b/src/gl/models/gl_models.h @@ -37,13 +37,13 @@ public: FGLModelRenderer(int mli) : modellightindex(mli) {} ModelRendererType GetType() const override { return GLModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/polyrenderer/scene/poly_model.cpp b/src/polyrenderer/scene/poly_model.cpp index 344749659..5e779f703 100644 --- a/src/polyrenderer/scene/poly_model.cpp +++ b/src/polyrenderer/scene/poly_model.cpp @@ -51,7 +51,7 @@ PolyModelRenderer::PolyModelRenderer(PolyRenderThread *thread, const Mat4f &worl { } -void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) +void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -98,7 
+98,7 @@ VSMatrix PolyModelRenderer::GetViewToWorldMatrix() return objectToWorld; } -void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) +void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); diff --git a/src/polyrenderer/scene/poly_model.h b/src/polyrenderer/scene/poly_model.h index cbfafce59..6e6ec57bc 100644 --- a/src/polyrenderer/scene/poly_model.h +++ b/src/polyrenderer/scene/poly_model.h @@ -36,13 +36,13 @@ public: ModelRendererType GetType() const override { return PolyModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/r_data/models/models.cpp b/src/r_data/models/models.cpp index b8b324524..d425741f4 100644 --- a/src/r_data/models/models.cpp +++ b/src/r_data/models/models.cpp @@ -171,7 +171,9 @@ void FModelRenderer::RenderModel(float x, float y, float z, FSpriteModelFrame *s float stretch = (smf->modelIDs[0] != -1 ? 
Models[smf->modelIDs[0]]->getAspectFactor() : 1.f) / level.info->pixelstretch; objectToWorldMatrix.scale(1, stretch, 1); - BeginDrawModel(actor, smf, objectToWorldMatrix); + float orientation = scaleFactorX * scaleFactorY * scaleFactorZ; + + BeginDrawModel(actor, smf, objectToWorldMatrix, orientation < 0); RenderFrameModels(smf, actor->state, actor->tics, actor->GetClass(), translation); EndDrawModel(actor, smf); } @@ -207,7 +209,9 @@ void FModelRenderer::RenderHUDModel(DPSprite *psp, float ofsX, float ofsY) objectToWorldMatrix.rotate(smf->pitchoffset, 0, 0, 1); objectToWorldMatrix.rotate(-smf->rolloffset, 1, 0, 0); - BeginDrawHUDModel(playermo, objectToWorldMatrix); + float orientation = smf->xscale * smf->yscale * smf->zscale; + + BeginDrawHUDModel(playermo, objectToWorldMatrix, orientation < 0); RenderFrameModels(smf, psp->GetState(), psp->GetTics(), playermo->player->ReadyWeapon->GetClass(), 0); EndDrawHUDModel(playermo); } diff --git a/src/r_data/models/models.h b/src/r_data/models/models.h index b0d0f0230..bb5fb3b84 100644 --- a/src/r_data/models/models.h +++ b/src/r_data/models/models.h @@ -64,7 +64,7 @@ public: virtual ModelRendererType GetType() const = 0; - virtual void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) = 0; + virtual void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) = 0; virtual void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) = 0; virtual IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) = 0; @@ -74,7 +74,7 @@ public: virtual VSMatrix GetViewToWorldMatrix() = 0; - virtual void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) = 0; + virtual void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) = 0; virtual void EndDrawHUDModel(AActor *actor) = 0; virtual void SetInterpolation(double interpolation) = 0; diff --git a/src/resourcefiles/file_zip.cpp 
b/src/resourcefiles/file_zip.cpp index e0cf4df91..b3bfa7a2d 100644 --- a/src/resourcefiles/file_zip.cpp +++ b/src/resourcefiles/file_zip.cpp @@ -40,6 +40,7 @@ #include "v_text.h" #include "w_wad.h" #include "w_zip.h" +#include "i_system.h" #include "ancientzip.h" #define BUFREADCOMMENT (0x400) diff --git a/src/swrenderer/things/r_model.cpp b/src/swrenderer/things/r_model.cpp index 025263e34..15966c706 100644 --- a/src/swrenderer/things/r_model.cpp +++ b/src/swrenderer/things/r_model.cpp @@ -82,7 +82,7 @@ namespace swrenderer { } - void SWModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) + void SWModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -181,7 +181,7 @@ namespace swrenderer return objectToWorld; } - void SWModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) + void SWModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); diff --git a/src/swrenderer/things/r_model.h b/src/swrenderer/things/r_model.h index 004636e69..ee6608358 100644 --- a/src/swrenderer/things/r_model.h +++ b/src/swrenderer/things/r_model.h @@ -56,13 +56,13 @@ namespace swrenderer ModelRendererType GetType() const override { return SWModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; 
VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; From 0dbcdc8a9ce428e42a1394fdb730ea444930b764 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 2 Jun 2018 20:35:51 +0200 Subject: [PATCH 04/17] - fix negative rotation speed not working --- src/r_data/models/models.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_data/models/models.cpp b/src/r_data/models/models.cpp index d425741f4..316281f65 100644 --- a/src/r_data/models/models.cpp +++ b/src/r_data/models/models.cpp @@ -94,7 +94,7 @@ void FModelRenderer::RenderModel(float x, float y, float z, FSpriteModelFrame *s if (smf->flags & MDL_ROTATING) { - if (smf->rotationSpeed > 0.0000000001) + if (smf->rotationSpeed > 0.0000000001 || smf->rotationSpeed < -0.0000000001) { double turns = (I_GetTime() + I_GetTimeFrac()) / (200.0 / smf->rotationSpeed); turns -= floor(turns); From d0aacd3ba891436e4defe7a5f8149f1b51abc0b3 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sat, 2 Jun 2018 23:08:42 +0200 Subject: [PATCH 05/17] - fixed: The translucent border draw list must be rendered with depth writing active. This gets exclusively used by portal borders which means that for walls the setting is irrelevant but for flats it is needed to cover the portal surface so that translucent parts of the outer scene do not bleed through. 
--- src/gl/scene/gl_scene.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index 91d4e4235..ad7a7e494 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -395,7 +395,6 @@ void GLSceneDrawer::RenderTranslucent(FDrawInfo *di) { RenderAll.Clock(); - glDepthMask(false); gl_RenderState.SetCameraPos(r_viewpoint.Pos.X, r_viewpoint.Pos.Y, r_viewpoint.Pos.Z); // final pass: translucent stuff @@ -404,6 +403,7 @@ void GLSceneDrawer::RenderTranslucent(FDrawInfo *di) gl_RenderState.EnableBrightmap(true); di->drawlists[GLDL_TRANSLUCENTBORDER].Draw(di, GLPASS_TRANSLUCENT); + glDepthMask(false); di->DrawSorted(GLDL_TRANSLUCENT); gl_RenderState.EnableBrightmap(false); From a186677092901c4063999c996b3b9f86c36eb27e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 3 Jun 2018 01:44:56 +0200 Subject: [PATCH 06/17] - removed the 8x8 block drawing code from softpoly --- src/polyrenderer/drawers/poly_buffer.cpp | 15 +- src/polyrenderer/drawers/poly_buffer.h | 8 - src/polyrenderer/drawers/poly_triangle.cpp | 31 +- src/polyrenderer/drawers/poly_triangle.h | 8 +- src/polyrenderer/drawers/screen_triangle.cpp | 1314 +++--------------- src/polyrenderer/drawers/screen_triangle.h | 5 +- src/polyrenderer/poly_renderer.cpp | 4 +- src/swrenderer/drawers/r_draw.cpp | 4 +- src/swrenderer/scene/r_scene.cpp | 2 +- 9 files changed, 217 insertions(+), 1174 deletions(-) diff --git a/src/polyrenderer/drawers/poly_buffer.cpp b/src/polyrenderer/drawers/poly_buffer.cpp index 0e24509cc..2506bce7e 100644 --- a/src/polyrenderer/drawers/poly_buffer.cpp +++ b/src/polyrenderer/drawers/poly_buffer.cpp @@ -48,8 +48,7 @@ void PolyZBuffer::Resize(int newwidth, int newheight) { width = newwidth; height = newheight; - int count = BlockWidth() * BlockHeight(); - values.resize(count * 64); + values.resize(width * height); } ///////////////////////////////////////////////////////////////////////////// @@ -64,14 +63,6 @@ 
void PolyStencilBuffer::Clear(int newwidth, int newheight, uint8_t stencil_value { width = newwidth; height = newheight; - int count = BlockWidth() * BlockHeight(); - values.resize(count * 64); - masks.resize(count); - - uint8_t *v = Values(); - uint32_t *m = Masks(); - for (int i = 0; i < count; i++) - { - m[i] = 0xffffff00 | stencil_value; - } + values.resize(width * height); + memset(Values(), stencil_value, width * height); } diff --git a/src/polyrenderer/drawers/poly_buffer.h b/src/polyrenderer/drawers/poly_buffer.h index df79c6e71..af4a78b3a 100644 --- a/src/polyrenderer/drawers/poly_buffer.h +++ b/src/polyrenderer/drawers/poly_buffer.h @@ -33,8 +33,6 @@ public: void Resize(int newwidth, int newheight); int Width() const { return width; } int Height() const { return height; } - int BlockWidth() const { return (width + 7) / 8; } - int BlockHeight() const { return (height + 7) / 8; } float *Values() { return values.data(); } private: @@ -50,16 +48,10 @@ public: void Clear(int newwidth, int newheight, uint8_t stencil_value = 0); int Width() const { return width; } int Height() const { return height; } - int BlockWidth() const { return (width + 7) / 8; } - int BlockHeight() const { return (height + 7) / 8; } uint8_t *Values() { return values.data(); } - uint32_t *Masks() { return masks.data(); } private: int width; int height; - - // 8x8 blocks of stencil values, plus a mask for each block indicating if values are the same for early out stencil testing std::vector values; - std::vector masks; }; diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 5661f5c83..992a43d76 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -52,7 +52,7 @@ bool PolyTriangleDrawer::IsBgra() return isBgraRenderTarget; } -void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers) +void 
PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas) { uint8_t *dest = (uint8_t*)canvas->GetPixels(); int dest_width = canvas->GetWidth(); @@ -74,7 +74,7 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx); dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety); - queue->Push(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra, span_drawers); + queue->Push(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra); } void PolyTriangleDrawer::SetTransform(const DrawerCommandQueuePtr &queue, const Mat4f *objectToClip) @@ -99,7 +99,7 @@ void PolyTriangleDrawer::SetWeaponScene(const DrawerCommandQueuePtr &queue, bool ///////////////////////////////////////////////////////////////////////////// -void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, bool new_dest_bgra, bool new_span_drawers) +void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, bool new_dest_bgra) { viewport_x = x; viewport_y = y; @@ -110,7 +110,6 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui dest_height = new_dest_height; dest_pitch = new_dest_pitch; dest_bgra = new_dest_bgra; - span_drawers = new_span_drawers; ccw = true; weaponScene = false; } @@ -132,9 +131,7 @@ void PolyTriangleThreadData::DrawElements(const PolyDrawArgs &drawargs) args.clipbottom = dest_height; args.uniforms = &drawargs; args.destBgra = dest_bgra; - args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); - args.stencilValues = PolyStencilBuffer::Instance()->Values(); - 
args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); + args.stencilbuffer = PolyStencilBuffer::Instance()->Values(); args.zbuffer = PolyZBuffer::Instance()->Values(); args.depthOffset = weaponScene ? 1.0f : 0.0f; @@ -191,9 +188,7 @@ void PolyTriangleThreadData::DrawArrays(const PolyDrawArgs &drawargs) args.clipbottom = dest_height; args.uniforms = &drawargs; args.destBgra = dest_bgra; - args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); - args.stencilValues = PolyStencilBuffer::Instance()->Values(); - args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); + args.stencilbuffer = PolyStencilBuffer::Instance()->Values(); args.zbuffer = PolyZBuffer::Instance()->Values(); args.depthOffset = weaponScene ? 1.0f : 0.0f; @@ -379,10 +374,7 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *vert, boo args->v3 = &clippedvert[i - 2]; if (IsFrontfacing(args) == ccw && args->CalculateGradients()) { - if (!span_drawers) - ScreenTriangle::Draw(args, this); - else - ScreenTriangle::DrawSWRender(args, this); + ScreenTriangle::Draw(args, this); } } } @@ -395,10 +387,7 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *vert, boo args->v3 = &clippedvert[i]; if (IsFrontfacing(args) != ccw && args->CalculateGradients()) { - if (!span_drawers) - ScreenTriangle::Draw(args, this); - else - ScreenTriangle::DrawSWRender(args, this); + ScreenTriangle::Draw(args, this); } } } @@ -631,14 +620,14 @@ void PolySetWeaponSceneCommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -PolySetViewportCommand::PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers) - : x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra), span_drawers(span_drawers) 
+PolySetViewportCommand::PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra) + : x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra) { } void PolySetViewportCommand::Execute(DrawerThread *thread) { - PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra, span_drawers); + PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra); } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index b10888455..c1e4871db 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -33,7 +33,7 @@ class PolyTriangleDrawer { public: static void ClearBuffers(DCanvas *canvas); - static void SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers); + static void SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas); static void SetCullCCW(const DrawerCommandQueuePtr &queue, bool ccw); static void SetTwoSided(const DrawerCommandQueuePtr &queue, bool twosided); static void SetWeaponScene(const DrawerCommandQueuePtr &queue, bool enable); @@ -47,7 +47,7 @@ class PolyTriangleThreadData public: PolyTriangleThreadData(int32_t core, int32_t num_cores) : core(core), num_cores(num_cores) { } - void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers); + void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra); void SetTransform(const 
Mat4f *objectToClip); void SetCullCCW(bool value) { ccw = value; } void SetTwoSided(bool value) { twosided = value; } @@ -88,7 +88,6 @@ private: bool twosided = false; bool weaponScene = false; const Mat4f *objectToClip = nullptr; - bool span_drawers = false; enum { max_additional_vertices = 16 }; }; @@ -144,7 +143,7 @@ private: class PolySetViewportCommand : public DrawerCommand { public: - PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers); + PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra); void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "PolySetViewport"; } @@ -159,7 +158,6 @@ private: int dest_height; int dest_pitch; bool dest_bgra; - bool span_drawers; }; class DrawPolyTrianglesCommand : public DrawerCommand diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index a8a4fcd54..2abdee97b 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -44,1091 +44,6 @@ #include "poly_drawer8.h" #include "x86.h" -class TriangleBlock -{ -public: - TriangleBlock(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); - void Render(); - -private: - void RenderSubdivide(int x0, int y0, int x1, int y1); - - enum class CoverageModes { Full, Partial }; - struct CoverageFull { static const int Mode = (int)CoverageModes::Full; }; - struct CoveragePartial { static const int Mode = (int)CoverageModes::Partial; }; - - template - void RenderBlock(int x0, int y0, int x1, int y1); - - const TriDrawTriangleArgs *args; - PolyTriangleThreadData *thread; - - // Block size, standard 8x8 (must be power of two) - static const int q = 8; - - // Deltas - int DX12, DX23, DX31; - int DY12, DY23, DY31; - - // Fixed-point deltas - int FDX12, FDX23, 
FDX31; - int FDY12, FDY23, FDY31; - - // Half-edge constants - int C1, C2, C3; - - // Stencil buffer - int stencilPitch; - uint8_t * RESTRICT stencilValues; - uint32_t * RESTRICT stencilMasks; - uint8_t stencilTestValue; - uint32_t stencilWriteValue; - - // Viewport clipping - int clipright; - int clipbottom; - - // Depth buffer - float * RESTRICT zbuffer; - int32_t zbufferPitch; - - // Triangle bounding block - int minx, miny; - int maxx, maxy; - - // Active block - int X, Y; - uint32_t Mask0, Mask1; - -#ifndef NO_SSE - __m128i mFDY12Offset; - __m128i mFDY23Offset; - __m128i mFDY31Offset; - __m128i mFDY12x4; - __m128i mFDY23x4; - __m128i mFDY31x4; - __m128i mFDX12; - __m128i mFDX23; - __m128i mFDX31; - __m128i mC1; - __m128i mC2; - __m128i mC3; - __m128i mDX12; - __m128i mDY12; - __m128i mDX23; - __m128i mDY23; - __m128i mDX31; - __m128i mDY31; -#endif - - enum class CoverageResult - { - full, - partial, - none - }; - CoverageResult AreaCoverageTest(int x0, int y0, int x1, int y1); - - void CoverageTest(); - void StencilEqualTest(); - void StencilGreaterEqualTest(); - void DepthTest(const TriDrawTriangleArgs *args); - void ClipTest(); - void StencilWrite(); - void DepthWrite(const TriDrawTriangleArgs *args); -}; - -TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) : args(args), thread(thread) -{ - const ShadedTriVertex &v1 = *args->v1; - const ShadedTriVertex &v2 = *args->v2; - const ShadedTriVertex &v3 = *args->v3; - - clipright = args->clipright; - clipbottom = args->clipbottom; - - stencilPitch = args->stencilPitch; - stencilValues = args->stencilValues; - stencilMasks = args->stencilMasks; - stencilTestValue = args->uniforms->StencilTestValue(); - stencilWriteValue = args->uniforms->StencilWriteValue(); - - zbuffer = args->zbuffer; - zbufferPitch = args->stencilPitch; - - // 28.4 fixed-point coordinates -#ifdef NO_SSE - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 
= (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); -#else - int tempround[4 * 3]; - __m128 m16 = _mm_set1_ps(16.0f); - __m128 mhalf = _mm_set1_ps(65536.5f); - __m128i m65536 = _mm_set1_epi32(65536); - _mm_storeu_si128((__m128i*)tempround, _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v1), m16), mhalf)), m65536)); - _mm_storeu_si128((__m128i*)(tempround + 4), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v2), m16), mhalf)), m65536)); - _mm_storeu_si128((__m128i*)(tempround + 8), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v3), m16), mhalf)), m65536)); - const int X1 = tempround[0]; - const int X2 = tempround[4]; - const int X3 = tempround[8]; - const int Y1 = tempround[1]; - const int Y2 = tempround[5]; - const int Y3 = tempround[9]; -#endif - - // Deltas - DX12 = X1 - X2; - DX23 = X2 - X3; - DX31 = X3 - X1; - - DY12 = Y1 - Y2; - DY23 = Y2 - Y3; - DY31 = Y3 - Y1; - - // Fixed-point deltas - FDX12 = DX12 << 4; - FDX23 = DX23 << 4; - FDX31 = DX31 << 4; - - FDY12 = DY12 << 4; - FDY23 = DY23 << 4; - FDY31 = DY31 << 4; - - // Bounding rectangle - minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); - maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); - maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - { - return; - } - - // Start and end in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - maxx |= q - 1; - maxy |= q - 1; - - // Half-edge constants - C1 = DY12 * X1 - DX12 * Y1; - C2 = DY23 * X2 - DX23 * Y2; - C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - -#ifndef NO_SSE - 
mFDY12Offset = _mm_setr_epi32(0, FDY12, FDY12 * 2, FDY12 * 3); - mFDY23Offset = _mm_setr_epi32(0, FDY23, FDY23 * 2, FDY23 * 3); - mFDY31Offset = _mm_setr_epi32(0, FDY31, FDY31 * 2, FDY31 * 3); - mFDY12x4 = _mm_set1_epi32(FDY12 * 4); - mFDY23x4 = _mm_set1_epi32(FDY23 * 4); - mFDY31x4 = _mm_set1_epi32(FDY31 * 4); - mFDX12 = _mm_set1_epi32(FDX12); - mFDX23 = _mm_set1_epi32(FDX23); - mFDX31 = _mm_set1_epi32(FDX31); - mC1 = _mm_set1_epi32(C1); - mC2 = _mm_set1_epi32(C2); - mC3 = _mm_set1_epi32(C3); - mDX12 = _mm_set1_epi32(DX12); - mDY12 = _mm_set1_epi32(DY12); - mDX23 = _mm_set1_epi32(DX23); - mDY23 = _mm_set1_epi32(DY23); - mDX31 = _mm_set1_epi32(DX31); - mDY31 = _mm_set1_epi32(DY31); -#endif -} - -void TriangleBlock::Render() -{ - RenderSubdivide(minx / q, miny / q, (maxx + 1) / q, (maxy + 1) / q); -} - -void TriangleBlock::RenderSubdivide(int x0, int y0, int x1, int y1) -{ - CoverageResult result = AreaCoverageTest(x0 * q, y0 * q, x1 * q, y1 * q); - if (result == CoverageResult::full) - { - RenderBlock(x0 * q, y0 * q, x1 * q, y1 * q); - } - else if (result == CoverageResult::partial) - { - bool doneX = x1 - x0 <= 8; - bool doneY = y1 - y0 <= 8; - if (doneX && doneY) - { - RenderBlock(x0 * q, y0 * q, x1 * q, y1 * q); - } - else - { - int midx = (x0 + x1) >> 1; - int midy = (y0 + y1) >> 1; - if (doneX) - { - RenderSubdivide(x0, y0, x1, midy); - RenderSubdivide(x0, midy, x1, y1); - } - else if (doneY) - { - RenderSubdivide(x0, y0, midx, y1); - RenderSubdivide(midx, y0, x1, y1); - } - else - { - RenderSubdivide(x0, y0, midx, midy); - RenderSubdivide(midx, y0, x1, midy); - RenderSubdivide(x0, midy, midx, y1); - RenderSubdivide(midx, midy, x1, y1); - } - } - } -} - -template -void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1) -{ - // First block line for this thread - int core = thread->core; - int num_cores = thread->num_cores; - int core_skip = (num_cores - ((y0 / q) - core) % num_cores) % num_cores; - int start_miny = y0 + core_skip * q; - - bool depthTest 
= args->uniforms->DepthTest(); - bool writeColor = args->uniforms->WriteColor(); - bool writeStencil = args->uniforms->WriteStencil(); - bool writeDepth = args->uniforms->WriteDepth(); - - int bmode = (int)args->uniforms->BlendMode(); - auto drawFunc = args->destBgra ? ScreenTriangle::SpanDrawers32[bmode] : ScreenTriangle::SpanDrawers8[bmode]; - - // Loop through blocks - for (int y = start_miny; y < y1; y += q * num_cores) - { - for (int x = x0; x < x1; x += q) - { - X = x; - Y = y; - - if (CoverageModeT::Mode == (int)CoverageModes::Full) - { - Mask0 = 0xffffffff; - Mask1 = 0xffffffff; - } - else - { - CoverageTest(); - if (Mask0 == 0 && Mask1 == 0) - continue; - } - - ClipTest(); - if (Mask0 == 0 && Mask1 == 0) - continue; - - StencilEqualTest(); - if (Mask0 == 0 && Mask1 == 0) - continue; - - if (depthTest) - { - DepthTest(args); - if (Mask0 == 0 && Mask1 == 0) - continue; - } - - if (writeColor) - { - if (Mask0 == 0xffffffff) - { - drawFunc(Y, X, X + 8, args); - drawFunc(Y + 1, X, X + 8, args); - drawFunc(Y + 2, X, X + 8, args); - drawFunc(Y + 3, X, X + 8, args); - } - else if (Mask0 != 0) - { - uint32_t mask = Mask0; - for (int j = 0; j < 4; j++) - { - int start = 0; - int i; - for (i = 0; i < 8; i++) - { - if (!(mask & 0x80000000)) - { - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - start = i + 1; - } - mask <<= 1; - } - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - } - } - - if (Mask1 == 0xffffffff) - { - drawFunc(Y + 4, X, X + 8, args); - drawFunc(Y + 5, X, X + 8, args); - drawFunc(Y + 6, X, X + 8, args); - drawFunc(Y + 7, X, X + 8, args); - } - else if (Mask1 != 0) - { - uint32_t mask = Mask1; - for (int j = 4; j < 8; j++) - { - int start = 0; - int i; - for (i = 0; i < 8; i++) - { - if (!(mask & 0x80000000)) - { - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - start = i + 1; - } - mask <<= 1; - } - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - } - } - } - - if (writeStencil) - StencilWrite(); - if 
(writeDepth) - DepthWrite(args); - } - } -} - -#ifdef NO_SSE - -void TriangleBlock::DepthTest(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - bool covered = *depth <= posXW; - mask0 <<= 1; - mask0 |= (uint32_t)covered; - depth++; - posXW += stepXW; - } - posYW += stepYW; - } - - for (int iy = 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - bool covered = *depth <= posXW; - mask1 <<= 1; - mask1 |= (uint32_t)covered; - depth++; - posXW += stepXW; - } - posYW += stepYW; - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; -} - -#else - -void TriangleBlock::DepthTest(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - __m128 mposYW = _mm_setr_ps(posYW, posYW + stepXW, posYW + stepXW + stepXW, posYW + stepXW + stepXW + stepXW); - __m128 mstepXW = _mm_set1_ps(stepXW * 4.0f); - __m128 mstepYW = _mm_set1_ps(stepYW); - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - __m128 mposXW = mposYW; - for (int ix = 0; ix < 2; ix++) - { - __m128 covered = _mm_cmplt_ps(_mm_loadu_ps(depth), mposXW); - mask0 <<= 4; - mask0 |= _mm_movemask_ps(_mm_shuffle_ps(covered, covered, _MM_SHUFFLE(0, 1, 2, 3))); - depth += 4; - mposXW = _mm_add_ps(mposXW, mstepXW); - } - mposYW = _mm_add_ps(mposYW, mstepYW); - } - - for (int iy = 0; 
iy < 4; iy++) - { - __m128 mposXW = mposYW; - for (int ix = 0; ix < 2; ix++) - { - __m128 covered = _mm_cmplt_ps(_mm_loadu_ps(depth), mposXW); - mask1 <<= 4; - mask1 |= _mm_movemask_ps(_mm_shuffle_ps(covered, covered, _MM_SHUFFLE(0, 1, 2, 3))); - depth += 4; - mposXW = _mm_add_ps(mposXW, mstepXW); - } - mposYW = _mm_add_ps(mposYW, mstepYW); - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; -} - -#endif - -void TriangleBlock::ClipTest() -{ - static const uint32_t clipxmask[8] = - { - 0, - 0x80808080, - 0xc0c0c0c0, - 0xe0e0e0e0, - 0xf0f0f0f0, - 0xf8f8f8f8, - 0xfcfcfcfc, - 0xfefefefe - }; - - static const uint32_t clipymask[8] = - { - 0, - 0xff000000, - 0xffff0000, - 0xffffff00, - 0xffffffff, - 0xffffffff, - 0xffffffff, - 0xffffffff - }; - - uint32_t xmask = (X + 8 <= clipright) ? 0xffffffff : clipxmask[clipright - X]; - uint32_t ymask0 = (Y + 4 <= clipbottom) ? 0xffffffff : clipymask[clipbottom - Y]; - uint32_t ymask1 = (Y + 8 <= clipbottom) ? 0xffffffff : clipymask[clipbottom - Y - 4]; - - Mask0 = Mask0 & xmask & ymask0; - Mask1 = Mask1 & xmask & ymask1; -} - -#ifdef NO_SSE - -void TriangleBlock::StencilEqualTest() -{ - // Stencil test the whole block, if possible - int block = (X >> 3) + (Y >> 3) * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; - if (skipBlock) - { - Mask0 = 0; - Mask1 = 0; - } - else if (!blockIsSingleStencil) - { - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] == stencilTestValue; - mask0 <<= 1; - mask0 |= (uint32_t)passStencilTest; - } - } - - for (int iy = 4; iy < q; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] == 
stencilTestValue; - mask1 <<= 1; - mask1 |= (uint32_t)passStencilTest; - } - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; - } -} - -#else - -void TriangleBlock::StencilEqualTest() -{ - // Stencil test the whole block, if possible - int block = (X >> 3) + (Y >> 3) * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; - if (skipBlock) - { - Mask0 = 0; - Mask1 = 0; - } - else if (!blockIsSingleStencil) - { - __m128i mstencilTestValue = _mm_set1_epi16(stencilTestValue); - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 2; iy++) - { - __m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock); - - __m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); - __m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - __m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); - mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask0 <<= 16; - mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - - stencilBlock += 16; - } - - for (int iy = 0; iy < 2; iy++) - { - __m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock); - - __m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, 
_mm_setzero_si128()), mstencilTestValue); - __m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - __m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); - mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask1 <<= 16; - mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - - stencilBlock += 16; - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; - } -} - -#endif - -void TriangleBlock::StencilGreaterEqualTest() -{ - // Stencil test the whole block, if possible - int block = (X >> 3) + (Y >> 3) * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) < stencilTestValue; - if (skipBlock) - { - Mask0 = 0; - Mask1 = 0; - } - else if (!blockIsSingleStencil) - { - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] >= stencilTestValue; - mask0 <<= 1; - mask0 |= (uint32_t)passStencilTest; - } - } - - for (int iy = 4; iy < q; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] >= stencilTestValue; - mask1 <<= 1; - mask1 |= (uint32_t)passStencilTest; - } - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; - } -} - -TriangleBlock::CoverageResult 
TriangleBlock::AreaCoverageTest(int x0, int y0, int x1, int y1) -{ - // Corners of block - x0 = x0 << 4; - x1 = (x1 - 1) << 4; - y0 = y0 << 4; - y1 = (y1 - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - if (a == 0 || b == 0 || c == 0) // Skip block when outside an edge - { - return CoverageResult::none; - } - else if (a == 0xf && b == 0xf && c == 0xf) // Accept whole block when totally covered - { - return CoverageResult::full; - } - else // Partially covered block - { - return CoverageResult::partial; - } -} - -#ifdef NO_SSE - -void TriangleBlock::CoverageTest() -{ - // Corners of block - int x0 = X << 4; - int x1 = (X + q - 1) << 4; - int y0 = Y << 4; - int y1 = (Y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 
= C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - if (a == 0 || b == 0 || c == 0) // Skip block when outside an edge - { - Mask0 = 0; - Mask1 = 0; - } - else if (a == 0xf && b == 0xf && c == 0xf) // Accept whole block when totally covered - { - Mask0 = 0xffffffff; - Mask1 = 0xffffffff; - } - else // Partially covered block - { - x0 = X << 4; - x1 = (X + q - 1) << 4; - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool covered = CX1 > 0 && CX2 > 0 && CX3 > 0; - mask0 <<= 1; - mask0 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - for (int iy = 4; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool covered = CX1 > 0 && CX2 > 0 && CX3 > 0; - mask1 <<= 1; - mask1 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - Mask0 = mask0; - Mask1 = mask1; - } -} - -#else - -void TriangleBlock::CoverageTest() -{ - // Corners of block - int x0 = X << 4; - int x1 = (X + q - 1) << 4; - int y0 = Y << 4; - int y1 = (Y + q - 1) << 4; - - __m128i mY = _mm_set_epi32(y0, y0, y1, y1); - __m128i mX = _mm_set_epi32(x0, x0, x1, x1); - - // Evaluate half-space functions - __m128i mCY1 = _mm_sub_epi32( - _mm_add_epi32(mC1, _mm_shuffle_epi32(_mm_mul_epu32(mDX12, mY), _MM_SHUFFLE(0, 0, 2, 2))), - _mm_shuffle_epi32(_mm_mul_epu32(mDY12, mX), _MM_SHUFFLE(0, 2, 0, 2))); - __m128i mA = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - - __m128i mCY2 = _mm_sub_epi32( - _mm_add_epi32(mC2, 
_mm_shuffle_epi32(_mm_mul_epu32(mDX23, mY), _MM_SHUFFLE(0, 0, 2, 2))), - _mm_shuffle_epi32(_mm_mul_epu32(mDY23, mX), _MM_SHUFFLE(0, 2, 0, 2))); - __m128i mB = _mm_cmpgt_epi32(mCY2, _mm_setzero_si128()); - - __m128i mCY3 = _mm_sub_epi32( - _mm_add_epi32(mC3, _mm_shuffle_epi32(_mm_mul_epu32(mDX31, mY), _MM_SHUFFLE(0, 0, 2, 2))), - _mm_shuffle_epi32(_mm_mul_epu32(mDY31, mX), _MM_SHUFFLE(0, 2, 0, 2))); - __m128i mC = _mm_cmpgt_epi32(mCY3, _mm_setzero_si128()); - - int abc = _mm_movemask_epi8(_mm_packs_epi16(_mm_packs_epi32(mA, mB), _mm_packs_epi32(mC, _mm_setzero_si128()))); - - if ((abc & 0xf) == 0 || (abc & 0xf0) == 0 || (abc & 0xf00) == 0) // Skip block when outside an edge - { - Mask0 = 0; - Mask1 = 0; - } - else if (abc == 0xfff) // Accept whole block when totally covered - { - Mask0 = 0xffffffff; - Mask1 = 0xffffffff; - } - else // Partially covered block - { - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - mCY1 = _mm_sub_epi32(_mm_shuffle_epi32(mCY1, _MM_SHUFFLE(0, 0, 0, 0)), mFDY12Offset); - mCY2 = _mm_sub_epi32(_mm_shuffle_epi32(mCY2, _MM_SHUFFLE(0, 0, 0, 0)), mFDY23Offset); - mCY3 = _mm_sub_epi32(_mm_shuffle_epi32(mCY3, _MM_SHUFFLE(0, 0, 0, 0)), mFDY31Offset); - for (int iy = 0; iy < 2; iy++) - { - __m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - __m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mtest0 = 
_mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask0 <<= 16; - mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - } - - for (int iy = 0; iy < 2; iy++) - { - __m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - __m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), 
_mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask1 <<= 16; - mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - } - - Mask0 = mask0; - Mask1 = mask1; - } -} - -#endif - -void TriangleBlock::StencilWrite() -{ - int block = (X >> 3) + (Y >> 3) * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t &stencilBlockMask = stencilMasks[block]; - uint32_t writeValue = stencilWriteValue; - - if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) - { - stencilBlockMask = 0xffffff00 | writeValue; - } - else - { - uint32_t mask0 = Mask0; - uint32_t mask1 = Mask1; - - bool isSingleValue = (stencilBlockMask & 0xffffff00) == 0xffffff00; - if (isSingleValue) - { - uint8_t value = stencilBlockMask & 0xff; - for (int v = 0; v < 64; v++) - stencilBlock[v] = value; - stencilBlockMask = 0; - } - - int count = 0; - for (int v = 0; v < 32; v++) - { - if ((mask0 & (1 << 31)) || stencilBlock[v] == writeValue) - { - stencilBlock[v] = writeValue; - count++; - } - mask0 <<= 1; - } - for (int v = 32; v < 64; v++) - { - if ((mask1 & (1 << 31)) || stencilBlock[v] == writeValue) - { - stencilBlock[v] = writeValue; - count++; - } - mask1 <<= 1; - } - - if (count == 64) - stencilBlockMask = 0xffffff00 | writeValue; - } -} - -#ifdef NO_SSE - -void TriangleBlock::DepthWrite(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - if (Mask0 == 0xffffffff && Mask1 
== 0xffffffff) - { - for (int iy = 0; iy < 8; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - *(depth++) = posXW; - posXW += stepXW; - } - posYW += stepYW; - } - } - else - { - uint32_t mask0 = Mask0; - uint32_t mask1 = Mask1; - - for (int iy = 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - if (mask0 & (1 << 31)) - *depth = posXW; - posXW += stepXW; - mask0 <<= 1; - depth++; - } - posYW += stepYW; - } - - for (int iy = 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - if (mask1 & (1 << 31)) - *depth = posXW; - posXW += stepXW; - mask1 <<= 1; - depth++; - } - posYW += stepYW; - } - } -} - -#else - -void TriangleBlock::DepthWrite(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - __m128 mposYW = _mm_setr_ps(posYW, posYW + stepXW, posYW + stepXW + stepXW, posYW + stepXW + stepXW + stepXW); - __m128 mstepXW = _mm_set1_ps(stepXW * 4.0f); - __m128 mstepYW = _mm_set1_ps(stepYW); - - if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) - { - for (int iy = 0; iy < 8; iy++) - { - __m128 mposXW = mposYW; - _mm_storeu_ps(depth, mposXW); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); - _mm_storeu_ps(depth, mposXW); depth += 4; - mposYW = _mm_add_ps(mposYW, mstepYW); - } - } - else - { - __m128i mxormask = _mm_set1_epi32(0xffffffff); - __m128i topfour = _mm_setr_epi32(1 << 31, 1 << 30, 1 << 29, 1 << 28); - - __m128i mmask0 = _mm_set1_epi32(Mask0); - __m128i mmask1 = _mm_set1_epi32(Mask1); - - for (int iy = 0; iy < 4; iy++) - { - __m128 mposXW = mposYW; - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask0 
= _mm_slli_epi32(mmask0, 4); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask0 = _mm_slli_epi32(mmask0, 4); depth += 4; - mposYW = _mm_add_ps(mposYW, mstepYW); - } - - for (int iy = 0; iy < 4; iy++) - { - __m128 mposXW = mposYW; - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask1 = _mm_slli_epi32(mmask1, 4); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask1 = _mm_slli_epi32(mmask1, 4); depth += 4; - mposYW = _mm_add_ps(mposYW, mstepYW); - } - } -} - -#endif - -void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) -{ - TriangleBlock block(args, thread); - block.Render(); -} - static void SortVertices(const TriDrawTriangleArgs *args, ShadedTriVertex **sortedVertices) { sortedVertices[0] = args->v1; @@ -1143,7 +58,7 @@ static void SortVertices(const TriDrawTriangleArgs *args, ShadedTriVertex **sort std::swap(sortedVertices[1], sortedVertices[2]); } -void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) +void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) { // Sort vertices by Y position ShadedTriVertex *sortedVertices[3]; @@ -1230,13 +145,22 @@ void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleT float v1Y = args->v1->y; float v1W = args->v1->w; + bool depthTest = args->uniforms->DepthTest(); + bool stencilTest = true; + bool writeColor = args->uniforms->WriteColor(); + bool writeStencil = args->uniforms->WriteStencil(); + bool writeDepth = args->uniforms->WriteDepth(); + uint8_t 
stencilTestValue = args->uniforms->StencilTestValue(); + uint8_t stencilWriteValue = args->uniforms->StencilWriteValue(); + int num_cores = thread->num_cores; for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores) { int x = leftEdge[y]; int xend = rightEdge[y]; - float *zbufferLine = args->zbuffer + args->stencilPitch * 8 * y; + float *zbufferLine = args->zbuffer + args->pitch * y; + uint8_t *stencilLine = args->stencilbuffer + args->pitch * y; float startX = x + (0.5f - v1X); float startY = y + (0.5f - v1Y); @@ -1249,59 +173,211 @@ void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleT { int xstart = x; - int xendsse = x + ((xend - x) & ~3); - __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && x < xendsse) + if (depthTest && stencilTest) { - _mm_storeu_ps(zbufferLine + x, mposXW); - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; - } - posXW = _mm_cvtss_f32(mposXW); + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && + stencilLine[x] == stencilTestValue && + stencilLine[x + 1] == stencilTestValue && + stencilLine[x + 2] == stencilTestValue && + stencilLine[x + 3] == stencilTestValue && + x < xendsse) + { + if (writeDepth) + _mm_storeu_ps(zbufferLine + x, mposXW); + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); - while (zbufferLine[x] <= posXW && x < xend) + while (zbufferLine[x] <= posXW && stencilLine[x] == stencilTestValue && x < xend) + { + if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (depthTest) { - zbufferLine[x] = posXW; - posXW += stepXW; - x++; + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while 
(_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && x < xendsse) + { + if (writeDepth) + _mm_storeu_ps(zbufferLine + x, mposXW); + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); + + while (zbufferLine[x] <= posXW && x < xend) + { + if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] == stencilTestValue && x < xend) + x++; + } + else + { + x = xend; } if (x > xstart) - drawfunc(y, xstart, x, args); - - xendsse = x + ((xend - x) & ~3); - mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 && x < xendsse) { - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; + if (writeColor) + drawfunc(y, xstart, x, args); + + if (writeStencil) + { + for (int i = xstart; i < x; i++) + stencilLine[i] = stencilWriteValue; + } + + if (!depthTest && writeDepth) + { + for (int i = xstart; i < x; i++) + { + zbufferLine[i] = posXW; + posXW += stepXW; + } + } } - posXW = _mm_cvtss_f32(mposXW); - while (zbufferLine[x] > posXW && x < xend) + if (depthTest && stencilTest) { - posXW += stepXW; - x++; + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while ((_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 || + stencilLine[x] != stencilTestValue || + stencilLine[x + 1] != stencilTestValue || + stencilLine[x + 2] != stencilTestValue || + stencilLine[x + 3] != stencilTestValue) && + x < xendsse) + { + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); + + while ((zbufferLine[x] > posXW || stencilLine[x] != stencilTestValue) && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (depthTest) + { + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), 
mposXW)) == 0 && x < xendsse) + { + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); + + while (zbufferLine[x] > posXW && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] != stencilTestValue && x < xend) + { + posXW += stepXW; + x++; + } } } #else while (x < xend) { int xstart = x; - while (zbufferLine[x] <= posXW && x < xend) + + if (depthTest && stencilTest) { - zbufferLine[x] = posXW; - posXW += stepXW; - x++; + while (zbufferLine[x] <= posXW && stencilLine[x] == stencilTestValue && x < xend) + { + if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (depthTest) + { + while (zbufferLine[x] <= posXW && x < xend) + { + if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] == stencilTestValue && x < xend) + x++; + } + else + { + x = xend; } if (x > xstart) - drawfunc(y, xstart, x, args); - - while (zbufferLine[x] > posXW && x < xend) { - posXW += stepXW; - x++; + if (writeColor) + drawfunc(y, xstart, x, args); + + if (writeStencil) + { + for (int i = xstart; i < x; i++) + stencilLine[i] = stencilWriteValue; + } + + if (!depthTest && writeDepth) + { + for (int i = xstart; i < x; i++) + { + zbufferLine[i] = posXW; + posXW += stepXW; + } + } + } + + if (depthTest && stencilTest) + { + while ((zbufferLine[x] > posXW || stencilLine[x] != stencilTestValue) && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (depthTest) + { + while (zbufferLine[x] > posXW && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] != stencilTestValue && x < xend) + { + posXW += stepXW; + x++; + } } } #endif diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index 036e4a55b..bd3ff132c 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -52,9 
+52,7 @@ struct TriDrawTriangleArgs ShadedTriVertex *v3; int32_t clipright; int32_t clipbottom; - uint8_t *stencilValues; - uint32_t *stencilMasks; - int32_t stencilPitch; + uint8_t *stencilbuffer; float *zbuffer; const PolyDrawArgs *uniforms; bool destBgra; @@ -170,7 +168,6 @@ class ScreenTriangle { public: static void Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); - static void DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); static void(*SpanDrawers8[])(int y, int x0, int x1, const TriDrawTriangleArgs *args); static void(*SpanDrawers32[])(int y, int x0, int x1, const TriDrawTriangleArgs *args); diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 6fee5d841..76d8a937e 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -201,11 +201,11 @@ void PolyRenderer::SetSceneViewport() height = (screenblocks*SCREENHEIGHT / 10) & ~7; int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); - PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget, false); + PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget); } else // Rendering to camera texture { - PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, 0, 0, RenderTarget->GetWidth(), RenderTarget->GetHeight(), RenderTarget, false); + PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, 0, 0, RenderTarget->GetWidth(), RenderTarget->GetHeight(), RenderTarget); } } diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 9049c949a..8e260daa7 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -272,7 +272,7 @@ namespace swrenderer void Execute(DrawerThread *thread) override { auto zbuffer = 
PolyZBuffer::Instance(); - int pitch = PolyStencilBuffer::Instance()->BlockWidth() * 8; + int pitch = PolyStencilBuffer::Instance()->Width(); float *values = zbuffer->Values() + y * pitch + x; int cnt = count; @@ -316,7 +316,7 @@ namespace swrenderer return; auto zbuffer = PolyZBuffer::Instance(); - int pitch = PolyStencilBuffer::Instance()->BlockWidth() * 8; + int pitch = PolyStencilBuffer::Instance()->Width(); float *values = zbuffer->Values() + y * pitch; int end = x2; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index a52d271a3..d51eef63a 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -259,7 +259,7 @@ namespace swrenderer thread->OpaquePass->ResetFakingUnderwater(); // [RH] Hack to make windows into underwater areas possible thread->Portal->SetMainPortal(); - PolyTriangleDrawer::SetViewport(thread->DrawQueue, viewwindowx, viewwindowy, viewwidth, viewheight, thread->Viewport->RenderTarget, true); + PolyTriangleDrawer::SetViewport(thread->DrawQueue, viewwindowx, viewwindowy, viewwidth, viewheight, thread->Viewport->RenderTarget); // Cull things outside the range seen by this thread VisibleSegmentRenderer visitor; From dea1d0259d591c2f5cd0c23a1199af5dfe62a1c8 Mon Sep 17 00:00:00 2001 From: Marisa Kirisame Date: Sun, 3 Jun 2018 02:27:28 +0200 Subject: [PATCH 07/17] Swap front face culling for GL model drawer (CCW should be the default). Added mirroring handling to software models. 
--- src/gl/models/gl_models.cpp | 4 ++-- src/polyrenderer/scene/poly_model.cpp | 4 ++++ src/swrenderer/things/r_model.cpp | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/gl/models/gl_models.cpp b/src/gl/models/gl_models.cpp index 5c2521b41..a4360cf07 100644 --- a/src/gl/models/gl_models.cpp +++ b/src/gl/models/gl_models.cpp @@ -65,7 +65,7 @@ void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, con if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) && !(smf->flags & MDL_DONTCULLBACKFACES)) { glEnable(GL_CULL_FACE); - glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CW : GL_CCW); + glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CCW : GL_CW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; @@ -91,7 +91,7 @@ void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectTo if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])) { glEnable(GL_CULL_FACE); - glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CW : GL_CCW); + glFrontFace((mirrored ^ GLPortal::isMirrored()) ? 
GL_CCW : GL_CW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; diff --git a/src/polyrenderer/scene/poly_model.cpp b/src/polyrenderer/scene/poly_model.cpp index 5e779f703..04a2d7ba2 100644 --- a/src/polyrenderer/scene/poly_model.cpp +++ b/src/polyrenderer/scene/poly_model.cpp @@ -59,12 +59,14 @@ void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, co if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void PolyModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); ModelActor = nullptr; } @@ -107,6 +109,7 @@ void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectT if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void PolyModelRenderer::EndDrawHUDModel(AActor *actor) @@ -116,6 +119,7 @@ void PolyModelRenderer::EndDrawHUDModel(AActor *actor) if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); } void PolyModelRenderer::SetInterpolation(double interpolation) diff --git a/src/swrenderer/things/r_model.cpp b/src/swrenderer/things/r_model.cpp index 15966c706..bf9810e71 100644 --- a/src/swrenderer/things/r_model.cpp +++ b/src/swrenderer/things/r_model.cpp @@ -123,12 +123,14 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + 
PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void SWModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); ModelActor = nullptr; } @@ -192,6 +194,7 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void SWModelRenderer::EndDrawHUDModel(AActor *actor) @@ -201,6 +204,7 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); } void SWModelRenderer::SetInterpolation(double interpolation) From f03c02df43227af296dea5321c19ee2c2df7bcad Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 3 Jun 2018 13:59:40 +0200 Subject: [PATCH 08/17] - fix gamepic render buffer issues --- src/g_game.cpp | 3 --- src/g_game.h | 2 ++ src/gl/renderer/gl_renderer.cpp | 11 +++++++++-- src/gl/renderer/gl_renderer.h | 2 ++ src/gl/scene/gl_scene.cpp | 28 ++++++++++++++++++---------- src/gl/system/gl_framebuffer.cpp | 13 ++++++------- src/gl/system/gl_framebuffer.h | 2 +- src/v_video.cpp | 2 +- src/v_video.h | 2 +- 9 files changed, 40 insertions(+), 25 deletions(-) diff --git a/src/g_game.cpp b/src/g_game.cpp index 1c5388ac1..a73758f1c 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -82,9 +82,6 @@ static FRandom pr_dmspawn ("DMSpawn"); static FRandom pr_pspawn ("PlayerSpawn"); -const int SAVEPICWIDTH = 216; -const int SAVEPICHEIGHT = 162; - bool G_CheckDemoStatus (void); void G_ReadDemoTiccmd (ticcmd_t *cmd, int player); void G_WriteDemoTiccmd (ticcmd_t *cmd, int player, int buf); diff --git a/src/g_game.h b/src/g_game.h index 
2ac019059..7ac49a413 100644 --- a/src/g_game.h +++ b/src/g_game.h @@ -102,5 +102,7 @@ class AInventory; extern const AInventory *SendItemUse, *SendItemDrop; extern int SendItemDropAmount; +const int SAVEPICWIDTH = 216; +const int SAVEPICHEIGHT = 162; #endif diff --git a/src/gl/renderer/gl_renderer.cpp b/src/gl/renderer/gl_renderer.cpp index 154fae2d6..c753e261e 100644 --- a/src/gl/renderer/gl_renderer.cpp +++ b/src/gl/renderer/gl_renderer.cpp @@ -35,6 +35,7 @@ #include "p_effect.h" #include "d_player.h" #include "a_dynlight.h" +#include "g_game.h" #include "swrenderer/r_swscene.h" #include "hwrenderer/utility/hw_clock.h" @@ -96,6 +97,8 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) mLights = nullptr; mTonemapPalette = nullptr; mBuffers = nullptr; + mScreenBuffers = nullptr; + mSaveBuffers = nullptr; mPresentShader = nullptr; mPresent3dCheckerShader = nullptr; mPresent3dColumnShader = nullptr; @@ -122,7 +125,9 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) void FGLRenderer::Initialize(int width, int height) { - mBuffers = new FGLRenderBuffers(); + mScreenBuffers = new FGLRenderBuffers(); + mSaveBuffers = new FGLRenderBuffers(); + mBuffers = mScreenBuffers; mLinearDepthShader = new FLinearDepthShader(); mDepthBlurShader = new FDepthBlurShader(); mSSAOShader = new FSSAOShader(); @@ -400,7 +405,9 @@ void FGLRenderer::WriteSavePic(player_t *player, FileWriter *file, int width, in void FGLRenderer::BeginFrame() { - buffersActive = GLRenderer->mBuffers->Setup(screen->mScreenViewport.width, screen->mScreenViewport.height, screen->mSceneViewport.width, screen->mSceneViewport.height); + buffersActive = GLRenderer->mScreenBuffers->Setup(screen->mScreenViewport.width, screen->mScreenViewport.height, screen->mSceneViewport.width, screen->mSceneViewport.height); + if (buffersActive) + buffersActive = GLRenderer->mSaveBuffers->Setup(SAVEPICWIDTH, SAVEPICHEIGHT, SAVEPICWIDTH, SAVEPICHEIGHT); } //=========================================================================== 
diff --git a/src/gl/renderer/gl_renderer.h b/src/gl/renderer/gl_renderer.h index 7cdf3b01d..22b77acef 100644 --- a/src/gl/renderer/gl_renderer.h +++ b/src/gl/renderer/gl_renderer.h @@ -100,6 +100,8 @@ public: int mOldFBID; FGLRenderBuffers *mBuffers; + FGLRenderBuffers *mScreenBuffers; + FGLRenderBuffers *mSaveBuffers; FLinearDepthShader *mLinearDepthShader; FSSAOShader *mSSAOShader; FDepthBlurShader *mDepthBlurShader; diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index ad7a7e494..d4aea2768 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -656,7 +656,7 @@ sector_t * GLSceneDrawer::RenderViewpoint (AActor * camera, IntRect * bounds, fl SetFixedColormap(camera->player); // reiterate color map for each eye, so night vision goggles work in both eyes const s3d::EyePose * eye = stereo3dMode.getEyePose(eye_ix); eye->SetUp(); - screen->SetOutputViewport(bounds); + screen->SetViewportRects(bounds); Set3DViewport(mainview); GLRenderer->mDrawingScene2D = true; GLRenderer->mCurrentFoV = fov; @@ -713,20 +713,24 @@ sector_t * GLSceneDrawer::RenderViewpoint (AActor * camera, IntRect * bounds, fl void GLSceneDrawer::WriteSavePic (player_t *player, FileWriter *file, int width, int height) { IntRect bounds; + bounds.left = 0; + bounds.top = 0; + bounds.width = width; + bounds.height = height; + + // if GLRenderer->mVBO is persistently mapped we must be sure the GPU finished reading from it before we fill it with new data. + glFinish(); + + // Switch to render buffers dimensioned for the savepic + GLRenderer->mBuffers = GLRenderer->mSaveBuffers; P_FindParticleSubsectors(); // make sure that all recently spawned particles have a valid subsector. 
- bounds.left=0; - bounds.top=0; - bounds.width=width; - bounds.height=height; - glFlush(); SetFixedColormap(player); gl_RenderState.SetVertexBuffer(GLRenderer->mVBO); GLRenderer->mVBO->Reset(); if (!gl.legacyMode) GLRenderer->mLights->Clear(); - sector_t *viewsector = RenderViewpoint(players[consoleplayer].camera, &bounds, - r_viewpoint.FieldOfView.Degrees, 1.6f, 1.6f, true, false); + sector_t *viewsector = RenderViewpoint(players[consoleplayer].camera, &bounds, r_viewpoint.FieldOfView.Degrees, 1.6f, 1.6f, true, false); glDisable(GL_STENCIL_TEST); gl_RenderState.SetFixedColormap(CM_DEFAULT); gl_RenderState.SetSoftLightLevel(-1); @@ -737,12 +741,16 @@ void GLSceneDrawer::WriteSavePic (player_t *player, FileWriter *file, int width, screen->Draw2D(); } GLRenderer->CopyToBackbuffer(&bounds, false); - glFlush(); - screen->SetOutputViewport(nullptr); + // strictly speaking not needed as the glReadPixels should block until the scene is rendered, but this is to safeguard against shitty drivers + glFinish(); uint8_t * scr = (uint8_t *)M_Malloc(width * height * 3); glReadPixels(0,0,width, height,GL_RGB,GL_UNSIGNED_BYTE,scr); M_CreatePNG (file, scr + ((height-1) * width * 3), NULL, SS_RGB, width, height, -width * 3, Gamma); M_Free(scr); + + // Switch back the screen render buffers + screen->SetViewportRects(nullptr); + GLRenderer->mBuffers = GLRenderer->mScreenBuffers; } diff --git a/src/gl/system/gl_framebuffer.cpp b/src/gl/system/gl_framebuffer.cpp index 81cbd7eeb..02326afd1 100644 --- a/src/gl/system/gl_framebuffer.cpp +++ b/src/gl/system/gl_framebuffer.cpp @@ -136,7 +136,7 @@ void OpenGLFrameBuffer::InitializeState() glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); GLRenderer->Initialize(GetWidth(), GetHeight()); - SetOutputViewport(nullptr); + SetViewportRects(nullptr); } //========================================================================== @@ -164,14 +164,11 @@ void OpenGLFrameBuffer::Update() int clientHeight = ViewportScaledHeight(initialWidth, 
initialHeight); if (clientWidth > 0 && clientHeight > 0 && (Width != clientWidth || Height != clientHeight)) { - // Do not call Resize here because it's only for software canvases Width = clientWidth; Height = clientHeight; V_OutputResized(Width, Height); GLRenderer->mVBO->OutputResized(Width, Height); } - - SetOutputViewport(nullptr); } //=========================================================================== @@ -392,10 +389,11 @@ bool OpenGLFrameBuffer::RenderBuffersEnabled() return FGLRenderBuffers::IsEnabled(); } -void OpenGLFrameBuffer::SetOutputViewport(IntRect *bounds) +void OpenGLFrameBuffer::SetViewportRects(IntRect *bounds) { - Super::SetOutputViewport(bounds); - s3d::Stereo3DMode::getCurrentMode().AdjustViewports(); + Super::SetViewportRects(bounds); + if (!bounds) + s3d::Stereo3DMode::getCurrentMode().AdjustViewports(); } @@ -431,6 +429,7 @@ void OpenGLFrameBuffer::SetClearColor(int color) void OpenGLFrameBuffer::BeginFrame() { + SetViewportRects(nullptr); if (GLRenderer != nullptr) GLRenderer->BeginFrame(); } diff --git a/src/gl/system/gl_framebuffer.h b/src/gl/system/gl_framebuffer.h index 898820cc9..9a92c1f44 100644 --- a/src/gl/system/gl_framebuffer.h +++ b/src/gl/system/gl_framebuffer.h @@ -42,7 +42,7 @@ public: void ResetFixedColormap() override; void BeginFrame() override; bool RenderBuffersEnabled() override; - void SetOutputViewport(IntRect *bounds) override; + void SetViewportRects(IntRect *bounds) override; void BlurScene(float amount) override; // Retrieves a buffer containing image data for a screenshot. 
diff --git a/src/v_video.cpp b/src/v_video.cpp index 1441a1966..0f754d4c3 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -1005,7 +1005,7 @@ void DFrameBuffer::WriteSavePic(player_t *player, FileWriter *file, int width, i // //========================================================================== -void DFrameBuffer::SetOutputViewport(IntRect *bounds) +void DFrameBuffer::SetViewportRects(IntRect *bounds) { if (bounds) { diff --git a/src/v_video.h b/src/v_video.h index f88a65e65..66e38fca5 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -501,7 +501,7 @@ public: // Calculate gamma table void CalcGamma(float gamma, uint8_t gammalookup[256]); - virtual void SetOutputViewport(IntRect *bounds); + virtual void SetViewportRects(IntRect *bounds); int ScreenToWindowX(int x); int ScreenToWindowY(int y); From f74e74ac4b17cc2ad24c5e58dd750a1c276df70d Mon Sep 17 00:00:00 2001 From: Marisa Kirisame Date: Sun, 3 Jun 2018 14:06:59 +0200 Subject: [PATCH 09/17] Mirroring should be flipped on HUD models since the view to world space transform already inverts one axis --- src/gl/models/gl_models.cpp | 2 +- src/polyrenderer/scene/poly_model.cpp | 2 +- src/swrenderer/things/r_model.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gl/models/gl_models.cpp b/src/gl/models/gl_models.cpp index a4360cf07..8669d7bab 100644 --- a/src/gl/models/gl_models.cpp +++ b/src/gl/models/gl_models.cpp @@ -91,7 +91,7 @@ void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectTo if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])) { glEnable(GL_CULL_FACE); - glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CCW : GL_CW); + glFrontFace((mirrored ^ GLPortal::isMirrored()) ? 
GL_CW : GL_CCW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; diff --git a/src/polyrenderer/scene/poly_model.cpp b/src/polyrenderer/scene/poly_model.cpp index 04a2d7ba2..1a5aa842f 100644 --- a/src/polyrenderer/scene/poly_model.cpp +++ b/src/polyrenderer/scene/poly_model.cpp @@ -109,7 +109,7 @@ void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectT if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, mirrored); } void PolyModelRenderer::EndDrawHUDModel(AActor *actor) diff --git a/src/swrenderer/things/r_model.cpp b/src/swrenderer/things/r_model.cpp index bf9810e71..d3eac67e0 100644 --- a/src/swrenderer/things/r_model.cpp +++ b/src/swrenderer/things/r_model.cpp @@ -194,7 +194,7 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, mirrored); } void SWModelRenderer::EndDrawHUDModel(AActor *actor) From b5274534d758c99d9244360fe20b0b4a8c4a828e Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 3 Jun 2018 16:05:14 +0300 Subject: [PATCH 10/17] - fixed linking with sanitizer(s) enabled no more unresolved references to various functions --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 59dd637a2..4645cfcd5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -310,6 +310,7 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) set( CMAKE_CXX_FLAGS "${SANITIZER_FLAG} ${CMAKE_CXX_FLAGS}" ) set( CMAKE_C_FLAGS "${SANITIZER_FLAG} ${CMAKE_C_FLAGS}" ) + set( CMAKE_EXE_LINKER_FLAGS "${SANITIZER_FLAG} ${CMAKE_EXE_LINKER_FLAGS}" ) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND 
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.5") set( CMAKE_C_FLAGS "-Wno-unused-result ${CMAKE_C_FLAGS}" ) From f8f1148c59460ab136e30ad887017ceaae8b892e Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 3 Jun 2018 17:45:02 +0300 Subject: [PATCH 11/17] - fixed mouse cursor positioning in menu for Cocoa backend With video resolution scaling enabled engine coordinates of mouse cursor were wrong --- src/posix/cocoa/i_input.mm | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/posix/cocoa/i_input.mm b/src/posix/cocoa/i_input.mm index 745b0c6dc..78841d906 100644 --- a/src/posix/cocoa/i_input.mm +++ b/src/posix/cocoa/i_input.mm @@ -484,11 +484,14 @@ void NSEventToGameMousePosition(NSEvent* inEvent, event_t* outEvent) const NSPoint viewPos = [view convertPointToBacking:windowRect.origin]; const CGFloat frameHeight = I_GetContentViewSize(window).height; - const CGFloat posX = ( viewPos.x - rbOpts.shiftX) / rbOpts.pixelScale; - const CGFloat posY = (frameHeight - viewPos.y - rbOpts.shiftY) / rbOpts.pixelScale; + outEvent->data1 = static_cast( viewPos.x); + outEvent->data2 = static_cast(frameHeight - viewPos.y); - outEvent->data1 = static_cast(posX); - outEvent->data2 = static_cast(posY); + // Compensate letterbox adjustment done by cross-platform code + // More elegant solution is a bit problematic due to HiDPI/Retina support + outEvent->data2 += (screen->GetTrueHeight() - screen->VideoHeight) / 2; + + screen->ScaleCoordsFromWindow(outEvent->data1, outEvent->data2); } void ProcessMouseMoveInMenu(NSEvent* theEvent) From 352f93c0662516f4a9170e1e46912c95118f46a7 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 3 Jun 2018 17:46:23 +0300 Subject: [PATCH 12/17] - deleted now useless code from Cocoa backend Let's rely on cross-platform code to manage window dimensions and screen resolution --- src/posix/cocoa/i_common.h | 16 ---------------- src/posix/cocoa/i_video.mm | 27 --------------------------- 2 files changed, 43 deletions(-) 
diff --git a/src/posix/cocoa/i_common.h b/src/posix/cocoa/i_common.h index be395cf76..f60d82ced 100644 --- a/src/posix/cocoa/i_common.h +++ b/src/posix/cocoa/i_common.h @@ -37,22 +37,6 @@ #import -struct RenderBufferOptions -{ - float pixelScale; - - float shiftX; - float shiftY; - - float width; - float height; - - bool dirty; -}; - -extern RenderBufferOptions rbOpts; - - // Version of AppKit framework we are interested in // The following values are needed to build with earlier SDKs diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 5104a043c..8593f86e7 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -115,9 +115,6 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_ EXTERN_CVAR(Bool, gl_smooth_rendered) -RenderBufferOptions rbOpts; - - // --------------------------------------------------------------------------- @@ -544,20 +541,6 @@ void CocoaVideo::SetFullscreenMode(const int width, const int height) ? 
[screen convertRectToBacking:screenFrame] : screenFrame; - const float displayWidth = displayRect.size.width; - const float displayHeight = displayRect.size.height; - - const float pixelScaleFactorX = displayWidth / static_cast(width ); - const float pixelScaleFactorY = displayHeight / static_cast(height); - - rbOpts.pixelScale = MIN(pixelScaleFactorX, pixelScaleFactorY); - - rbOpts.width = width * rbOpts.pixelScale; - rbOpts.height = height * rbOpts.pixelScale; - - rbOpts.shiftX = (displayWidth - rbOpts.width ) / 2.0f; - rbOpts.shiftY = (displayHeight - rbOpts.height) / 2.0f; - if (!m_fullscreen) { [m_window setLevel:LEVEL_FULLSCREEN]; @@ -571,14 +554,6 @@ void CocoaVideo::SetFullscreenMode(const int width, const int height) void CocoaVideo::SetWindowedMode(const int width, const int height) { - rbOpts.pixelScale = 1.0f; - - rbOpts.width = static_cast(width ); - rbOpts.height = static_cast(height); - - rbOpts.shiftX = 0.0f; - rbOpts.shiftY = 0.0f; - const NSSize windowPixelSize = NSMakeSize(width, height); const NSSize windowSize = vid_hidpi ? [[m_window contentView] convertSizeFromBacking:windowPixelSize] @@ -621,8 +596,6 @@ void CocoaVideo::SetMode(const int width, const int height, const bool fullscree SetWindowedMode(width, height); } - rbOpts.dirty = true; - const NSSize viewSize = I_GetContentViewSize(m_window); glViewport(0, 0, static_cast(viewSize.width), static_cast(viewSize.height)); From a851a5d1514f4d48fa2fa56c4982e072e1fa012d Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 3 Jun 2018 17:49:00 +0200 Subject: [PATCH 13/17] - fixed: For melee attacks with a short attack range P_AimLineAttack must check for hits from above and below. This is necessary to be in line with P_LineAttack which does check for those. 
--- src/p_actionfunctions.cpp | 2 +- src/p_map.cpp | 96 ++++++++++++++++++++- wadsrc/static/zscript/doom/weaponfist.txt | 2 +- wadsrc/static/zscript/hexen/clericmace.txt | 4 +- wadsrc/static/zscript/hexen/fighteraxe.txt | 4 +- wadsrc/static/zscript/hexen/fighterfist.txt | 4 +- 6 files changed, 100 insertions(+), 12 deletions(-) diff --git a/src/p_actionfunctions.cpp b/src/p_actionfunctions.cpp index 2af237b8a..a7a12f9f1 100644 --- a/src/p_actionfunctions.cpp +++ b/src/p_actionfunctions.cpp @@ -1997,7 +1997,7 @@ DEFINE_ACTION_FUNCTION(AStateProvider, A_CustomPunch) angle = self->Angles.Yaw + pr_cwpunch.Random2() * (5.625 / 256); if (range == 0) range = DEFMELEERANGE; - pitch = P_AimLineAttack (self, angle, range, &t); + pitch = P_AimLineAttack (self, angle, range, &t, 0., ALF_CHECK3D); // only use ammo when actually hitting something! if ((flags & CPF_USEAMMO) && t.linetarget && weapon && ACTION_CALL_FROM_PSPRITE()) diff --git a/src/p_map.cpp b/src/p_map.cpp index 89eae7859..8a7134967 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -3998,6 +3998,70 @@ struct aim_t SetResult(thing_other, newtrace.thing_other); } + //============================================================================ + // + // Finds where the trace exits an actor to check for hits from above/below + // + //============================================================================ + + double ExitPoint(AActor *thing) + { + // The added check at the exit point only has some value if a 3D distance check is involved + if (!(flags & ALF_CHECK3D)) return -1; + + divline_t trace = { startpos.X, startpos.Y, aimtrace.X, aimtrace.Y }; + divline_t line; + + for (int i = 0; i < 4; ++i) + { + switch (i) + { + case 0: // Top edge + line.y = thing->Y() + thing->radius; + if (trace.y > line.y) continue; + line.x = thing->X() + thing->radius; + line.dx = -thing->radius * 2; + line.dy = 0; + break; + + case 1: // Right edge + line.x = thing->X() + thing->radius; + if (trace.x > line.x) continue; + line.y = 
thing->Y() - thing->radius; + line.dx = 0; + line.dy = thing->radius * 2; + break; + + case 2: // Bottom edge + line.y = thing->Y() - thing->radius; + if (trace.y < line.y) continue; + line.x = thing->X() - thing->radius; + line.dx = thing->radius * 2; + line.dy = 0; + break; + + case 3: // Left edge + line.x = thing->X() - thing->radius; + if (trace.x < line.x) continue; + line.y = thing->Y() + thing->radius; + line.dx = 0; + line.dy = thing->radius * -2; + break; + } + + // If it is, see if the trace crosses it + if (P_PointOnDivlineSide(line.x, line.y, &trace) != + P_PointOnDivlineSide(line.x + line.dx, line.y + line.dy, &trace)) + { + // It's a hit + double frac = P_InterceptVector(&trace, &line); + if (frac > 1.) frac = 1.; + return frac; + } + } + + return -1.; + } //============================================================================ // @@ -4047,9 +4111,7 @@ struct aim_t intercept_t *in; if (aimdebug) - Printf("Start AimTraverse, start = %f,%f,%f, vect = %f,%f\n", - startpos.X / 65536., startpos.Y / 65536., startpos.Z / 65536., - aimtrace.X / 65536., aimtrace.Y / 65536.); + Printf("Start AimTraverse, start = %f,%f,%f, vect = %f,%f\n", startpos.X, startpos.Y, startpos.Z, aimtrace.X, aimtrace.Y); while ((in = it.Next())) { @@ -4195,12 +4257,38 @@ struct aim_t thingtoppitch = -VecToAngle(dist, th->Top() - shootz); if (thingtoppitch > bottompitch) - continue; // shot over the thing + { + // Check for a hit from above + if (shootz > th->Top()) + { + double exitfrac = ExitPoint(th); + if (exitfrac > 0.) + { + double exitdist = attackrange * exitfrac; + thingtoppitch = -VecToAngle(exitdist, th->Top() - shootz); + if (thingtoppitch > bottompitch) continue; + } + } + else continue; // shot over the thing + } thingbottompitch = -VecToAngle(dist, th->Z() - shootz); if (thingbottompitch < toppitch) + { + // Check for a hit from below + if (shootz < th->Z()) + { + double exitfrac = ExitPoint(th); + if (exitfrac > 0.) 
+ { + double exitdist = attackrange * exitfrac; + thingbottompitch = -VecToAngle(exitdist, th->Z() - shootz); + if (thingbottompitch < toppitch) continue; + } + } - continue; // shot under the thing + if (shootz >= th->Z() || thingbottompitch < toppitch) continue; // shot under the thing + } if (crossedffloors) { diff --git a/wadsrc/static/zscript/doom/weaponfist.txt b/wadsrc/static/zscript/doom/weaponfist.txt index 6f106e14a..ae0eedddc 100644 --- a/wadsrc/static/zscript/doom/weaponfist.txt +++ b/wadsrc/static/zscript/doom/weaponfist.txt @@ -65,7 +65,7 @@ extend class Actor damage *= 10; double ang = angle + Random2[Punch]() * (5.625 / 256); - double pitch = AimLineAttack (ang, DEFMELEERANGE); + double pitch = AimLineAttack (ang, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (ang, DEFMELEERANGE, pitch, damage, 'Melee', "BulletPuff", LAF_ISMELEEATTACK, t); diff --git a/wadsrc/static/zscript/hexen/clericmace.txt b/wadsrc/static/zscript/hexen/clericmace.txt index 2198e3cc4..adce6431a 100644 --- a/wadsrc/static/zscript/hexen/clericmace.txt +++ b/wadsrc/static/zscript/hexen/clericmace.txt @@ -66,7 +66,7 @@ class CWeapMace : ClericWeapon for (int j = 1; j >= -1; j -= 2) { double ang = angle + j*i*(45. 
/ 16); - double slope = AimLineAttack(ang, 2 * DEFMELEERANGE, t); + double slope = AimLineAttack(ang, 2 * DEFMELEERANGE, t, 0., ALF_CHECK3D); if (t.linetarget) { LineAttack(ang, 2 * DEFMELEERANGE, slope, damage, 'Melee', "HammerPuff", true, t); @@ -81,7 +81,7 @@ class CWeapMace : ClericWeapon // didn't find any creatures, so try to strike any walls weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE); + double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', "HammerPuff"); } } diff --git a/wadsrc/static/zscript/hexen/fighteraxe.txt b/wadsrc/static/zscript/hexen/fighteraxe.txt index 61e509744..a89bd7e5b 100644 --- a/wadsrc/static/zscript/hexen/fighteraxe.txt +++ b/wadsrc/static/zscript/hexen/fighteraxe.txt @@ -245,7 +245,7 @@ class FWeapAxe : FighterWeapon for (int j = 1; j >= -1; j -= 2) { double ang = angle + j*i*(45. / 16); - double slope = AimLineAttack(ang, AXERANGE, t); + double slope = AimLineAttack(ang, AXERANGE, t, 0., ALF_CHECK3D); if (t.linetarget) { LineAttack(ang, AXERANGE, slope, damage, 'Melee', pufftype, true, t); @@ -273,7 +273,7 @@ class FWeapAxe : FighterWeapon // didn't find any creatures, so try to strike any walls self.weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE); + double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', pufftype, true); } } diff --git a/wadsrc/static/zscript/hexen/fighterfist.txt b/wadsrc/static/zscript/hexen/fighterfist.txt index 4975bbd62..dd5f02095 100644 --- a/wadsrc/static/zscript/hexen/fighterfist.txt +++ b/wadsrc/static/zscript/hexen/fighterfist.txt @@ -56,7 +56,7 @@ class FWeapFist : FighterWeapon Class pufftype; FTranslatedLineTarget t; - double slope = AimLineAttack (angle, 2*DEFMELEERANGE, t); + double slope = AimLineAttack (angle, 2*DEFMELEERANGE, t, 0., ALF_CHECK3D); if (t.linetarget != null) { if (++weaponspecial 
>= 3) @@ -117,7 +117,7 @@ class FWeapFist : FighterWeapon // didn't find any creatures, so try to strike any walls weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE); + double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', "PunchPuff", true); } From 23fce56b5e14615991de9937dcebc2c740eac6b9 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 3 Jun 2018 18:20:50 +0200 Subject: [PATCH 14/17] - fix memory arena allocation alignment for 32 bit systems. --- src/memarena.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/memarena.cpp b/src/memarena.cpp index d19521edf..2c0336a5e 100644 --- a/src/memarena.cpp +++ b/src/memarena.cpp @@ -55,13 +55,14 @@ struct FMemArena::Block // // RoundPointer // -// Rounds a pointer up to a pointer-sized boundary. +// Rounds a pointer up to the size of the largest integral type. // //========================================================================== static inline void *RoundPointer(void *ptr) { - return (void *)(((size_t)ptr + sizeof(void*) - 1) & ~(sizeof(void*) - 1)); + const auto roundsize = std::max(sizeof(void*), sizeof(double)); + return (void *)(((size_t)ptr + roundsize - 1) & ~(roundsize - 1)); } //========================================================================== From d425fb2d4ab1f38fa1db4f5995b2aa5636195bb1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 3 Jun 2018 18:36:37 +0200 Subject: [PATCH 15/17] - replace the old rect drawers with new ones based on render styles --- src/polyrenderer/drawers/poly_draw_args.cpp | 59 +- src/polyrenderer/drawers/poly_draw_args.h | 14 +- src/polyrenderer/drawers/poly_drawer32.h | 476 -------------- src/polyrenderer/drawers/poly_drawer32_sse2.h | 518 --------------- src/polyrenderer/drawers/poly_drawer8.h | 295 --------- src/polyrenderer/drawers/screen_triangle.cpp | 620 ++++++++++++++++-- src/polyrenderer/drawers/screen_triangle.h 
| 57 -- 7 files changed, 603 insertions(+), 1436 deletions(-) delete mode 100644 src/polyrenderer/drawers/poly_drawer32.h delete mode 100644 src/polyrenderer/drawers/poly_drawer32_sse2.h delete mode 100644 src/polyrenderer/drawers/poly_drawer8.h diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 8aa79f70b..3e02940cd 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -226,6 +226,7 @@ void RectDrawArgs::SetTexture(FTexture *texture, FRenderStyle style) void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, FRenderStyle style) { + // Alphatexture overrides translations. if (translationID != 0xffffffff && translationID != 0 && !(style.Flags & STYLEF_RedIsAlpha)) { FRemapTable *table = TranslationToTable(translationID); @@ -299,61 +300,63 @@ void RectDrawArgs::Draw(PolyRenderThread *thread, double x0, double x1, double y thread->DrawQueue->Push(*this); } -void RectDrawArgs::SetStyle(FRenderStyle renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright) +void RectDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright) { SetTexture(tex, translationID, renderstyle); + SetColor(0xff000000 | fillcolor, fillcolor >> 24); if (renderstyle == LegacyRenderStyles[STYLE_Normal] || (r_drawfuzz == 0 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, 1.0, 0.0); + SetStyle(Translation() ? TriBlendMode::NormalTranslated : TriBlendMode::Normal, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_Add] && fullbright && alpha == 1.0 && !Translation()) { - SetStyle(RectBlendMode::TextureAddSrcColor, 1.0, 1.0); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Add]) - { - SetStyle(Translation() ? 
RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, alpha, 1.0); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Subtract]) - { - SetStyle(Translation() ? RectBlendMode::TranslatedRevSub : RectBlendMode::TextureRevSub, alpha, 1.0); + SetStyle(TriBlendMode::SrcColor, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_SoulTrans]) { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, transsouls, 1.0 - transsouls); + SetStyle(Translation() ? TriBlendMode::AddTranslated : TriBlendMode::Add, transsouls); } else if (renderstyle == LegacyRenderStyles[STYLE_Fuzzy] || (r_drawfuzz == 1 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { SetColor(0xff000000, 0); - SetStyle(RectBlendMode::Fuzz); + SetStyle(TriBlendMode::Fuzzy); } else if (renderstyle == LegacyRenderStyles[STYLE_Shadow] || (r_drawfuzz == 2 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, 0.0, 160 / 255.0); + SetColor(0xff000000, 0); + SetStyle(Translation() ? TriBlendMode::TranslucentStencilTranslated : TriBlendMode::TranslucentStencil, 1.0 - 160 / 255.0); } - else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil]) + else if (renderstyle == LegacyRenderStyles[STYLE_Stencil]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::Stencil, alpha, 1.0 - alpha); + SetStyle(Translation() ? TriBlendMode::StencilTranslated : TriBlendMode::Stencil, alpha); } - else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil]) + else if (renderstyle == LegacyRenderStyles[STYLE_Translucent]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::AddStencil, alpha, 1.0); + SetStyle(Translation() ? TriBlendMode::TranslucentTranslated : TriBlendMode::Translucent, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Add]) + { + SetStyle(Translation() ? 
TriBlendMode::AddTranslated : TriBlendMode::Add, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_Shaded]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::Shaded, alpha, 1.0 - alpha); + SetStyle(Translation() ? TriBlendMode::ShadedTranslated : TriBlendMode::Shaded, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil]) + { + SetStyle(Translation() ? TriBlendMode::TranslucentStencilTranslated : TriBlendMode::TranslucentStencil, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Subtract]) + { + SetStyle(Translation() ? TriBlendMode::SubtractTranslated : TriBlendMode::Subtract, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil]) + { + SetStyle(Translation() ? TriBlendMode::AddStencilTranslated : TriBlendMode::AddStencil, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_AddShaded]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::AddShaded, alpha, 1.0); - } - else - { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, alpha, 1.0 - alpha); + SetStyle(Translation() ? 
TriBlendMode::AddShadedTranslated : TriBlendMode::AddShaded, alpha); } } diff --git a/src/polyrenderer/drawers/poly_draw_args.h b/src/polyrenderer/drawers/poly_draw_args.h index 88f174525..2d43ae64f 100644 --- a/src/polyrenderer/drawers/poly_draw_args.h +++ b/src/polyrenderer/drawers/poly_draw_args.h @@ -180,8 +180,8 @@ public: void SetTexture(FTexture *texture, FRenderStyle style); void SetTexture(FTexture *texture, uint32_t translationID, FRenderStyle style); void SetLight(FSWColormap *basecolormap, uint32_t lightlevel); - void SetStyle(RectBlendMode blendmode, double srcalpha = 1.0, double destalpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(srcalpha * 256.0 + 0.5); mDestAlpha = (uint32_t)(destalpha * 256.0 + 0.5); } - void SetStyle(FRenderStyle renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); + void SetStyle(TriBlendMode blendmode, double alpha = 1.0) { mBlendMode = blendmode; mAlpha = (uint32_t)(alpha * 256.0 + 0.5); } + void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); void SetColor(uint32_t bgra, uint8_t palindex); void Draw(PolyRenderThread *thread, double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1); @@ -191,10 +191,9 @@ public: int TextureHeight() const { return mTextureHeight; } const uint8_t *Translation() const { return mTranslation; } - RectBlendMode BlendMode() const { return mBlendMode; } + TriBlendMode BlendMode() const { return mBlendMode; } uint32_t Color() const { return mColor; } - uint32_t SrcAlpha() const { return mSrcAlpha; } - uint32_t DestAlpha() const { return mDestAlpha; } + uint32_t Alpha() const { return mAlpha; } uint32_t Light() const { return mLight; } const uint8_t *BaseColormap() const { return mColormaps; } @@ -225,11 +224,10 @@ private: int mTextureHeight = 0; const uint8_t *mTranslation = nullptr; const uint8_t *mColormaps = 
nullptr; - RectBlendMode mBlendMode = RectBlendMode::FillOpaque; + TriBlendMode mBlendMode = TriBlendMode::Fill; uint32_t mLight = 0; uint32_t mColor = 0; - uint32_t mSrcAlpha = 0; - uint32_t mDestAlpha = 0; + uint32_t mAlpha = 0; uint16_t mLightAlpha = 0; uint16_t mLightRed = 0; uint16_t mLightGreen = 0; diff --git a/src/polyrenderer/drawers/poly_drawer32.h b/src/polyrenderer/drawers/poly_drawer32.h deleted file mode 100644 index 4dae396de..000000000 --- a/src/polyrenderer/drawers/poly_drawer32.h +++ /dev/null @@ -1,476 +0,0 @@ -/* -** Polygon Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "screen_triangle.h" - -namespace TriScreenDrawerModes -{ - namespace - { - struct BgraColor - { - uint32_t b, g, r, a; - BgraColor() { } - BgraColor(uint32_t c) : b(BPART(c)), g(GPART(c)), r(RPART(c)), a(APART(c)) { } - BgraColor &operator=(uint32_t c) { b = BPART(c); g = GPART(c); r = RPART(c); a = APART(c); return *this; } - operator uint32_t() const { return MAKEARGB(a, r, g, b); } - }; - } - - template - FORCEINLINE unsigned int Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) - { - uint32_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texpal[texelX * texHeight + texelY]]; - } - else if (FilterModeT::Mode == (int)FilterModes::Nearest) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - else - { - u -= oneU >> 1; - v -= oneV >> 1; - - unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; - unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth; - unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; - unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; - unsigned int x0 = frac_x0 >> FRACBITS; - unsigned int x1 = frac_x1 >> FRACBITS; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 
= texPixels[x0 * texHeight + y0]; - unsigned int p01 = texPixels[x0 * texHeight + y1]; - unsigned int p10 = texPixels[x1 * texHeight + y0]; - unsigned int p11 = texPixels[x1 * texHeight + y1]; - - unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; - unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - uint32_t r = RPART(texel); - uint32_t g = GPART(texel); - uint32_t b = BPART(texel); - uint32_t fg_a = APART(texel); - uint32_t bg_red = RPART(color); - uint32_t bg_green = GPART(color); - uint32_t bg_blue = BPART(color); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; - return MAKEARGB(fg_a, r, g, b); - } - else - { - return texel; - } - } - - template - FORCEINLINE unsigned int SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int x, int y) - { - if (SamplerT::Mode == (int)Samplers::Shaded) - { - const uint8_t *texpal = (const uint8_t 
*)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - return sampleshadeout; - } - else - { - return 0; - } - } - - FORCEINLINE BgraColor VECTORCALL AddLights(BgraColor material, BgraColor fgcolor, BgraColor dynlight) - { - fgcolor.r = MIN(fgcolor.r + ((material.r * dynlight.r) >> 8), (uint32_t)255); - fgcolor.g = MIN(fgcolor.g + ((material.g * dynlight.g) >> 8), (uint32_t)255); - fgcolor.b = MIN(fgcolor.b + ((material.b * dynlight.b) >> 8), (uint32_t)255); - return fgcolor; - } - - FORCEINLINE BgraColor VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, FVector3 worldpos, FVector3 worldnormal, uint32_t 
dynlightcolor) - { - BgraColor lit = dynlightcolor; - - for (int i = 0; i != num_lights; i++) - { - FVector3 lightpos = { lights[i].x, lights[i].y, lights[i].z }; - float light_radius = lights[i].radius; - - bool is_attenuated = light_radius < 0.0f; - if (is_attenuated) - light_radius = -light_radius; - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - FVector3 L = lightpos - worldpos; - float dist2 = L | L; - float rcp_dist = 1.0f / sqrt(dist2); - float dist = dist2 * rcp_dist; - float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f); - - // The simple light type - float simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = max(dot(N,normalize(L)),0) * attenuation - float dotNL = worldnormal | (L * rcp_dist); - float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation; - - uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation); - - BgraColor light_color = lights[i].color; - lit.r += (light_color.r * attenuation) >> 8; - lit.g += (light_color.g * attenuation) >> 8; - lit.b += (light_color.b * attenuation) >> 8; - } - - lit.r = MIN(lit.r, (uint32_t)256); - lit.g = MIN(lit.g, (uint32_t)256); - lit.b = MIN(lit.b, (uint32_t)256); - return lit; - } - - template - FORCEINLINE BgraColor Shade32(BgraColor fgcolor, BgraColor mlight, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, BgraColor dynlight) - { - BgraColor material = fgcolor; - if (ShadeModeT::Mode == (int)ShadeMode::Simple) - { - fgcolor.r = (fgcolor.r * mlight.r) >> 8; - fgcolor.g = (fgcolor.g * mlight.g) >> 8; - fgcolor.b = (fgcolor.b * mlight.b) >> 8; - } - else if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - uint32_t intensity = ((fgcolor.r * 77 + fgcolor.g * 143 + fgcolor.b * 37) >> 8) * desaturate; - fgcolor.r = (((shade_fade.r + ((fgcolor.r * inv_desaturate + intensity) >> 8) * mlight.r) >> 8) 
* shade_light.r) >> 8; - fgcolor.g = (((shade_fade.g + ((fgcolor.g * inv_desaturate + intensity) >> 8) * mlight.g) >> 8) * shade_light.g) >> 8; - fgcolor.b = (((shade_fade.b + ((fgcolor.b * inv_desaturate + intensity) >> 8) * mlight.b) >> 8) * shade_light.b) >> 8; - } - return AddLights(material, fgcolor, dynlight); - } - - template - FORCEINLINE BgraColor Blend32(BgraColor fgcolor, BgraColor bgcolor, uint32_t ifgcolor, uint32_t ifgshade, uint32_t srcalpha, uint32_t destalpha) - { - if (BlendT::Mode == (int)BlendModes::Opaque) - { - fgcolor.a = 255; - return fgcolor; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - return (ifgcolor == 0) ? bgcolor : fgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - uint32_t srcred = fgcolor.r + (fgcolor.r >> 7); - uint32_t srcgreen = fgcolor.g + (fgcolor.g >> 7); - uint32_t srcblue = fgcolor.b + (fgcolor.b >> 7); - uint32_t inv_srcred = 256 - srcred; - uint32_t inv_srcgreen = 256 - srcgreen; - uint32_t inv_srcblue = 256 - srcblue; - - BgraColor outcolor; - outcolor.r = (fgcolor.r * srcred + bgcolor.r * inv_srcred) >> 8; - outcolor.g = (fgcolor.g * srcgreen + bgcolor.g * inv_srcgreen) >> 8; - outcolor.b = (fgcolor.b * srcblue + bgcolor.b * inv_srcblue) >> 8; - outcolor.a = 255; - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - uint32_t alpha = ifgshade; - uint32_t inv_alpha = 256 - alpha; - - BgraColor outcolor; - outcolor.r = (fgcolor.r * alpha + bgcolor.r * inv_alpha) >> 8; - outcolor.g = (fgcolor.g * alpha + bgcolor.g * inv_alpha) >> 8; - outcolor.b = (fgcolor.b * alpha + bgcolor.b * inv_alpha) >> 8; - outcolor.a = 255; - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - uint32_t alpha = ifgshade; - BgraColor outcolor; - outcolor.r = ((fgcolor.r * alpha) >> 8) + bgcolor.r; - outcolor.g = ((fgcolor.g * alpha) >> 8) + bgcolor.g; - outcolor.b = ((fgcolor.b * alpha) >> 8) + bgcolor.b; - outcolor.a = 255; - return 
outcolor; - } - else - { - uint32_t alpha = APART(ifgcolor); - alpha += alpha >> 7; // 255->256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bgalpha = (destalpha * alpha + (inv_alpha << 8) + 128) >> 8; - uint32_t fgalpha = (srcalpha * alpha + 128) >> 8; - - fgcolor.r *= fgalpha; - fgcolor.g *= fgalpha; - fgcolor.b *= fgalpha; - bgcolor.r *= bgalpha; - bgcolor.g *= bgalpha; - bgcolor.b *= bgalpha; - - BgraColor outcolor; - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - outcolor.r = MIN((fgcolor.r + bgcolor.r) >> 8, 255); - outcolor.g = MIN((fgcolor.g + bgcolor.g) >> 8, 255); - outcolor.b = MIN((fgcolor.b + bgcolor.b) >> 8, 255); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - outcolor.r = MAX(int32_t(fgcolor.r - bgcolor.r) >> 8, 0); - outcolor.g = MAX(int32_t(fgcolor.g - bgcolor.g) >> 8, 0); - outcolor.b = MAX(int32_t(fgcolor.b - bgcolor.b) >> 8, 0); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - outcolor.r = MAX(int32_t(bgcolor.r - fgcolor.r) >> 8, 0); - outcolor.g = MAX(int32_t(bgcolor.g - fgcolor.g) >> 8, 0); - outcolor.b = MAX(int32_t(bgcolor.b - fgcolor.b) >> 8, 0); - } - outcolor.a = 255; - return outcolor; - } - } -} - -template -class RectScreenDrawer32 -{ -public: - static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - if (SamplerT::Mode == (int)Samplers::Fuzz) - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - else if (args->SimpleShade()) - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - else - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - } - -private: - template - FORCEINLINE static void Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 
0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= x0 || y1 <= y0) - return; - - uint32_t srcalpha = args->SrcAlpha(); - uint32_t destalpha = args->DestAlpha(); - - // Setup step variables - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - // Sampling stuff - uint32_t color = args->Color(); - const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation(); - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels(); - uint32_t texWidth = args->TextureWidth(); - uint32_t texHeight = args->TextureHeight(); - uint32_t oneU, oneV; - if (SamplerT::Mode != (int)Samplers::Fill) - { - oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; - oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; - } - else - { - oneU = 0; - oneV = 0; - } - - // Setup light - uint32_t lightpos = args->Light(); - lightpos += lightpos >> 7; // 255 -> 256 - BgraColor mlight; - - BgraColor dynlight = 0; - - // Shade constants - int inv_desaturate; - BgraColor shade_fade_lit, shade_light; - int desaturate; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - uint32_t inv_light = 256 - lightpos; - shade_fade_lit.r = args->ShadeFadeRed() * inv_light; - shade_fade_lit.g = args->ShadeFadeGreen() * inv_light; - shade_fade_lit.b = args->ShadeFadeBlue() * inv_light; - shade_light.r = args->ShadeLightRed(); - shade_light.g = args->ShadeLightGreen(); - shade_light.b = args->ShadeLightBlue(); - desaturate = args->ShadeDesaturate(); 
- inv_desaturate = 256 - desaturate; - mlight.r = lightpos; - mlight.g = lightpos; - mlight.b = lightpos; - } - else - { - inv_desaturate = 0; - shade_fade_lit.r = 0; - shade_fade_lit.g = 0; - shade_fade_lit.b = 0; - shade_light.r = 0; - shade_light.g = 0; - shade_light.b = 0; - desaturate = 0; - mlight.r = lightpos; - mlight.g = lightpos; - mlight.b = lightpos; - } - - int count = x1 - x0; - - uint32_t posV = startV; - for (int y = y0; y < y1; y++, posV += stepV) - { - int coreBlock = y / 8; - if (coreBlock % thread->num_cores != thread->core) - { - continue; - } - - uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0; - - uint32_t posU = startU; - for (int i = 0; i < count; i++) - { - // Load bgcolor - BgraColor bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = *dest; - else - bgcolor = 0; - - // Sample fgcolor - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = *dest; - unsigned int ifgcolor = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - unsigned int ifgshade = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i, y); - posU += stepU; - - // Shade and blend - BgraColor fgcolor = Shade32(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - BgraColor outcolor = Blend32(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha); - - // Store result - *dest = outcolor; - dest++; - } - } - } -}; diff --git a/src/polyrenderer/drawers/poly_drawer32_sse2.h b/src/polyrenderer/drawers/poly_drawer32_sse2.h deleted file mode 100644 index 4685dd07c..000000000 --- a/src/polyrenderer/drawers/poly_drawer32_sse2.h +++ /dev/null @@ -1,518 +0,0 @@ -/* -** Polygon Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "screen_triangle.h" - -namespace TriScreenDrawerModes -{ - template - FORCEINLINE unsigned int VECTORCALL Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) - { - uint32_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texpal[texelX * texHeight + texelY]]; - } - else if (FilterModeT::Mode == (int)FilterModes::Nearest) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - else - { - u -= oneU >> 1; - v -= oneV >> 1; - - unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; - unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * 
texWidth; - unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; - unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; - unsigned int x0 = frac_x0 >> FRACBITS; - unsigned int x1 = frac_x1 >> FRACBITS; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = texPixels[x0 * texHeight + y0]; - unsigned int p01 = texPixels[x0 * texHeight + y1]; - unsigned int p10 = texPixels[x1 * texHeight + y0]; - unsigned int p11 = texPixels[x1 * texHeight + y1]; - - unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; - unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - uint32_t r = RPART(texel); - uint32_t g = GPART(texel); - uint32_t b = BPART(texel); - uint32_t fg_a = APART(texel); - uint32_t bg_red = RPART(color); - uint32_t bg_green = GPART(color); - uint32_t bg_blue = BPART(color); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + 
bg_blue * inv_a + 127) >> 8; - return MAKEARGB(fg_a, r, g, b); - } - else - { - return texel; - } - } - - template - FORCEINLINE unsigned int VECTORCALL SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int x, int y) - { - if (SamplerT::Mode == (int)Samplers::Shaded) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - return sampleshadeout; - } - else - { - return 0; - } - } - - FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, __m128i dynlight) - { - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - return fgcolor; - } - - FORCEINLINE __m128i VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal, uint32_t dynlightcolor) - { - __m128i lit = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()); - lit = _mm_shuffle_epi32(lit, _MM_SHUFFLE(1, 0, 1, 0)); - - for (int i = 0; i != num_lights; i++) - { - __m128 m256 = _mm_set1_ps(256.0f); - __m128 mSignBit = _mm_set1_ps(-0.0f); - - __m128 lightpos = _mm_loadu_ps(&lights[i].x); - __m128 light_radius = _mm_load_ss(&lights[i].radius); - - __m128 is_attenuated = _mm_cmpge_ss(light_radius, _mm_setzero_ps()); - is_attenuated = _mm_shuffle_ps(is_attenuated, is_attenuated, _MM_SHUFFLE(0, 0, 0, 0)); - light_radius = _mm_andnot_ps(mSignBit, light_radius); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 L = _mm_sub_ps(lightpos, worldpos); - __m128 dist2 = _mm_mul_ps(L, L); - dist2 = _mm_add_ss(dist2, _mm_add_ss(_mm_shuffle_ps(dist2, dist2, _MM_SHUFFLE(0, 0, 0, 1)), _mm_shuffle_ps(dist2, dist2, _MM_SHUFFLE(0, 0, 0, 2)))); - __m128 rcp_dist = _mm_rsqrt_ss(dist2); - __m128 dist = _mm_mul_ss(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ss(m256, _mm_min_ss(_mm_mul_ss(dist, light_radius), m256)); - distance_attenuation = _mm_shuffle_ps(distance_attenuation, distance_attenuation, _MM_SHUFFLE(0, 0, 0, 0)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = max(dot(N,normalize(L)),0) * attenuation - __m128 dotNL = _mm_mul_ps(worldnormal, _mm_mul_ps(L, _mm_shuffle_ps(rcp_dist, rcp_dist, _MM_SHUFFLE(0, 0, 0, 0)))); - dotNL = _mm_add_ss(dotNL, _mm_add_ss(_mm_shuffle_ps(dotNL, dotNL, _MM_SHUFFLE(0, 0, 0, 1)), _mm_shuffle_ps(dotNL, dotNL, _MM_SHUFFLE(0, 0, 0, 2)))); - dotNL = _mm_max_ss(dotNL, 
_mm_setzero_ps()); - __m128 point_attenuation = _mm_mul_ss(dotNL, distance_attenuation); - point_attenuation = _mm_shuffle_ps(point_attenuation, point_attenuation, _MM_SHUFFLE(0, 0, 0, 0)); - - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 1, 1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1, 0, 1, 0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - return _mm_min_epi16(lit, _mm_set1_epi16(256)); - } - - template - FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i dynlight) - { - __m128i material = fgcolor; - if (ShadeModeT::Mode == (int)ShadeMode::Simple) - { - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - } - else if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - int blue0 = BPART(ifgcolor0); - int green0 = GPART(ifgcolor0); - int red0 = RPART(ifgcolor0); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor1); - int green1 = GPART(ifgcolor1); - int red1 = RPART(ifgcolor1); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - } - - return AddLights(material, fgcolor, dynlight); - } - - template - FORCEINLINE __m128i VECTORCALL Blend32(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) - { - if (BlendT::Mode == (int)BlendModes::Opaque) - { - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - __m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128()); - mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128()); - __m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor)); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - __m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7))); - __m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8)); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; - ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; - __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; - ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; - __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); - - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8); - __m128i outcolor = _mm_add_epi16(fgcolor, bgcolor); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else - { - uint32_t alpha0 = APART(ifgcolor0); - uint32_t alpha1 = APART(ifgcolor1); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo, out_hi; - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - out_lo = _mm_add_epi32(fg_lo, bg_lo); - out_hi = _mm_add_epi32(fg_hi, bg_hi); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - out_lo = _mm_sub_epi32(fg_lo, bg_lo); - out_hi 
= _mm_sub_epi32(fg_hi, bg_hi); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - out_lo = _mm_sub_epi32(bg_lo, fg_lo); - out_hi = _mm_sub_epi32(bg_hi, fg_hi); - } - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - } -} - -template -class RectScreenDrawer32 -{ -public: - static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - if (args->SimpleShade()) - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - else - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - } - -private: - template - FORCEINLINE static void VECTORCALL Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= x0 || y1 <= y0) - return; - - uint32_t srcalpha = args->SrcAlpha(); - uint32_t destalpha = args->DestAlpha(); - - // Setup step variables - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - // Sampling stuff - uint32_t color = 
args->Color(); - const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation(); - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels(); - uint32_t texWidth = args->TextureWidth(); - uint32_t texHeight = args->TextureHeight(); - uint32_t oneU, oneV; - if (SamplerT::Mode != (int)Samplers::Fill) - { - oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; - oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; - } - else - { - oneU = 0; - oneV = 0; - } - - // Shade constants - __m128i inv_desaturate, shade_fade, shade_light; - int desaturate; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - inv_desaturate = _mm_setr_epi16(256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate()); - shade_fade = _mm_set_epi16(args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue(), args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue()); - shade_light = _mm_set_epi16(args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue(), args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue()); - desaturate = args->ShadeDesaturate(); - } - else - { - inv_desaturate = _mm_setzero_si128(); - shade_fade = _mm_setzero_si128(); - shade_light = _mm_setzero_si128(); - desaturate = 0; - } - - // Setup light - uint32_t lightpos = args->Light(); - lightpos += lightpos >> 7; // 255 -> 256 - __m128i mlight = _mm_set_epi16(256, lightpos, lightpos, lightpos, 256, lightpos, lightpos, lightpos); - __m128i shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - __m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight); - shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light); - } - else - { - shade_fade_lit = 
_mm_setzero_si128(); - } - - int count = x1 - x0; - int sseCount = count / 2; - - uint32_t posV = startV; - for (int y = y0; y < y1; y++, posV += stepV) - { - int coreBlock = y / 8; - if (coreBlock % thread->num_cores != thread->core) - { - continue; - } - - uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0; - - uint32_t posU = startU; - for (int i = 0; i < sseCount; i++) - { - // Load bgcolor - __m128i bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)dest), _mm_setzero_si128()); - else - bgcolor = _mm_setzero_si128(); - - // Sample fgcolor - unsigned int ifgcolor[2], ifgshade[2]; - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[0]; - ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i * 2, y); - posU += stepU; - - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[1]; - ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i * 2 + 1, y); - posU += stepU; - - // Shade and blend - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128()); - __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); - - // Store result - _mm_storel_epi64((__m128i*)dest, outcolor); - dest += 2; - } - - if (sseCount * 2 != count) - { - // Load bgcolor - __m128i bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dest), _mm_setzero_si128()); - else - bgcolor = _mm_setzero_si128(); - - // Sample fgcolor - unsigned int ifgcolor[2], ifgshade[2]; - if 
(SamplerT::Mode == (int)Samplers::FogBoundary) color = *dest; - ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + sseCount * 2, y); - ifgcolor[1] = 0; - ifgshade[1] = 0; - posU += stepU; - - // Shade and blend - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128()); - __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); - - // Store result - *dest = _mm_cvtsi128_si32(outcolor); - } - } - } -}; diff --git a/src/polyrenderer/drawers/poly_drawer8.h b/src/polyrenderer/drawers/poly_drawer8.h deleted file mode 100644 index 1db272885..000000000 --- a/src/polyrenderer/drawers/poly_drawer8.h +++ /dev/null @@ -1,295 +0,0 @@ -/* -** Polygon Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "screen_triangle.h" - -namespace TriScreenDrawerModes -{ - template - FORCEINLINE unsigned int Sample8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation) - { - uint8_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texPixels[texelX * texHeight + texelY]]; - } - else - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - if (a == 256) - return texel; - - uint32_t capcolor = GPalette.BaseColors[color].d; - uint32_t texelrgb = GPalette.BaseColors[texel].d; - uint32_t r = RPART(texelrgb); - uint32_t g = GPART(texelrgb); - uint32_t b = BPART(texelrgb); - uint32_t capcolor_red = RPART(capcolor); - uint32_t capcolor_green = GPART(capcolor); - uint32_t capcolor_blue = BPART(capcolor); - r = (r * a + capcolor_red * inv_a + 127) >> 8; - g = (g * a + capcolor_green * inv_a + 127) >> 8; - b = (b * a + capcolor_blue * inv_a + 127) >> 8; - return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; - } - else - { - return texel; - } - } - - template - FORCEINLINE unsigned int SampleShade8(int32_t u, int32_t v, const 
uint8_t *texPixels, int texWidth, int texHeight, int x, int y) - { - if (SamplerT::Mode == (int)Samplers::Shaded) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0; - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0; - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - return sampleshadeout; - } - else - { - return 0; - } - } - - template - FORCEINLINE uint8_t ShadeAndBlend8(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha) - { - lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00; - uint8_t shadedfg = colormaps[lightshade + fgcolor]; - - if (BlendT::Mode == (int)BlendModes::Opaque) - { - return shadedfg; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - return (fgcolor != 0) ? 
shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7)); - int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7)); - int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7)); - fg_r = MIN(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255); - fg_g = MIN(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255); - fg_b = MIN(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255); - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - return (fgcolor != 0) ? shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - fgshade = (fgshade * srcalpha + 128) >> 8; - uint32_t alpha = fgshade; - uint32_t inv_alpha = 256 - fgshade; - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8; - fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8; - fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8; - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - return (alpha != 0) ? 
shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - fgshade = (fgshade * srcalpha + 128) >> 8; - uint32_t alpha = fgshade; - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - fg_r = MIN(bg_r + ((fg_r * alpha + 127) >> 8), 255); - fg_g = MIN(bg_g + ((fg_g * alpha + 127) >> 8), 255); - fg_b = MIN(bg_b + ((fg_b * alpha + 127) >> 8), 255); - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - - return (alpha != 0) ? shadedfg : bgcolor; - } - else - { - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255); - fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255); - fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0); - fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0); - fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0); - fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0); - fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0); - } - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; 
- return (fgcolor != 0) ? shadedfg : bgcolor; - } - } -} - -template -class RectScreenDrawer8 -{ -public: - static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= x0 || y1 <= y0) - return; - - auto colormaps = args->BaseColormap(); - uint32_t srcalpha = args->SrcAlpha(); - uint32_t destalpha = args->DestAlpha(); - - // Setup step variables - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - // Sampling stuff - uint32_t color = args->Color(); - const uint8_t * RESTRICT translation = args->Translation(); - const uint8_t * RESTRICT texPixels = args->TexturePixels(); - uint32_t texWidth = args->TextureWidth(); - uint32_t texHeight = args->TextureHeight(); - - // Setup light - uint32_t lightshade = args->Light(); - lightshade += lightshade >> 7; // 255 -> 256 - if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256; - - int count = x1 - x0; - - uint32_t posV = startV; - for (int y = y0; y < y1; y++, posV += stepV) - { - int coreBlock = y / 8; - if (coreBlock % thread->num_cores != thread->core) - { - continue; - } - - uint8_t *dest = ((uint8_t*)destOrg) + y * destPitch + x0; - - uint32_t posU = startU; - for (int i = 0; i < count; i++) - { - uint8_t bgcolor = *dest; - if (SamplerT::Mode == 
(int)Samplers::FogBoundary) color = bgcolor; - uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight, x0 + i, y); - *dest = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); - - posU += stepU; - dest++; - } - } - } -}; diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index 2abdee97b..4614dec64 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -36,12 +36,6 @@ #include "poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "screen_triangle.h" -#ifndef NO_SSE -#include "poly_drawer32_sse2.h" -#else -#include "poly_drawer32.h" -#endif -#include "poly_drawer8.h" #include "x86.h" static void SortVertices(const TriDrawTriangleArgs *args, ShadedTriVertex **sortedVertices) @@ -1284,6 +1278,516 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) } } +template +void DrawRect8(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) +{ + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + const uint8_t *colormaps, *texPixels, *translation; + int texWidth, texHeight; + uint32_t fillcolor; + int alpha; + uint32_t light; + + texPixels = args->TexturePixels(); + translation = args->Translation(); + texWidth = args->TextureWidth(); + texHeight = args->TextureHeight(); + fillcolor = args->Color(); + alpha = args->Alpha(); + colormaps = args->BaseColormap(); + light = args->Light(); + light += light >> 7; // 255 -> 256 + light = ((256 - light) * 
NUMCOLORMAPS) & 0xffffff00; + + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + uint32_t posV = startV; + int num_cores = thread->num_cores; + int skip = thread->skipped_by_thread(y0); + posV += skip * stepV; + stepV *= num_cores; + for (int y = y0 + skip; y < y1; y += num_cores, posV += stepV) + { + uint8_t *destLine = ((uint8_t*)destOrg) + y * destPitch; + + uint32_t posU = startU; + for (int x = x0; x < x1; x++) + { + int fg = 0; + int fgalpha = 255; + + if (ModeT::SWFlags & SWSTYLEF_Fill) + { + fg = fillcolor; + } + else if (ModeT::BlendOp != STYLEOP_Fuzz) + { + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + fg = texPixels[texelX * texHeight + texelY]; + + if (ModeT::SWFlags & SWSTYLEF_Translated) + fg = translation[fg]; + + fgalpha = (fg != 0) ? 255 : 0; + } + + if (ModeT::BlendOp == STYLEOP_Fuzz) + { + using namespace swrenderer; + + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 
256 : 0; + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + + uint32_t a = 256 - sampleshadeout; + + uint32_t dest = GPalette.BaseColors[destLine[x]].d; + uint32_t r = (RPART(dest) * a) >> 8; + uint32_t g = (GPART(dest) * a) >> 8; + uint32_t b = (BPART(dest) * a) >> 8; + destLine[x] = RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; + } + else + { + if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) + { + if (ModeT::Flags & STYLEF_RedIsAlpha) + fgalpha = fg; + fg = fillcolor; + } + + if (!(ModeT::Flags & STYLEF_Alpha1)) + { + fgalpha = (fgalpha * alpha) >> 8; + } + + uint8_t shadedfg = colormaps[light + fg]; + + if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) + { + destLine[x] = shadedfg; + } + else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One) + { + uint32_t src = GPalette.BaseColors[shadedfg]; + uint32_t dest = GPalette.BaseColors[destLine[x]]; + + if (ModeT::BlendOp == STYLEOP_Add) + { + uint32_t out_r = MIN(RPART(dest) + RPART(src), 255); + uint32_t out_g = MIN(GPART(dest) + GPART(src), 255); + uint32_t out_b = MIN(BPART(dest) + BPART(src), 255); + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + uint32_t out_r = MAX(RPART(dest) - RPART(src), 0); + uint32_t out_g = MAX(GPART(dest) - GPART(src), 0); + uint32_t out_b = MAX(BPART(dest) - BPART(src), 0); + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + uint32_t out_r = MAX(RPART(src) - 
RPART(dest), 0); + uint32_t out_g = MAX(GPART(src) - GPART(dest), 0); + uint32_t out_b = MAX(BPART(src) - BPART(dest), 0); + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + } + else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor) + { + uint32_t src = GPalette.BaseColors[shadedfg]; + uint32_t dest = GPalette.BaseColors[destLine[x]]; + + uint32_t sfactor_r = RPART(src); sfactor_r += sfactor_r >> 7; // 255 -> 256 + uint32_t sfactor_g = GPART(src); sfactor_g += sfactor_g >> 7; // 255 -> 256 + uint32_t sfactor_b = BPART(src); sfactor_b += sfactor_b >> 7; // 255 -> 256 + uint32_t sfactor_a = fgalpha; sfactor_a += sfactor_a >> 7; // 255 -> 256 + uint32_t dfactor_r = 256 - sfactor_r; + uint32_t dfactor_g = 256 - sfactor_g; + uint32_t dfactor_b = 256 - sfactor_b; + uint32_t out_r = (RPART(dest) * dfactor_r + RPART(src) * sfactor_r + 128) >> 8; + uint32_t out_g = (GPART(dest) * dfactor_g + GPART(src) * sfactor_g + 128) >> 8; + uint32_t out_b = (BPART(dest) * dfactor_b + BPART(src) * sfactor_b + 128) >> 8; + + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) + { + destLine[x] = shadedfg; + } + else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) + { + uint32_t src = GPalette.BaseColors[shadedfg]; + uint32_t dest = GPalette.BaseColors[destLine[x]]; + + uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 + uint32_t dfactor = 256 - sfactor; + uint32_t src_r = RPART(src) * sfactor; + uint32_t src_g = GPART(src) * sfactor; + uint32_t src_b = BPART(src) * sfactor; + uint32_t dest_r = RPART(dest); + uint32_t dest_g = GPART(dest); + uint32_t dest_b = BPART(dest); + if (ModeT::BlendDest == STYLEALPHA_One) + { + dest_r <<= 8; + dest_g <<= 8; + dest_b <<= 8; + } + else + { + uint32_t dfactor = 256 - sfactor; + dest_r *= dfactor; + 
dest_g *= dfactor; + dest_b *= dfactor; + } + + uint32_t out_r, out_g, out_b; + if (ModeT::BlendOp == STYLEOP_Add) + { + if (ModeT::BlendDest == STYLEALPHA_One) + { + out_r = MIN((dest_r + src_r + 128) >> 8, 255); + out_g = MIN((dest_g + src_g + 128) >> 8, 255); + out_b = MIN((dest_b + src_b + 128) >> 8, 255); + } + else + { + out_r = (dest_r + src_r + 128) >> 8; + out_g = (dest_g + src_g + 128) >> 8; + out_b = (dest_b + src_b + 128) >> 8; + } + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + out_r = MAX(static_cast(dest_r - src_r + 128) >> 8, 0); + out_g = MAX(static_cast(dest_g - src_g + 128) >> 8, 0); + out_b = MAX(static_cast(dest_b - src_b + 128) >> 8, 0); + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + out_r = MAX(static_cast(src_r - dest_r + 128) >> 8, 0); + out_g = MAX(static_cast(src_g - dest_g + 128) >> 8, 0); + out_b = MAX(static_cast(src_b - dest_b + 128) >> 8, 0); + } + + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + } + + posU += stepU; + } + } +} + +template +void DrawRectOpt32(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) +{ + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + const uint32_t *texPixels, *translation; + int texWidth, texHeight; + uint32_t fillcolor; + int alpha; + uint32_t light; + uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate; + + texPixels = (const uint32_t*)args->TexturePixels(); + translation = (const uint32_t*)args->Translation(); + texWidth = args->TextureWidth(); + texHeight = args->TextureHeight(); + fillcolor = args->Color(); + alpha = args->Alpha(); + 
light = args->Light(); + light += light >> 7; // 255 -> 256 + + if (OptT::Flags & SWOPT_ColoredFog) + { + shade_fade_r = args->ShadeFadeRed(); + shade_fade_g = args->ShadeFadeGreen(); + shade_fade_b = args->ShadeFadeBlue(); + shade_light_r = args->ShadeLightRed(); + shade_light_g = args->ShadeLightGreen(); + shade_light_b = args->ShadeLightBlue(); + desaturate = args->ShadeDesaturate(); + inv_desaturate = 256 - desaturate; + } + + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + uint32_t posV = startV; + int num_cores = thread->num_cores; + int skip = thread->skipped_by_thread(y0); + posV += skip * stepV; + stepV *= num_cores; + for (int y = y0 + skip; y < y1; y += num_cores, posV += stepV) + { + uint32_t *destLine = ((uint32_t*)destOrg) + y * destPitch; + + uint32_t posU = startU; + for (int x = x0; x < x1; x++) + { + uint32_t fg = 0; + + if (ModeT::SWFlags & SWSTYLEF_Fill) + { + fg = fillcolor; + } + else if (ModeT::SWFlags & SWSTYLEF_FogBoundary) + { + fg = destLine[x]; + } + else if (ModeT::BlendOp != STYLEOP_Fuzz) + { + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + + if (ModeT::SWFlags & SWSTYLEF_Translated) + { + fg = translation[((const uint8_t*)texPixels)[texelX * texHeight + texelY]]; + } + else if (ModeT::Flags & STYLEF_RedIsAlpha) + { + fg = ((const uint8_t*)texPixels)[texelX * texHeight + texelY]; + } + else + { + fg = texPixels[texelX * texHeight + texelY]; + } + } + + if (ModeT::BlendOp == STYLEOP_Fuzz) + { + using namespace swrenderer; + + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + 
uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + + uint32_t a = 256 - sampleshadeout; + + uint32_t dest = destLine[x]; + uint32_t out_r = (RPART(dest) * a) >> 8; + uint32_t out_g = (GPART(dest) * a) >> 8; + uint32_t out_b = (BPART(dest) * a) >> 8; + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else + { + if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) + { + if (ModeT::Flags & STYLEF_RedIsAlpha) + fg = (fg << 24) | (fillcolor & 0x00ffffff); + else + fg = (fg & 0xff000000) | (fillcolor & 0x00ffffff); + } + + uint32_t fgalpha = fg >> 24; + + if (!(ModeT::Flags & STYLEF_Alpha1)) + { + fgalpha = (fgalpha * alpha) >> 8; + } + + int lightshade = light; + + uint32_t lit_r = 0, lit_g = 0, lit_b = 0; + + uint32_t shadedfg_r, shadedfg_g, shadedfg_b; + if (OptT::Flags & SWOPT_ColoredFog) + { + uint32_t fg_r = RPART(fg); + uint32_t fg_g = GPART(fg); + uint32_t fg_b = BPART(fg); + uint32_t intensity = ((fg_r * 77 + fg_g * 143 + fg_b * 37) >> 8) * desaturate; + shadedfg_r = (((shade_fade_r + ((fg_r * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_r) >> 8; + shadedfg_g = (((shade_fade_g + ((fg_g * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_g) >> 8; + shadedfg_b = (((shade_fade_b + ((fg_b * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_b) >> 8; + } + else + { + shadedfg_r = (RPART(fg) * lightshade) >> 8; + shadedfg_g = (GPART(fg) * 
lightshade) >> 8; + shadedfg_b = (BPART(fg) * lightshade) >> 8; + } + + if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) + { + destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); + } + else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One) + { + uint32_t dest = destLine[x]; + + if (ModeT::BlendOp == STYLEOP_Add) + { + uint32_t out_r = MIN(RPART(dest) + shadedfg_r, 255); + uint32_t out_g = MIN(GPART(dest) + shadedfg_g, 255); + uint32_t out_b = MIN(BPART(dest) + shadedfg_b, 255); + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + uint32_t out_r = MAX(RPART(dest) - shadedfg_r, 0); + uint32_t out_g = MAX(GPART(dest) - shadedfg_g, 0); + uint32_t out_b = MAX(BPART(dest) - shadedfg_b, 0); + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + uint32_t out_r = MAX(shadedfg_r - RPART(dest), 0); + uint32_t out_g = MAX(shadedfg_g - GPART(dest), 0); + uint32_t out_b = MAX(shadedfg_b - BPART(dest), 0); + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + } + else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor) + { + uint32_t dest = destLine[x]; + + uint32_t sfactor_r = shadedfg_r; sfactor_r += sfactor_r >> 7; // 255 -> 256 + uint32_t sfactor_g = shadedfg_g; sfactor_g += sfactor_g >> 7; // 255 -> 256 + uint32_t sfactor_b = shadedfg_b; sfactor_b += sfactor_b >> 7; // 255 -> 256 + uint32_t sfactor_a = fgalpha; sfactor_a += sfactor_a >> 7; // 255 -> 256 + uint32_t dfactor_r = 256 - sfactor_r; + uint32_t dfactor_g = 256 - sfactor_g; + uint32_t dfactor_b = 256 - sfactor_b; + uint32_t out_r = (RPART(dest) * dfactor_r + shadedfg_r * sfactor_r + 128) >> 8; + uint32_t out_g = (GPART(dest) * dfactor_g + shadedfg_g * sfactor_g + 128) >> 8; + uint32_t out_b = (BPART(dest) * dfactor_b + shadedfg_b * sfactor_b + 128) >> 8; + + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else if (ModeT::BlendSrc 
== STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) + { + destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); + } + else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) + { + uint32_t dest = destLine[x]; + + uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 + uint32_t src_r = shadedfg_r * sfactor; + uint32_t src_g = shadedfg_g * sfactor; + uint32_t src_b = shadedfg_b * sfactor; + uint32_t dest_r = RPART(dest); + uint32_t dest_g = GPART(dest); + uint32_t dest_b = BPART(dest); + if (ModeT::BlendDest == STYLEALPHA_One) + { + dest_r <<= 8; + dest_g <<= 8; + dest_b <<= 8; + } + else + { + uint32_t dfactor = 256 - sfactor; + dest_r *= dfactor; + dest_g *= dfactor; + dest_b *= dfactor; + } + + uint32_t out_r, out_g, out_b; + if (ModeT::BlendOp == STYLEOP_Add) + { + if (ModeT::BlendDest == STYLEALPHA_One) + { + out_r = MIN((dest_r + src_r + 128) >> 8, 255); + out_g = MIN((dest_g + src_g + 128) >> 8, 255); + out_b = MIN((dest_b + src_b + 128) >> 8, 255); + } + else + { + out_r = (dest_r + src_r + 128) >> 8; + out_g = (dest_g + src_g + 128) >> 8; + out_b = (dest_b + src_b + 128) >> 8; + } + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + out_r = MAX(static_cast(dest_r - src_r + 128) >> 8, 0); + out_g = MAX(static_cast(dest_g - src_g + 128) >> 8, 0); + out_b = MAX(static_cast(dest_b - src_b + 128) >> 8, 0); + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + out_r = MAX(static_cast(src_r - dest_r + 128) >> 8, 0); + out_g = MAX(static_cast(src_g - dest_g + 128) >> 8, 0); + out_b = MAX(static_cast(src_b - dest_b + 128) >> 8, 0); + } + + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + } + + posU += stepU; + } + } +} + +template +void DrawRect32(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) +{ + using namespace TriScreenDrawerModes; + + if (args->SimpleShade()) + DrawRectOpt32(destOrg, 
destWidth, destHeight, destPitch, args, thread); + else + DrawRectOpt32(destOrg, destWidth, destHeight, destPitch, args, thread); +} + void(*ScreenTriangle::SpanDrawers8[])(int, int, int, const TriDrawTriangleArgs *) = { &DrawSpan8, @@ -1350,58 +1854,66 @@ void(*ScreenTriangle::SpanDrawers32[])(int, int, int, const TriDrawTriangleArgs void(*ScreenTriangle::RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, PolyTriangleThreadData *) = { - &RectScreenDrawer8::Execute, // TextureOpaque - &RectScreenDrawer8::Execute, // TextureMasked - &RectScreenDrawer8::Execute, // TextureAdd - &RectScreenDrawer8::Execute, // TextureSub - &RectScreenDrawer8::Execute, // TextureRevSub - &RectScreenDrawer8::Execute, // TextureAddSrcColor - &RectScreenDrawer8::Execute, // TranslatedOpaque - &RectScreenDrawer8::Execute, // TranslatedMasked - &RectScreenDrawer8::Execute, // TranslatedAdd - &RectScreenDrawer8::Execute, // TranslatedSub - &RectScreenDrawer8::Execute, // TranslatedRevSub - &RectScreenDrawer8::Execute, // TranslatedAddSrcColor - &RectScreenDrawer8::Execute, // Shaded - &RectScreenDrawer8::Execute, // AddShaded - &RectScreenDrawer8::Execute, // Stencil - &RectScreenDrawer8::Execute, // AddStencil - &RectScreenDrawer8::Execute, // FillOpaque - &RectScreenDrawer8::Execute, // FillAdd - &RectScreenDrawer8::Execute, // FillSub - &RectScreenDrawer8::Execute, // FillRevSub - &RectScreenDrawer8::Execute, // FillAddSrcColor - &RectScreenDrawer8::Execute, // Skycap - &RectScreenDrawer8::Execute, // Fuzz - &RectScreenDrawer8::Execute // FogBoundary + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8 }; 
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, PolyTriangleThreadData *) = { - &RectScreenDrawer32::Execute, // TextureOpaque - &RectScreenDrawer32::Execute, // TextureMasked - &RectScreenDrawer32::Execute, // TextureAdd - &RectScreenDrawer32::Execute, // TextureSub - &RectScreenDrawer32::Execute, // TextureRevSub - &RectScreenDrawer32::Execute, // TextureAddSrcColor - &RectScreenDrawer32::Execute, // TranslatedOpaque - &RectScreenDrawer32::Execute, // TranslatedMasked - &RectScreenDrawer32::Execute, // TranslatedAdd - &RectScreenDrawer32::Execute, // TranslatedSub - &RectScreenDrawer32::Execute, // TranslatedRevSub - &RectScreenDrawer32::Execute, // TranslatedAddSrcColor - &RectScreenDrawer32::Execute, // Shaded - &RectScreenDrawer32::Execute, // AddShaded - &RectScreenDrawer32::Execute, // Stencil - &RectScreenDrawer32::Execute, // AddStencil - &RectScreenDrawer32::Execute, // FillOpaque - &RectScreenDrawer32::Execute, // FillAdd - &RectScreenDrawer32::Execute, // FillSub - &RectScreenDrawer32::Execute, // FillRevSub - &RectScreenDrawer32::Execute, // FillAddSrcColor - &RectScreenDrawer32::Execute, // Skycap - &RectScreenDrawer32::Execute, // Fuzz - &RectScreenDrawer32::Execute, // FogBoundary + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32 }; int ScreenTriangle::FuzzStart = 0; diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index bd3ff132c..a52d897c2 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -136,34 +136,6 @@ enum class TriBlendMode 
AddShadedTranslated }; -enum class RectBlendMode -{ - TextureOpaque, - TextureMasked, - TextureAdd, - TextureSub, - TextureRevSub, - TextureAddSrcColor, - TranslatedOpaque, - TranslatedMasked, - TranslatedAdd, - TranslatedSub, - TranslatedRevSub, - TranslatedAddSrcColor, - Shaded, - AddShaded, - Stencil, - AddStencil, - FillOpaque, - FillAdd, - FillSub, - FillRevSub, - FillAddSrcColor, - Skycap, - Fuzz, - FogBoundary -}; - class ScreenTriangle { public: @@ -219,35 +191,6 @@ namespace TriScreenDrawerModes struct StyleAddStencilTranslated { static const int BlendOp = STYLEOP_Add, BlendSrc = STYLEALPHA_Src, BlendDest = STYLEALPHA_One, Flags = STYLEF_ColorIsFixed, SWFlags = SWSTYLEF_Translated; }; struct StyleAddShadedTranslated { static const int BlendOp = STYLEOP_Add, BlendSrc = STYLEALPHA_Src, BlendDest = STYLEALPHA_One, Flags = STYLEF_RedIsAlpha | STYLEF_ColorIsFixed, SWFlags = SWSTYLEF_Translated; }; - enum class BlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp, AddSrcColorOneMinusSrcColor, Shaded, AddClampShaded }; - struct OpaqueBlend { static const int Mode = (int)BlendModes::Opaque; }; - struct MaskedBlend { static const int Mode = (int)BlendModes::Masked; }; - struct AddClampBlend { static const int Mode = (int)BlendModes::AddClamp; }; - struct SubClampBlend { static const int Mode = (int)BlendModes::SubClamp; }; - struct RevSubClampBlend { static const int Mode = (int)BlendModes::RevSubClamp; }; - struct AddSrcColorBlend { static const int Mode = (int)BlendModes::AddSrcColorOneMinusSrcColor; }; - struct ShadedBlend { static const int Mode = (int)BlendModes::Shaded; }; - struct AddClampShadedBlend { static const int Mode = (int)BlendModes::AddClampShaded; }; - - enum class FilterModes { Nearest, Linear }; - struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; - struct LinearFilter { static const int Mode = (int)FilterModes::Linear; }; - - enum class ShadeMode { None, Simple, Advanced }; - struct NoShade { static const int 
Mode = (int)ShadeMode::None; }; - struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; }; - struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; }; - - enum class Samplers { Texture, Fill, Shaded, Stencil, Translated, Skycap, Fuzz, FogBoundary }; - struct TextureSampler { static const int Mode = (int)Samplers::Texture; }; - struct FillSampler { static const int Mode = (int)Samplers::Fill; }; - struct ShadedSampler { static const int Mode = (int)Samplers::Shaded; }; - struct StencilSampler { static const int Mode = (int)Samplers::Stencil; }; - struct TranslatedSampler { static const int Mode = (int)Samplers::Translated; }; - struct SkycapSampler { static const int Mode = (int)Samplers::Skycap; }; - struct FuzzSampler { static const int Mode = (int)Samplers::Fuzz; }; - struct FogBoundarySampler { static const int Mode = (int)Samplers::FogBoundary; }; - enum SWOptFlags { SWOPT_DynLights = 1, From d18f5eecb4221151fdbff0c76c40202b88559870 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 3 Jun 2018 19:28:16 +0200 Subject: [PATCH 16/17] - Fixed: Dehacked must not validate parameters for MBF special functions. These won't match the conditions. Additionally this adds disassembly output for the synthesized replacement functions for diagnostics purposes. 
--- src/d_dehacked.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/d_dehacked.cpp b/src/d_dehacked.cpp index 25b42632e..9688d3cab 100644 --- a/src/d_dehacked.cpp +++ b/src/d_dehacked.cpp @@ -64,6 +64,7 @@ #include "v_text.h" #include "backend/vmbuilder.h" #include "types.h" +#include "m_argv.h" // [SO] Just the way Randy said to do it :) // [RH] Made this CVAR_SERVERINFO @@ -813,6 +814,16 @@ void SetDehParams(FState *state, int codepointer) sfunc->ImplicitArgs = numargs; state->SetAction(sfunc); sfunc->PrintableName.Format("Dehacked.%s.%d.%d", MBFCodePointers[codepointer].name.GetChars(), value1, value2); + + if (Args->CheckParm("-dumpdisasm")) + { + FILE *dump = fopen("disasm.txt", "a"); + if (dump != nullptr) + { + DumpFunction(dump, sfunc, sfunc->PrintableName.GetChars(), (int)sfunc->PrintableName.Len()); + fclose(dump); // close only a stream that fopen actually returned; fclose(nullptr) is undefined behavior + } + } } } @@ -2103,12 +2114,15 @@ static int PatchCodePtrs (int dummy) symname.Format("A_%s", Line2); // Let's consider as aliases some redundant MBF pointer + bool ismbfcp = false; for (unsigned int i = 0; i < MBFCodePointers.Size(); i++) { if (!symname.CompareNoCase(MBFCodePointers[i].alias)) { symname = MBFCodePointers[i].name; DPrintf(DMSG_SPAMMY, "%s --> %s\n", MBFCodePointers[i].alias, MBFCodePointers[i].name.GetChars()); + ismbfcp = true; + break; } } @@ -2119,7 +2133,7 @@ static int PatchCodePtrs (int dummy) { Printf(TEXTCOLOR_RED "Frame %d: Unknown code pointer '%s'\n", frame, Line2); } - else + else if (!ismbfcp) // MBF special code pointers will produce errors here because they will receive some args and won't match the conditions here. 
{ TArray &args = sym->Variants[0].ArgFlags; unsigned numargs = sym->GetImplicitArgs(); From 1ac7e5a026a20be7acdc33c39b951b929a3d79f3 Mon Sep 17 00:00:00 2001 From: Marisa Kirisame Date: Sun, 3 Jun 2018 21:11:47 +0200 Subject: [PATCH 17/17] BLOCKASPLAYER flag: treat non-player actors as blockable by "block players" lines --- src/actor.h | 1 + src/p_map.cpp | 6 +++--- src/po_man.cpp | 2 +- src/scripting/thingdef_data.cpp | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/actor.h b/src/actor.h index 863e4fb46..2199c205e 100644 --- a/src/actor.h +++ b/src/actor.h @@ -401,6 +401,7 @@ enum ActorFlag8 { MF8_FRIGHTENING = 0x00000001, // for those moments when halloween just won't do MF8_INSCROLLSEC = 0x00000002, // actor is partially inside a scrolling sector + MF8_BLOCKASPLAYER = 0x00000004, // actor is blocked by player-blocking lines even if not a player }; // --- mobj.renderflags --- diff --git a/src/p_map.cpp b/src/p_map.cpp index 8a7134967..0e4f61df8 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -930,7 +930,7 @@ bool PIT_CheckLine(FMultiBlockLinesIterator &mit, FMultiBlockLinesIterator::Chec bool NotBlocked = ((tm.thing->flags3 & MF3_NOBLOCKMONST) || ((i_compatflags & COMPATF_NOBLOCKFRIENDS) && (tm.thing->flags & MF_FRIENDLY))); - if (!(Projectile) || (ld->flags & (ML_BLOCKEVERYTHING | ML_BLOCKPROJECTILE))) + if (!(Projectile) || (ld->flags & (ML_BLOCKEVERYTHING | ML_BLOCKPROJECTILE | ML_BLOCK_PLAYERS))) { if (ld->flags & ML_RAILING) { @@ -938,7 +938,7 @@ bool PIT_CheckLine(FMultiBlockLinesIterator &mit, FMultiBlockLinesIterator::Chec } else if ((ld->flags & (ML_BLOCKING | ML_BLOCKEVERYTHING)) || // explicitly blocking everything (!(NotBlocked) && (ld->flags & ML_BLOCKMONSTERS)) || // block monsters only - (tm.thing->player != NULL && (ld->flags & ML_BLOCK_PLAYERS)) || // block players + (((tm.thing->player != NULL) || (tm.thing->flags8 & MF8_BLOCKASPLAYER)) && (ld->flags & ML_BLOCK_PLAYERS)) || // block players and BLOCKASPLAYER actors, but only on player-blocking lines ((Projectile) && (ld->flags & 
ML_BLOCKPROJECTILE)) || // block projectiles ((tm.thing->flags & MF_FLOAT) && (ld->flags & ML_BLOCK_FLOATERS))) // block floaters { @@ -3100,7 +3100,7 @@ void FSlide::SlideTraverse(const DVector2 &start, const DVector2 &end) { goto isblocking; } - if (li->flags & ML_BLOCK_PLAYERS && slidemo->player != NULL) + if (li->flags & ML_BLOCK_PLAYERS && ((slidemo->player != NULL) || (slidemo->flags8 & MF8_BLOCKASPLAYER))) { goto isblocking; } diff --git a/src/po_man.cpp b/src/po_man.cpp index 76355d4db..ca12120de 100644 --- a/src/po_man.cpp +++ b/src/po_man.cpp @@ -1157,7 +1157,7 @@ bool FPolyObj::CheckMobjBlocking (side_t *sd) // [TN] Check wether this actor gets blocked by the line. if (ld->backsector != NULL && !(ld->flags & (ML_BLOCKING|ML_BLOCKEVERYTHING)) - && !(ld->flags & ML_BLOCK_PLAYERS && mobj->player) + && !(ld->flags & ML_BLOCK_PLAYERS && (mobj->player || (mobj->flags8 & MF8_BLOCKASPLAYER))) && !(ld->flags & ML_BLOCKMONSTERS && mobj->flags3 & MF3_ISMONSTER) && !((mobj->flags & MF_FLOAT) && (ld->flags & ML_BLOCK_FLOATERS)) && (!(ld->flags & ML_3DMIDTEX) || diff --git a/src/scripting/thingdef_data.cpp b/src/scripting/thingdef_data.cpp index 002c0a81a..1416cbc7a 100644 --- a/src/scripting/thingdef_data.cpp +++ b/src/scripting/thingdef_data.cpp @@ -316,6 +316,7 @@ static FFlagDef ActorFlagDefs[]= DEFINE_FLAG(MF7, FORCEINFIGHTING, AActor, flags7), DEFINE_FLAG(MF8, FRIGHTENING, AActor, flags8), + DEFINE_FLAG(MF8, BLOCKASPLAYER, AActor, flags8), // Effect flags DEFINE_FLAG(FX, VISIBILITYPULSE, AActor, effects),