diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4645cfcd5..59dd637a2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -310,7 +310,6 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) set( CMAKE_CXX_FLAGS "${SANITIZER_FLAG} ${CMAKE_CXX_FLAGS}" ) set( CMAKE_C_FLAGS "${SANITIZER_FLAG} ${CMAKE_C_FLAGS}" ) - set( CMAKE_EXE_LINKER_FLAGS "${SANITIZER_FLAG} ${CMAKE_EXE_LINKER_FLAGS}" ) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.5") set( CMAKE_C_FLAGS "-Wno-unused-result ${CMAKE_C_FLAGS}" ) diff --git a/src/actor.h b/src/actor.h index 2199c205e..863e4fb46 100644 --- a/src/actor.h +++ b/src/actor.h @@ -401,7 +401,6 @@ enum ActorFlag8 { MF8_FRIGHTENING = 0x00000001, // for those moments when halloween just won't do MF8_INSCROLLSEC = 0x00000002, // actor is partially inside a scrolling sector - MF8_BLOCKASPLAYER = 0x00000004, // actor is blocked by player-blocking lines even if not a player }; // --- mobj.renderflags --- diff --git a/src/d_dehacked.cpp b/src/d_dehacked.cpp index 9688d3cab..25b42632e 100644 --- a/src/d_dehacked.cpp +++ b/src/d_dehacked.cpp @@ -64,7 +64,6 @@ #include "v_text.h" #include "backend/vmbuilder.h" #include "types.h" -#include "m_argv.h" // [SO] Just the way Randy said to do it :) // [RH] Made this CVAR_SERVERINFO @@ -814,16 +813,6 @@ void SetDehParams(FState *state, int codepointer) sfunc->ImplicitArgs = numargs; state->SetAction(sfunc); sfunc->PrintableName.Format("Dehacked.%s.%d.%d", MBFCodePointers[codepointer].name.GetChars(), value1, value2); - - if (Args->CheckParm("-dumpdisasm")) - { - FILE *dump = fopen("disasm.txt", "a"); - if (dump != nullptr) - { - DumpFunction(dump, sfunc, sfunc->PrintableName.GetChars(), (int)sfunc->PrintableName.Len()); - } - fclose(dump); - } } } @@ -2114,15 +2103,12 @@ static int PatchCodePtrs (int dummy) symname.Format("A_%s", Line2); // Let's consider as aliases some redundant MBF pointer - bool ismbfcp = false; for (unsigned int i = 0; i < 
MBFCodePointers.Size(); i++) { if (!symname.CompareNoCase(MBFCodePointers[i].alias)) { symname = MBFCodePointers[i].name; DPrintf(DMSG_SPAMMY, "%s --> %s\n", MBFCodePointers[i].alias, MBFCodePointers[i].name.GetChars()); - ismbfcp = true; - break; } } @@ -2133,7 +2119,7 @@ static int PatchCodePtrs (int dummy) { Printf(TEXTCOLOR_RED "Frame %d: Unknown code pointer '%s'\n", frame, Line2); } - else if (!ismbfcp) // MBF special code pointers will produce errors here because they will receive some args and won't match the conditions here. + else { TArray &args = sym->Variants[0].ArgFlags; unsigned numargs = sym->GetImplicitArgs(); diff --git a/src/g_game.cpp b/src/g_game.cpp index a73758f1c..1c5388ac1 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -82,6 +82,9 @@ static FRandom pr_dmspawn ("DMSpawn"); static FRandom pr_pspawn ("PlayerSpawn"); +const int SAVEPICWIDTH = 216; +const int SAVEPICHEIGHT = 162; + bool G_CheckDemoStatus (void); void G_ReadDemoTiccmd (ticcmd_t *cmd, int player); void G_WriteDemoTiccmd (ticcmd_t *cmd, int player, int buf); diff --git a/src/g_game.h b/src/g_game.h index 7ac49a413..2ac019059 100644 --- a/src/g_game.h +++ b/src/g_game.h @@ -102,7 +102,5 @@ class AInventory; extern const AInventory *SendItemUse, *SendItemDrop; extern int SendItemDropAmount; -const int SAVEPICWIDTH = 216; -const int SAVEPICHEIGHT = 162; #endif diff --git a/src/gl/models/gl_models.cpp b/src/gl/models/gl_models.cpp index 8669d7bab..31b7208bc 100644 --- a/src/gl/models/gl_models.cpp +++ b/src/gl/models/gl_models.cpp @@ -54,7 +54,7 @@ VSMatrix FGLModelRenderer::GetViewToWorldMatrix() return objectToWorldMatrix; } -void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) +void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) { glDepthFunc(GL_LEQUAL); gl_RenderState.EnableTexture(true); @@ -65,7 +65,7 @@ void 
FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, con if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) && !(smf->flags & MDL_DONTCULLBACKFACES)) { glEnable(GL_CULL_FACE); - glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CCW : GL_CW); + glFrontFace(GL_CW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; @@ -81,7 +81,7 @@ void FGLModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) glDisable(GL_CULL_FACE); } -void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) +void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) { glDepthFunc(GL_LEQUAL); @@ -91,7 +91,7 @@ void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectTo if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])) { glEnable(GL_CULL_FACE); - glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CW : GL_CCW); + glFrontFace(GLPortal::isMirrored()? GL_CW : GL_CCW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; diff --git a/src/gl/models/gl_models.h b/src/gl/models/gl_models.h index 0fb82f9f4..821705c34 100644 --- a/src/gl/models/gl_models.h +++ b/src/gl/models/gl_models.h @@ -37,13 +37,13 @@ public: FGLModelRenderer(int mli) : modellightindex(mli) {} ModelRendererType GetType() const override { return GLModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) 
override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/gl/renderer/gl_renderer.cpp b/src/gl/renderer/gl_renderer.cpp index c753e261e..154fae2d6 100644 --- a/src/gl/renderer/gl_renderer.cpp +++ b/src/gl/renderer/gl_renderer.cpp @@ -35,7 +35,6 @@ #include "p_effect.h" #include "d_player.h" #include "a_dynlight.h" -#include "g_game.h" #include "swrenderer/r_swscene.h" #include "hwrenderer/utility/hw_clock.h" @@ -97,8 +96,6 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) mLights = nullptr; mTonemapPalette = nullptr; mBuffers = nullptr; - mScreenBuffers = nullptr; - mSaveBuffers = nullptr; mPresentShader = nullptr; mPresent3dCheckerShader = nullptr; mPresent3dColumnShader = nullptr; @@ -125,9 +122,7 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) void FGLRenderer::Initialize(int width, int height) { - mScreenBuffers = new FGLRenderBuffers(); - mSaveBuffers = new FGLRenderBuffers(); - mBuffers = mScreenBuffers; + mBuffers = new FGLRenderBuffers(); mLinearDepthShader = new FLinearDepthShader(); mDepthBlurShader = new FDepthBlurShader(); mSSAOShader = new FSSAOShader(); @@ -405,9 +400,7 @@ void FGLRenderer::WriteSavePic(player_t *player, FileWriter *file, int width, in void FGLRenderer::BeginFrame() { - buffersActive = GLRenderer->mScreenBuffers->Setup(screen->mScreenViewport.width, screen->mScreenViewport.height, screen->mSceneViewport.width, screen->mSceneViewport.height); - if (buffersActive) - buffersActive = GLRenderer->mSaveBuffers->Setup(SAVEPICWIDTH, SAVEPICHEIGHT, SAVEPICWIDTH, SAVEPICHEIGHT); + buffersActive = GLRenderer->mBuffers->Setup(screen->mScreenViewport.width, screen->mScreenViewport.height, screen->mSceneViewport.width, screen->mSceneViewport.height); } 
//=========================================================================== diff --git a/src/gl/renderer/gl_renderer.h b/src/gl/renderer/gl_renderer.h index 22b77acef..7cdf3b01d 100644 --- a/src/gl/renderer/gl_renderer.h +++ b/src/gl/renderer/gl_renderer.h @@ -100,8 +100,6 @@ public: int mOldFBID; FGLRenderBuffers *mBuffers; - FGLRenderBuffers *mScreenBuffers; - FGLRenderBuffers *mSaveBuffers; FLinearDepthShader *mLinearDepthShader; FSSAOShader *mSSAOShader; FDepthBlurShader *mDepthBlurShader; diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index d4aea2768..91d4e4235 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -395,6 +395,7 @@ void GLSceneDrawer::RenderTranslucent(FDrawInfo *di) { RenderAll.Clock(); + glDepthMask(false); gl_RenderState.SetCameraPos(r_viewpoint.Pos.X, r_viewpoint.Pos.Y, r_viewpoint.Pos.Z); // final pass: translucent stuff @@ -403,7 +404,6 @@ void GLSceneDrawer::RenderTranslucent(FDrawInfo *di) gl_RenderState.EnableBrightmap(true); di->drawlists[GLDL_TRANSLUCENTBORDER].Draw(di, GLPASS_TRANSLUCENT); - glDepthMask(false); di->DrawSorted(GLDL_TRANSLUCENT); gl_RenderState.EnableBrightmap(false); @@ -656,7 +656,7 @@ sector_t * GLSceneDrawer::RenderViewpoint (AActor * camera, IntRect * bounds, fl SetFixedColormap(camera->player); // reiterate color map for each eye, so night vision goggles work in both eyes const s3d::EyePose * eye = stereo3dMode.getEyePose(eye_ix); eye->SetUp(); - screen->SetViewportRects(bounds); + screen->SetOutputViewport(bounds); Set3DViewport(mainview); GLRenderer->mDrawingScene2D = true; GLRenderer->mCurrentFoV = fov; @@ -713,24 +713,20 @@ sector_t * GLSceneDrawer::RenderViewpoint (AActor * camera, IntRect * bounds, fl void GLSceneDrawer::WriteSavePic (player_t *player, FileWriter *file, int width, int height) { IntRect bounds; - bounds.left = 0; - bounds.top = 0; - bounds.width = width; - bounds.height = height; - - // if GLRenderer->mVBO is persistently mapped we must be sure the 
GPU finished reading from it before we fill it with new data. - glFinish(); - - // Switch to render buffers dimensioned for the savepic - GLRenderer->mBuffers = GLRenderer->mSaveBuffers; P_FindParticleSubsectors(); // make sure that all recently spawned particles have a valid subsector. + bounds.left=0; + bounds.top=0; + bounds.width=width; + bounds.height=height; + glFlush(); SetFixedColormap(player); gl_RenderState.SetVertexBuffer(GLRenderer->mVBO); GLRenderer->mVBO->Reset(); if (!gl.legacyMode) GLRenderer->mLights->Clear(); - sector_t *viewsector = RenderViewpoint(players[consoleplayer].camera, &bounds, r_viewpoint.FieldOfView.Degrees, 1.6f, 1.6f, true, false); + sector_t *viewsector = RenderViewpoint(players[consoleplayer].camera, &bounds, + r_viewpoint.FieldOfView.Degrees, 1.6f, 1.6f, true, false); glDisable(GL_STENCIL_TEST); gl_RenderState.SetFixedColormap(CM_DEFAULT); gl_RenderState.SetSoftLightLevel(-1); @@ -741,16 +737,12 @@ void GLSceneDrawer::WriteSavePic (player_t *player, FileWriter *file, int width, screen->Draw2D(); } GLRenderer->CopyToBackbuffer(&bounds, false); + glFlush(); - // strictly speaking not needed as the glReadPixels should block until the scene is rendered, but this is to safeguard against shitty drivers - glFinish(); + screen->SetOutputViewport(nullptr); uint8_t * scr = (uint8_t *)M_Malloc(width * height * 3); glReadPixels(0,0,width, height,GL_RGB,GL_UNSIGNED_BYTE,scr); M_CreatePNG (file, scr + ((height-1) * width * 3), NULL, SS_RGB, width, height, -width * 3, Gamma); M_Free(scr); - - // Switch back the screen render buffers - screen->SetViewportRects(nullptr); - GLRenderer->mBuffers = GLRenderer->mScreenBuffers; } diff --git a/src/gl/system/gl_framebuffer.cpp b/src/gl/system/gl_framebuffer.cpp index 02326afd1..81cbd7eeb 100644 --- a/src/gl/system/gl_framebuffer.cpp +++ b/src/gl/system/gl_framebuffer.cpp @@ -136,7 +136,7 @@ void OpenGLFrameBuffer::InitializeState() glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); 
GLRenderer->Initialize(GetWidth(), GetHeight()); - SetViewportRects(nullptr); + SetOutputViewport(nullptr); } //========================================================================== @@ -164,11 +164,14 @@ void OpenGLFrameBuffer::Update() int clientHeight = ViewportScaledHeight(initialWidth, initialHeight); if (clientWidth > 0 && clientHeight > 0 && (Width != clientWidth || Height != clientHeight)) { + // Do not call Resize here because it's only for software canvases Width = clientWidth; Height = clientHeight; V_OutputResized(Width, Height); GLRenderer->mVBO->OutputResized(Width, Height); } + + SetOutputViewport(nullptr); } //=========================================================================== @@ -389,11 +392,10 @@ bool OpenGLFrameBuffer::RenderBuffersEnabled() return FGLRenderBuffers::IsEnabled(); } -void OpenGLFrameBuffer::SetViewportRects(IntRect *bounds) +void OpenGLFrameBuffer::SetOutputViewport(IntRect *bounds) { - Super::SetViewportRects(bounds); - if (!bounds) - s3d::Stereo3DMode::getCurrentMode().AdjustViewports(); + Super::SetOutputViewport(bounds); + s3d::Stereo3DMode::getCurrentMode().AdjustViewports(); } @@ -429,7 +431,6 @@ void OpenGLFrameBuffer::SetClearColor(int color) void OpenGLFrameBuffer::BeginFrame() { - SetViewportRects(nullptr); if (GLRenderer != nullptr) GLRenderer->BeginFrame(); } diff --git a/src/gl/system/gl_framebuffer.h b/src/gl/system/gl_framebuffer.h index 9a92c1f44..898820cc9 100644 --- a/src/gl/system/gl_framebuffer.h +++ b/src/gl/system/gl_framebuffer.h @@ -42,7 +42,7 @@ public: void ResetFixedColormap() override; void BeginFrame() override; bool RenderBuffersEnabled() override; - void SetViewportRects(IntRect *bounds) override; + void SetOutputViewport(IntRect *bounds) override; void BlurScene(float amount) override; // Retrieves a buffer containing image data for a screenshot. 
diff --git a/src/memarena.cpp b/src/memarena.cpp index 2c0336a5e..d19521edf 100644 --- a/src/memarena.cpp +++ b/src/memarena.cpp @@ -55,14 +55,13 @@ struct FMemArena::Block // // RoundPointer // -// Rounds a pointer up to the size of the largest integral type. +// Rounds a pointer up to a pointer-sized boundary. // //========================================================================== static inline void *RoundPointer(void *ptr) { - const auto roundsize = std::max(sizeof(void*), sizeof(double)); - return (void *)(((size_t)ptr + roundsize - 1) & ~(roundsize - 1)); + return (void *)(((size_t)ptr + sizeof(void*) - 1) & ~(sizeof(void*) - 1)); } //========================================================================== diff --git a/src/p_actionfunctions.cpp b/src/p_actionfunctions.cpp index a7a12f9f1..2af237b8a 100644 --- a/src/p_actionfunctions.cpp +++ b/src/p_actionfunctions.cpp @@ -1997,7 +1997,7 @@ DEFINE_ACTION_FUNCTION(AStateProvider, A_CustomPunch) angle = self->Angles.Yaw + pr_cwpunch.Random2() * (5.625 / 256); if (range == 0) range = DEFMELEERANGE; - pitch = P_AimLineAttack (self, angle, range, &t, 0., ALF_CHECK3D); + pitch = P_AimLineAttack (self, angle, range, &t); // only use ammo when actually hitting something! 
if ((flags & CPF_USEAMMO) && t.linetarget && weapon && ACTION_CALL_FROM_PSPRITE()) diff --git a/src/p_map.cpp b/src/p_map.cpp index 0e4f61df8..89eae7859 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -930,7 +930,7 @@ bool PIT_CheckLine(FMultiBlockLinesIterator &mit, FMultiBlockLinesIterator::Chec bool NotBlocked = ((tm.thing->flags3 & MF3_NOBLOCKMONST) || ((i_compatflags & COMPATF_NOBLOCKFRIENDS) && (tm.thing->flags & MF_FRIENDLY))); - if (!(Projectile) || (ld->flags & (ML_BLOCKEVERYTHING | ML_BLOCKPROJECTILE | ML_BLOCK_PLAYERS))) + if (!(Projectile) || (ld->flags & (ML_BLOCKEVERYTHING | ML_BLOCKPROJECTILE))) { if (ld->flags & ML_RAILING) { @@ -938,7 +938,7 @@ bool PIT_CheckLine(FMultiBlockLinesIterator &mit, FMultiBlockLinesIterator::Chec } else if ((ld->flags & (ML_BLOCKING | ML_BLOCKEVERYTHING)) || // explicitly blocking everything (!(NotBlocked) && (ld->flags & ML_BLOCKMONSTERS)) || // block monsters only - ((tm.thing->player != NULL) || (tm.thing->flags8 & MF8_BLOCKASPLAYER) && (ld->flags & ML_BLOCK_PLAYERS)) || // block players + (tm.thing->player != NULL && (ld->flags & ML_BLOCK_PLAYERS)) || // block players ((Projectile) && (ld->flags & ML_BLOCKPROJECTILE)) || // block projectiles ((tm.thing->flags & MF_FLOAT) && (ld->flags & ML_BLOCK_FLOATERS))) // block floaters { @@ -3100,7 +3100,7 @@ void FSlide::SlideTraverse(const DVector2 &start, const DVector2 &end) { goto isblocking; } - if (li->flags & ML_BLOCK_PLAYERS && ((slidemo->player != NULL) || (slidemo->flags8 & MF8_BLOCKASPLAYER))) + if (li->flags & ML_BLOCK_PLAYERS && slidemo->player != NULL) { goto isblocking; } @@ -3998,70 +3998,6 @@ struct aim_t SetResult(thing_other, newtrace.thing_other); } - //============================================================================ - // - // Finds where the trace exits an actor to check for hits from above/below - // - //============================================================================ - - double ExitPoint(AActor *thing) - { - // The added check 
at the exit point only has some value if a 3D distance check is involved - if (!(flags & ALF_CHECK3D)) return -1; - - divline_t trace = { startpos.X, startpos.Y, aimtrace.X, aimtrace.Y }; - divline_t line; - - for (int i = 0; i < 4; ++i) - { - switch (i) - { - case 0: // Top edge - line.y = thing->Y() + thing->radius; - if (trace.y > line.y) continue; - line.x = thing->X() + thing->radius; - line.dx = -thing->radius * 2; - line.dy = 0; - break; - - case 1: // Right edge - line.x = thing->X() + thing->radius; - if (trace.x > line.x) continue; - line.y = thing->Y() - thing->radius; - line.dx = 0; - line.dy = thing->radius * 2; - break; - - case 2: // Bottom edge - line.y = thing->Y() - thing->radius; - if (trace.y < line.y) continue; - line.x = thing->X() - thing->radius; - line.dx = thing->radius * 2; - line.dy = 0; - break; - - case 3: // Left edge - line.x = thing->X() - thing->radius; - if (trace.x < line.x) continue; - line.y = thing->Y() + thing->radius; - line.dx = 0; - line.dy = thing->radius * -2; - break; - } - - // If it is, see if the trace crosses it - if (P_PointOnDivlineSide(line.x, line.y, &trace) != - P_PointOnDivlineSide(line.x + line.dx, line.y + line.dy, &trace)) - { - // It's a hit - double frac = P_InterceptVector(&trace, &line); - if (frac > 1.) 
frac = 1.; - return frac; - } - } - - return -1.; - } //============================================================================ // @@ -4111,7 +4047,9 @@ struct aim_t intercept_t *in; if (aimdebug) - Printf("Start AimTraverse, start = %f,%f,%f, vect = %f,%f\n", startpos.X, startpos.Y, startpos.Z, aimtrace.X, aimtrace.Y); + Printf("Start AimTraverse, start = %f,%f,%f, vect = %f,%f\n", + startpos.X / 65536., startpos.Y / 65536., startpos.Z / 65536., + aimtrace.X / 65536., aimtrace.Y / 65536.); while ((in = it.Next())) { @@ -4257,38 +4195,12 @@ struct aim_t thingtoppitch = -VecToAngle(dist, th->Top() - shootz); if (thingtoppitch > bottompitch) - { - // Check for a hit from above - if (shootz > th->Top()) - { - double exitfrac = ExitPoint(th); - if (exitfrac > 0.) - { - double exitdist = attackrange * exitfrac; - thingtoppitch = -VecToAngle(exitdist, th->Top() - shootz); - if (thingtoppitch > bottompitch) continue; - } - } - else continue; // shot over the thing - } + continue; // shot over the thing thingbottompitch = -VecToAngle(dist, th->Z() - shootz); if (thingbottompitch < toppitch) - { - // Check for a hit from below - if (shootz < th->Z()) - { - double exitfrac = ExitPoint(th); - if (exitfrac > 0.) 
- { - double exitdist = attackrange * exitfrac; - thingbottompitch = -VecToAngle(exitdist, th->Z() - shootz); - if (thingbottompitch < toppitch) continue; - } - } continue; // shot under the thing - } if (crossedffloors) { diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index 2f8b89a84..a00cb7f10 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -6504,6 +6504,9 @@ DEFINE_ACTION_FUNCTION(AActor, GetFloorTerrain) bool P_HitWater (AActor * thing, sector_t * sec, const DVector3 &pos, bool checkabove, bool alert, bool force) { + if (thing->flags3 & MF3_DONTSPLASH) + return false; + if (thing->player && (thing->player->cheats & CF_PREDICTING)) return false; @@ -6592,51 +6595,48 @@ foundone: if (thing->Mass < 10) smallsplash = true; - if (!(thing->flags3 & MF3_DONTSPLASH)) + if (smallsplash && splash->SmallSplash) { - if (smallsplash && splash->SmallSplash) + mo = Spawn (splash->SmallSplash, pos, ALLOW_REPLACE); + if (mo) mo->Floorclip += splash->SmallSplashClip; + } + else + { + if (splash->SplashChunk) { - mo = Spawn(splash->SmallSplash, pos, ALLOW_REPLACE); - if (mo) mo->Floorclip += splash->SmallSplashClip; - } - else - { - if (splash->SplashChunk) + mo = Spawn (splash->SplashChunk, pos, ALLOW_REPLACE); + mo->target = thing; + if (splash->ChunkXVelShift != 255) { - mo = Spawn(splash->SplashChunk, pos, ALLOW_REPLACE); - mo->target = thing; - if (splash->ChunkXVelShift != 255) - { - mo->Vel.X = (pr_chunk.Random2() << splash->ChunkXVelShift) / 65536.; - } - if (splash->ChunkYVelShift != 255) - { - mo->Vel.Y = (pr_chunk.Random2() << splash->ChunkYVelShift) / 65536.; - } - mo->Vel.Z = splash->ChunkBaseZVel + (pr_chunk() << splash->ChunkZVelShift) / 65536.; + mo->Vel.X = (pr_chunk.Random2() << splash->ChunkXVelShift) / 65536.; } - if (splash->SplashBase) + if (splash->ChunkYVelShift != 255) { - mo = Spawn(splash->SplashBase, pos, ALLOW_REPLACE); - } - if (thing->player && !splash->NoAlert && alert) - { - P_NoiseAlert(thing, thing, true); + mo->Vel.Y = (pr_chunk.Random2() << 
splash->ChunkYVelShift) / 65536.; } + mo->Vel.Z = splash->ChunkBaseZVel + (pr_chunk() << splash->ChunkZVelShift) / 65536.; } - if (mo) + if (splash->SplashBase) { - S_Sound(mo, CHAN_ITEM, smallsplash ? - splash->SmallSplashSound : splash->NormalSplashSound, - 1, ATTN_IDLE); + mo = Spawn (splash->SplashBase, pos, ALLOW_REPLACE); } - else + if (thing->player && !splash->NoAlert && alert) { - S_Sound(pos, CHAN_ITEM, smallsplash ? - splash->SmallSplashSound : splash->NormalSplashSound, - 1, ATTN_IDLE); + P_NoiseAlert (thing, thing, true); } } + if (mo) + { + S_Sound (mo, CHAN_ITEM, smallsplash ? + splash->SmallSplashSound : splash->NormalSplashSound, + 1, ATTN_IDLE); + } + else + { + S_Sound (pos, CHAN_ITEM, smallsplash ? + splash->SmallSplashSound : splash->NormalSplashSound, + 1, ATTN_IDLE); + } // Don't let deep water eat missiles return plane == &sec->floorplane ? Terrains[terrainnum].IsLiquid : false; @@ -6676,6 +6676,9 @@ bool P_HitFloor (AActor *thing) return false; } + if (thing->flags3 & MF3_DONTSPLASH) + return false; + // don't splash if landing on the edge above water/lava/etc.... DVector3 pos; for (m = thing->touching_sectorlist; m; m = m->m_tnext) diff --git a/src/po_man.cpp b/src/po_man.cpp index ca12120de..76355d4db 100644 --- a/src/po_man.cpp +++ b/src/po_man.cpp @@ -1157,7 +1157,7 @@ bool FPolyObj::CheckMobjBlocking (side_t *sd) // [TN] Check wether this actor gets blocked by the line. 
if (ld->backsector != NULL && !(ld->flags & (ML_BLOCKING|ML_BLOCKEVERYTHING)) - && !(ld->flags & ML_BLOCK_PLAYERS && (mobj->player || (mobj->flags8 & MF8_BLOCKASPLAYER))) + && !(ld->flags & ML_BLOCK_PLAYERS && mobj->player) && !(ld->flags & ML_BLOCKMONSTERS && mobj->flags3 & MF3_ISMONSTER) && !((mobj->flags & MF_FLOAT) && (ld->flags & ML_BLOCK_FLOATERS)) && (!(ld->flags & ML_3DMIDTEX) || diff --git a/src/polyrenderer/drawers/poly_buffer.cpp b/src/polyrenderer/drawers/poly_buffer.cpp index 2506bce7e..0e24509cc 100644 --- a/src/polyrenderer/drawers/poly_buffer.cpp +++ b/src/polyrenderer/drawers/poly_buffer.cpp @@ -48,7 +48,8 @@ void PolyZBuffer::Resize(int newwidth, int newheight) { width = newwidth; height = newheight; - values.resize(width * height); + int count = BlockWidth() * BlockHeight(); + values.resize(count * 64); } ///////////////////////////////////////////////////////////////////////////// @@ -63,6 +64,14 @@ void PolyStencilBuffer::Clear(int newwidth, int newheight, uint8_t stencil_value { width = newwidth; height = newheight; - values.resize(width * height); - memset(Values(), stencil_value, width * height); + int count = BlockWidth() * BlockHeight(); + values.resize(count * 64); + masks.resize(count); + + uint8_t *v = Values(); + uint32_t *m = Masks(); + for (int i = 0; i < count; i++) + { + m[i] = 0xffffff00 | stencil_value; + } } diff --git a/src/polyrenderer/drawers/poly_buffer.h b/src/polyrenderer/drawers/poly_buffer.h index af4a78b3a..df79c6e71 100644 --- a/src/polyrenderer/drawers/poly_buffer.h +++ b/src/polyrenderer/drawers/poly_buffer.h @@ -33,6 +33,8 @@ public: void Resize(int newwidth, int newheight); int Width() const { return width; } int Height() const { return height; } + int BlockWidth() const { return (width + 7) / 8; } + int BlockHeight() const { return (height + 7) / 8; } float *Values() { return values.data(); } private: @@ -48,10 +50,16 @@ public: void Clear(int newwidth, int newheight, uint8_t stencil_value = 0); int Width() const 
{ return width; } int Height() const { return height; } + int BlockWidth() const { return (width + 7) / 8; } + int BlockHeight() const { return (height + 7) / 8; } uint8_t *Values() { return values.data(); } + uint32_t *Masks() { return masks.data(); } private: int width; int height; + + // 8x8 blocks of stencil values, plus a mask for each block indicating if values are the same for early out stencil testing std::vector values; + std::vector masks; }; diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 3e02940cd..8aa79f70b 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -226,7 +226,6 @@ void RectDrawArgs::SetTexture(FTexture *texture, FRenderStyle style) void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, FRenderStyle style) { - // Alphatexture overrides translations. if (translationID != 0xffffffff && translationID != 0 && !(style.Flags & STYLEF_RedIsAlpha)) { FRemapTable *table = TranslationToTable(translationID); @@ -300,63 +299,61 @@ void RectDrawArgs::Draw(PolyRenderThread *thread, double x0, double x1, double y thread->DrawQueue->Push(*this); } -void RectDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright) +void RectDrawArgs::SetStyle(FRenderStyle renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright) { SetTexture(tex, translationID, renderstyle); - SetColor(0xff000000 | fillcolor, fillcolor >> 24); if (renderstyle == LegacyRenderStyles[STYLE_Normal] || (r_drawfuzz == 0 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { - SetStyle(Translation() ? TriBlendMode::NormalTranslated : TriBlendMode::Normal, alpha); + SetStyle(Translation() ? 
RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, 1.0, 0.0); } else if (renderstyle == LegacyRenderStyles[STYLE_Add] && fullbright && alpha == 1.0 && !Translation()) { - SetStyle(TriBlendMode::SrcColor, alpha); + SetStyle(RectBlendMode::TextureAddSrcColor, 1.0, 1.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Add]) + { + SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, alpha, 1.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Subtract]) + { + SetStyle(Translation() ? RectBlendMode::TranslatedRevSub : RectBlendMode::TextureRevSub, alpha, 1.0); } else if (renderstyle == LegacyRenderStyles[STYLE_SoulTrans]) { - SetStyle(Translation() ? TriBlendMode::AddTranslated : TriBlendMode::Add, transsouls); + SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, transsouls, 1.0 - transsouls); } else if (renderstyle == LegacyRenderStyles[STYLE_Fuzzy] || (r_drawfuzz == 1 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { SetColor(0xff000000, 0); - SetStyle(TriBlendMode::Fuzzy); + SetStyle(RectBlendMode::Fuzz); } else if (renderstyle == LegacyRenderStyles[STYLE_Shadow] || (r_drawfuzz == 2 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { - SetColor(0xff000000, 0); - SetStyle(Translation() ? TriBlendMode::TranslucentStencilTranslated : TriBlendMode::TranslucentStencil, 1.0 - 160 / 255.0); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Stencil]) - { - SetStyle(Translation() ? TriBlendMode::StencilTranslated : TriBlendMode::Stencil, alpha); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Translucent]) - { - SetStyle(Translation() ? TriBlendMode::TranslucentTranslated : TriBlendMode::Translucent, alpha); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Add]) - { - SetStyle(Translation() ? TriBlendMode::AddTranslated : TriBlendMode::Add, alpha); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Shaded]) - { - SetStyle(Translation() ? 
TriBlendMode::ShadedTranslated : TriBlendMode::Shaded, alpha); + SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, 0.0, 160 / 255.0); } else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil]) { - SetStyle(Translation() ? TriBlendMode::TranslucentStencilTranslated : TriBlendMode::TranslucentStencil, alpha); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Subtract]) - { - SetStyle(Translation() ? TriBlendMode::SubtractTranslated : TriBlendMode::Subtract, alpha); + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(RectBlendMode::Stencil, alpha, 1.0 - alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil]) { - SetStyle(Translation() ? TriBlendMode::AddStencilTranslated : TriBlendMode::AddStencil, alpha); + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(RectBlendMode::AddStencil, alpha, 1.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Shaded]) + { + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(RectBlendMode::Shaded, alpha, 1.0 - alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_AddShaded]) { - SetStyle(Translation() ? TriBlendMode::AddShadedTranslated : TriBlendMode::AddShaded, alpha); + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(RectBlendMode::AddShaded, alpha, 1.0); + } + else + { + SetStyle(Translation() ? 
RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, alpha, 1.0 - alpha); } } diff --git a/src/polyrenderer/drawers/poly_draw_args.h b/src/polyrenderer/drawers/poly_draw_args.h index 2d43ae64f..88f174525 100644 --- a/src/polyrenderer/drawers/poly_draw_args.h +++ b/src/polyrenderer/drawers/poly_draw_args.h @@ -180,8 +180,8 @@ public: void SetTexture(FTexture *texture, FRenderStyle style); void SetTexture(FTexture *texture, uint32_t translationID, FRenderStyle style); void SetLight(FSWColormap *basecolormap, uint32_t lightlevel); - void SetStyle(TriBlendMode blendmode, double alpha = 1.0) { mBlendMode = blendmode; mAlpha = (uint32_t)(alpha * 256.0 + 0.5); } - void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); + void SetStyle(RectBlendMode blendmode, double srcalpha = 1.0, double destalpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(srcalpha * 256.0 + 0.5); mDestAlpha = (uint32_t)(destalpha * 256.0 + 0.5); } + void SetStyle(FRenderStyle renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); void SetColor(uint32_t bgra, uint8_t palindex); void Draw(PolyRenderThread *thread, double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1); @@ -191,9 +191,10 @@ public: int TextureHeight() const { return mTextureHeight; } const uint8_t *Translation() const { return mTranslation; } - TriBlendMode BlendMode() const { return mBlendMode; } + RectBlendMode BlendMode() const { return mBlendMode; } uint32_t Color() const { return mColor; } - uint32_t Alpha() const { return mAlpha; } + uint32_t SrcAlpha() const { return mSrcAlpha; } + uint32_t DestAlpha() const { return mDestAlpha; } uint32_t Light() const { return mLight; } const uint8_t *BaseColormap() const { return mColormaps; } @@ -224,10 +225,11 @@ private: int mTextureHeight = 0; const uint8_t *mTranslation = nullptr; const uint8_t 
*mColormaps = nullptr; - TriBlendMode mBlendMode = TriBlendMode::Fill; + RectBlendMode mBlendMode = RectBlendMode::FillOpaque; uint32_t mLight = 0; uint32_t mColor = 0; - uint32_t mAlpha = 0; + uint32_t mSrcAlpha = 0; + uint32_t mDestAlpha = 0; uint16_t mLightAlpha = 0; uint16_t mLightRed = 0; uint16_t mLightGreen = 0; diff --git a/src/polyrenderer/drawers/poly_drawer32.h b/src/polyrenderer/drawers/poly_drawer32.h new file mode 100644 index 000000000..4dae396de --- /dev/null +++ b/src/polyrenderer/drawers/poly_drawer32.h @@ -0,0 +1,476 @@ +/* +** Polygon Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "screen_triangle.h" + +namespace TriScreenDrawerModes +{ + namespace + { + struct BgraColor + { + uint32_t b, g, r, a; + BgraColor() { } + BgraColor(uint32_t c) : b(BPART(c)), g(GPART(c)), r(RPART(c)), a(APART(c)) { } + BgraColor &operator=(uint32_t c) { b = BPART(c); g = GPART(c); r = RPART(c); a = APART(c); return *this; } + operator uint32_t() const { return MAKEARGB(a, r, g, b); } + }; + } + + template + FORCEINLINE unsigned int Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) + { + uint32_t texel; + if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) + { + return color; + } + else if (SamplerT::Mode == (int)Samplers::Translated) + { + const uint8_t *texpal = (const uint8_t *)texPixels; + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + return translation[texpal[texelX * texHeight + texelY]]; + } + else if (FilterModeT::Mode == (int)FilterModes::Nearest) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + texel = texPixels[texelX * texHeight + texelY]; + } + else + { + u -= oneU >> 1; + v -= oneV >> 1; + + unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; + unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth; + unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; + unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; + unsigned int x0 = frac_x0 >> FRACBITS; + unsigned int x1 = frac_x1 >> FRACBITS; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 
= texPixels[x0 * texHeight + y0]; + unsigned int p01 = texPixels[x0 * texHeight + y1]; + unsigned int p10 = texPixels[x1 * texHeight + y0]; + unsigned int p11 = texPixels[x1 * texHeight + y1]; + + unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; + unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + } + + if (SamplerT::Mode == (int)Samplers::Skycap) + { + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(v >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; + + uint32_t r = RPART(texel); + uint32_t g = GPART(texel); + uint32_t b = BPART(texel); + uint32_t fg_a = APART(texel); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + return MAKEARGB(fg_a, r, g, b); + } + else + { + return texel; + } + } + + template + FORCEINLINE unsigned int SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int x, int y) + { + if (SamplerT::Mode == (int)Samplers::Shaded) + { + const uint8_t *texpal = (const uint8_t 
*)texPixels; + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else if (SamplerT::Mode == (int)Samplers::Stencil) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else if (SamplerT::Mode == (int)Samplers::Fuzz) + { + using namespace swrenderer; + + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + return sampleshadeout; + } + else + { + return 0; + } + } + + FORCEINLINE BgraColor VECTORCALL AddLights(BgraColor material, BgraColor fgcolor, BgraColor dynlight) + { + fgcolor.r = MIN(fgcolor.r + ((material.r * dynlight.r) >> 8), (uint32_t)255); + fgcolor.g = MIN(fgcolor.g + ((material.g * dynlight.g) >> 8), (uint32_t)255); + fgcolor.b = MIN(fgcolor.b + ((material.b * dynlight.b) >> 8), (uint32_t)255); + return fgcolor; + } + + FORCEINLINE BgraColor VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, FVector3 worldpos, FVector3 worldnormal, uint32_t 
dynlightcolor) + { + BgraColor lit = dynlightcolor; + + for (int i = 0; i != num_lights; i++) + { + FVector3 lightpos = { lights[i].x, lights[i].y, lights[i].z }; + float light_radius = lights[i].radius; + + bool is_attenuated = light_radius < 0.0f; + if (is_attenuated) + light_radius = -light_radius; + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + FVector3 L = lightpos - worldpos; + float dist2 = L | L; + float rcp_dist = 1.0f / sqrt(dist2); + float dist = dist2 * rcp_dist; + float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f); + + // The simple light type + float simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = max(dot(N,normalize(L)),0) * attenuation + float dotNL = worldnormal | (L * rcp_dist); + float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation; + + uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation); + + BgraColor light_color = lights[i].color; + lit.r += (light_color.r * attenuation) >> 8; + lit.g += (light_color.g * attenuation) >> 8; + lit.b += (light_color.b * attenuation) >> 8; + } + + lit.r = MIN(lit.r, (uint32_t)256); + lit.g = MIN(lit.g, (uint32_t)256); + lit.b = MIN(lit.b, (uint32_t)256); + return lit; + } + + template + FORCEINLINE BgraColor Shade32(BgraColor fgcolor, BgraColor mlight, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, BgraColor dynlight) + { + BgraColor material = fgcolor; + if (ShadeModeT::Mode == (int)ShadeMode::Simple) + { + fgcolor.r = (fgcolor.r * mlight.r) >> 8; + fgcolor.g = (fgcolor.g * mlight.g) >> 8; + fgcolor.b = (fgcolor.b * mlight.b) >> 8; + } + else if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + uint32_t intensity = ((fgcolor.r * 77 + fgcolor.g * 143 + fgcolor.b * 37) >> 8) * desaturate; + fgcolor.r = (((shade_fade.r + ((fgcolor.r * inv_desaturate + intensity) >> 8) * mlight.r) >> 8) 
* shade_light.r) >> 8; + fgcolor.g = (((shade_fade.g + ((fgcolor.g * inv_desaturate + intensity) >> 8) * mlight.g) >> 8) * shade_light.g) >> 8; + fgcolor.b = (((shade_fade.b + ((fgcolor.b * inv_desaturate + intensity) >> 8) * mlight.b) >> 8) * shade_light.b) >> 8; + } + return AddLights(material, fgcolor, dynlight); + } + + template + FORCEINLINE BgraColor Blend32(BgraColor fgcolor, BgraColor bgcolor, uint32_t ifgcolor, uint32_t ifgshade, uint32_t srcalpha, uint32_t destalpha) + { + if (BlendT::Mode == (int)BlendModes::Opaque) + { + fgcolor.a = 255; + return fgcolor; + } + else if (BlendT::Mode == (int)BlendModes::Masked) + { + return (ifgcolor == 0) ? bgcolor : fgcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) + { + uint32_t srcred = fgcolor.r + (fgcolor.r >> 7); + uint32_t srcgreen = fgcolor.g + (fgcolor.g >> 7); + uint32_t srcblue = fgcolor.b + (fgcolor.b >> 7); + uint32_t inv_srcred = 256 - srcred; + uint32_t inv_srcgreen = 256 - srcgreen; + uint32_t inv_srcblue = 256 - srcblue; + + BgraColor outcolor; + outcolor.r = (fgcolor.r * srcred + bgcolor.r * inv_srcred) >> 8; + outcolor.g = (fgcolor.g * srcgreen + bgcolor.g * inv_srcgreen) >> 8; + outcolor.b = (fgcolor.b * srcblue + bgcolor.b * inv_srcblue) >> 8; + outcolor.a = 255; + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::Shaded) + { + uint32_t alpha = ifgshade; + uint32_t inv_alpha = 256 - alpha; + + BgraColor outcolor; + outcolor.r = (fgcolor.r * alpha + bgcolor.r * inv_alpha) >> 8; + outcolor.g = (fgcolor.g * alpha + bgcolor.g * inv_alpha) >> 8; + outcolor.b = (fgcolor.b * alpha + bgcolor.b * inv_alpha) >> 8; + outcolor.a = 255; + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddClampShaded) + { + uint32_t alpha = ifgshade; + BgraColor outcolor; + outcolor.r = ((fgcolor.r * alpha) >> 8) + bgcolor.r; + outcolor.g = ((fgcolor.g * alpha) >> 8) + bgcolor.g; + outcolor.b = ((fgcolor.b * alpha) >> 8) + bgcolor.b; + outcolor.a = 255; + return 
outcolor; + } + else + { + uint32_t alpha = APART(ifgcolor); + alpha += alpha >> 7; // 255->256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bgalpha = (destalpha * alpha + (inv_alpha << 8) + 128) >> 8; + uint32_t fgalpha = (srcalpha * alpha + 128) >> 8; + + fgcolor.r *= fgalpha; + fgcolor.g *= fgalpha; + fgcolor.b *= fgalpha; + bgcolor.r *= bgalpha; + bgcolor.g *= bgalpha; + bgcolor.b *= bgalpha; + + BgraColor outcolor; + if (BlendT::Mode == (int)BlendModes::AddClamp) + { + outcolor.r = MIN((fgcolor.r + bgcolor.r) >> 8, 255); + outcolor.g = MIN((fgcolor.g + bgcolor.g) >> 8, 255); + outcolor.b = MIN((fgcolor.b + bgcolor.b) >> 8, 255); + } + else if (BlendT::Mode == (int)BlendModes::SubClamp) + { + outcolor.r = MAX(int32_t(fgcolor.r - bgcolor.r) >> 8, 0); + outcolor.g = MAX(int32_t(fgcolor.g - bgcolor.g) >> 8, 0); + outcolor.b = MAX(int32_t(fgcolor.b - bgcolor.b) >> 8, 0); + } + else if (BlendT::Mode == (int)BlendModes::RevSubClamp) + { + outcolor.r = MAX(int32_t(bgcolor.r - fgcolor.r) >> 8, 0); + outcolor.g = MAX(int32_t(bgcolor.g - fgcolor.g) >> 8, 0); + outcolor.b = MAX(int32_t(bgcolor.b - fgcolor.b) >> 8, 0); + } + outcolor.a = 255; + return outcolor; + } + } +} + +template +class RectScreenDrawer32 +{ +public: + static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) + { + using namespace TriScreenDrawerModes; + + if (SamplerT::Mode == (int)Samplers::Fuzz) + { + Loop(destOrg, destWidth, destHeight, destPitch, args, thread); + } + else if (args->SimpleShade()) + { + Loop(destOrg, destWidth, destHeight, destPitch, args, thread); + } + else + { + Loop(destOrg, destWidth, destHeight, destPitch, args, thread); + } + } + +private: + template + FORCEINLINE static void Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) + { + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 
0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + uint32_t srcalpha = args->SrcAlpha(); + uint32_t destalpha = args->DestAlpha(); + + // Setup step variables + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + // Sampling stuff + uint32_t color = args->Color(); + const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation(); + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels(); + uint32_t texWidth = args->TextureWidth(); + uint32_t texHeight = args->TextureHeight(); + uint32_t oneU, oneV; + if (SamplerT::Mode != (int)Samplers::Fill) + { + oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; + oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; + } + else + { + oneU = 0; + oneV = 0; + } + + // Setup light + uint32_t lightpos = args->Light(); + lightpos += lightpos >> 7; // 255 -> 256 + BgraColor mlight; + + BgraColor dynlight = 0; + + // Shade constants + int inv_desaturate; + BgraColor shade_fade_lit, shade_light; + int desaturate; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + uint32_t inv_light = 256 - lightpos; + shade_fade_lit.r = args->ShadeFadeRed() * inv_light; + shade_fade_lit.g = args->ShadeFadeGreen() * inv_light; + shade_fade_lit.b = args->ShadeFadeBlue() * inv_light; + shade_light.r = args->ShadeLightRed(); + shade_light.g = args->ShadeLightGreen(); + shade_light.b = args->ShadeLightBlue(); + desaturate = args->ShadeDesaturate(); 
+ inv_desaturate = 256 - desaturate; + mlight.r = lightpos; + mlight.g = lightpos; + mlight.b = lightpos; + } + else + { + inv_desaturate = 0; + shade_fade_lit.r = 0; + shade_fade_lit.g = 0; + shade_fade_lit.b = 0; + shade_light.r = 0; + shade_light.g = 0; + shade_light.b = 0; + desaturate = 0; + mlight.r = lightpos; + mlight.g = lightpos; + mlight.b = lightpos; + } + + int count = x1 - x0; + + uint32_t posV = startV; + for (int y = y0; y < y1; y++, posV += stepV) + { + int coreBlock = y / 8; + if (coreBlock % thread->num_cores != thread->core) + { + continue; + } + + uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0; + + uint32_t posU = startU; + for (int i = 0; i < count; i++) + { + // Load bgcolor + BgraColor bgcolor; + if (BlendT::Mode != (int)BlendModes::Opaque) + bgcolor = *dest; + else + bgcolor = 0; + + // Sample fgcolor + if (SamplerT::Mode == (int)Samplers::FogBoundary) color = *dest; + unsigned int ifgcolor = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + unsigned int ifgshade = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i, y); + posU += stepU; + + // Shade and blend + BgraColor fgcolor = Shade32(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); + BgraColor outcolor = Blend32(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha); + + // Store result + *dest = outcolor; + dest++; + } + } + } +}; diff --git a/src/polyrenderer/drawers/poly_drawer32_sse2.h b/src/polyrenderer/drawers/poly_drawer32_sse2.h new file mode 100644 index 000000000..4685dd07c --- /dev/null +++ b/src/polyrenderer/drawers/poly_drawer32_sse2.h @@ -0,0 +1,518 @@ +/* +** Polygon Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include "screen_triangle.h" + +namespace TriScreenDrawerModes +{ + template + FORCEINLINE unsigned int VECTORCALL Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) + { + uint32_t texel; + if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) + { + return color; + } + else if (SamplerT::Mode == (int)Samplers::Translated) + { + const uint8_t *texpal = (const uint8_t *)texPixels; + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + return translation[texpal[texelX * texHeight + texelY]]; + } + else if (FilterModeT::Mode == (int)FilterModes::Nearest) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + texel = texPixels[texelX * texHeight + texelY]; + } + else + { + u -= oneU >> 1; + v -= oneV >> 1; + + unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; + unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * 
texWidth; + unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; + unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; + unsigned int x0 = frac_x0 >> FRACBITS; + unsigned int x1 = frac_x1 >> FRACBITS; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = texPixels[x0 * texHeight + y0]; + unsigned int p01 = texPixels[x0 * texHeight + y1]; + unsigned int p10 = texPixels[x1 * texHeight + y0]; + unsigned int p11 = texPixels[x1 * texHeight + y1]; + + unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; + unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + } + + if (SamplerT::Mode == (int)Samplers::Skycap) + { + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(v >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; + + uint32_t r = RPART(texel); + uint32_t g = GPART(texel); + uint32_t b = BPART(texel); + uint32_t fg_a = APART(texel); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + 
bg_blue * inv_a + 127) >> 8; + return MAKEARGB(fg_a, r, g, b); + } + else + { + return texel; + } + } + + template + FORCEINLINE unsigned int VECTORCALL SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int x, int y) + { + if (SamplerT::Mode == (int)Samplers::Shaded) + { + const uint8_t *texpal = (const uint8_t *)texPixels; + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else if (SamplerT::Mode == (int)Samplers::Stencil) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else if (SamplerT::Mode == (int)Samplers::Fuzz) + { + using namespace swrenderer; + + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + return sampleshadeout; + } + else + { + return 0; + } + } + + FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, __m128i dynlight) + { + fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + return fgcolor; + } + + FORCEINLINE __m128i VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal, uint32_t dynlightcolor) + { + __m128i lit = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()); + lit = _mm_shuffle_epi32(lit, _MM_SHUFFLE(1, 0, 1, 0)); + + for (int i = 0; i != num_lights; i++) + { + __m128 m256 = _mm_set1_ps(256.0f); + __m128 mSignBit = _mm_set1_ps(-0.0f); + + __m128 lightpos = _mm_loadu_ps(&lights[i].x); + __m128 light_radius = _mm_load_ss(&lights[i].radius); + + __m128 is_attenuated = _mm_cmpge_ss(light_radius, _mm_setzero_ps()); + is_attenuated = _mm_shuffle_ps(is_attenuated, is_attenuated, _MM_SHUFFLE(0, 0, 0, 0)); + light_radius = _mm_andnot_ps(mSignBit, light_radius); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 L = _mm_sub_ps(lightpos, worldpos); + __m128 dist2 = _mm_mul_ps(L, L); + dist2 = _mm_add_ss(dist2, _mm_add_ss(_mm_shuffle_ps(dist2, dist2, _MM_SHUFFLE(0, 0, 0, 1)), _mm_shuffle_ps(dist2, dist2, _MM_SHUFFLE(0, 0, 0, 2)))); + __m128 rcp_dist = _mm_rsqrt_ss(dist2); + __m128 dist = _mm_mul_ss(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ss(m256, _mm_min_ss(_mm_mul_ss(dist, light_radius), m256)); + distance_attenuation = _mm_shuffle_ps(distance_attenuation, distance_attenuation, _MM_SHUFFLE(0, 0, 0, 0)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = max(dot(N,normalize(L)),0) * attenuation + __m128 dotNL = _mm_mul_ps(worldnormal, _mm_mul_ps(L, _mm_shuffle_ps(rcp_dist, rcp_dist, _MM_SHUFFLE(0, 0, 0, 0)))); + dotNL = _mm_add_ss(dotNL, _mm_add_ss(_mm_shuffle_ps(dotNL, dotNL, _MM_SHUFFLE(0, 0, 0, 1)), _mm_shuffle_ps(dotNL, dotNL, _MM_SHUFFLE(0, 0, 0, 2)))); + dotNL = _mm_max_ss(dotNL, 
_mm_setzero_ps()); + __m128 point_attenuation = _mm_mul_ss(dotNL, distance_attenuation); + point_attenuation = _mm_shuffle_ps(point_attenuation, point_attenuation, _MM_SHUFFLE(0, 0, 0, 0)); + + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 1, 1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1, 0, 1, 0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + return _mm_min_epi16(lit, _mm_set1_epi16(256)); + } + + template + FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i dynlight) + { + __m128i material = fgcolor; + if (ShadeModeT::Mode == (int)ShadeMode::Simple) + { + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + } + else if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + int blue0 = BPART(ifgcolor0); + int green0 = GPART(ifgcolor0); + int red0 = RPART(ifgcolor0); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor1); + int green1 = GPART(ifgcolor1); + int red1 = RPART(ifgcolor1); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + } + + return AddLights(material, fgcolor, dynlight); + } + + template + FORCEINLINE __m128i VECTORCALL Blend32(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) + { + if (BlendT::Mode == (int)BlendModes::Opaque) + { + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::Masked) + { + __m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128()); + mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128()); + __m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor)); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) + { + __m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7))); + __m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8)); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::Shaded) + { + ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; + ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; + __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddClampShaded) + { + ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; + ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; + __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); + + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8); + __m128i outcolor = _mm_add_epi16(fgcolor, bgcolor); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else + { + uint32_t alpha0 = APART(ifgcolor0); + uint32_t alpha1 = APART(ifgcolor1); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo, out_hi; + if (BlendT::Mode == (int)BlendModes::AddClamp) + { + out_lo = _mm_add_epi32(fg_lo, bg_lo); + out_hi = _mm_add_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)BlendModes::SubClamp) + { + out_lo = _mm_sub_epi32(fg_lo, bg_lo); + out_hi 
= _mm_sub_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)BlendModes::RevSubClamp) + { + out_lo = _mm_sub_epi32(bg_lo, fg_lo); + out_hi = _mm_sub_epi32(bg_hi, fg_hi); + } + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + } +} + +template +class RectScreenDrawer32 +{ +public: + static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) + { + using namespace TriScreenDrawerModes; + + if (args->SimpleShade()) + { + Loop(destOrg, destWidth, destHeight, destPitch, args, thread); + } + else + { + Loop(destOrg, destWidth, destHeight, destPitch, args, thread); + } + } + +private: + template + FORCEINLINE static void VECTORCALL Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) + { + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + uint32_t srcalpha = args->SrcAlpha(); + uint32_t destalpha = args->DestAlpha(); + + // Setup step variables + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + // Sampling stuff + uint32_t color = 
args->Color(); + const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation(); + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels(); + uint32_t texWidth = args->TextureWidth(); + uint32_t texHeight = args->TextureHeight(); + uint32_t oneU, oneV; + if (SamplerT::Mode != (int)Samplers::Fill) + { + oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; + oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; + } + else + { + oneU = 0; + oneV = 0; + } + + // Shade constants + __m128i inv_desaturate, shade_fade, shade_light; + int desaturate; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + inv_desaturate = _mm_setr_epi16(256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate()); + shade_fade = _mm_set_epi16(args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue(), args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue()); + shade_light = _mm_set_epi16(args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue(), args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue()); + desaturate = args->ShadeDesaturate(); + } + else + { + inv_desaturate = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_light = _mm_setzero_si128(); + desaturate = 0; + } + + // Setup light + uint32_t lightpos = args->Light(); + lightpos += lightpos >> 7; // 255 -> 256 + __m128i mlight = _mm_set_epi16(256, lightpos, lightpos, lightpos, 256, lightpos, lightpos, lightpos); + __m128i shade_fade_lit; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + __m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight); + shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light); + } + else + { + shade_fade_lit = 
_mm_setzero_si128(); + } + + int count = x1 - x0; + int sseCount = count / 2; + + uint32_t posV = startV; + for (int y = y0; y < y1; y++, posV += stepV) + { + int coreBlock = y / 8; + if (coreBlock % thread->num_cores != thread->core) + { + continue; + } + + uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0; + + uint32_t posU = startU; + for (int i = 0; i < sseCount; i++) + { + // Load bgcolor + __m128i bgcolor; + if (BlendT::Mode != (int)BlendModes::Opaque) + bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)dest), _mm_setzero_si128()); + else + bgcolor = _mm_setzero_si128(); + + // Sample fgcolor + unsigned int ifgcolor[2], ifgshade[2]; + if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[0]; + ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i * 2, y); + posU += stepU; + + if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[1]; + ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i * 2 + 1, y); + posU += stepU; + + // Shade and blend + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128()); + __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + + // Store result + _mm_storel_epi64((__m128i*)dest, outcolor); + dest += 2; + } + + if (sseCount * 2 != count) + { + // Load bgcolor + __m128i bgcolor; + if (BlendT::Mode != (int)BlendModes::Opaque) + bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dest), _mm_setzero_si128()); + else + bgcolor = _mm_setzero_si128(); + + // Sample fgcolor + unsigned int ifgcolor[2], ifgshade[2]; + if 
(SamplerT::Mode == (int)Samplers::FogBoundary) color = *dest; + ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + sseCount * 2, y); + ifgcolor[1] = 0; + ifgshade[1] = 0; + posU += stepU; + + // Shade and blend + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128()); + __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + + // Store result + *dest = _mm_cvtsi128_si32(outcolor); + } + } + } +}; diff --git a/src/polyrenderer/drawers/poly_drawer8.h b/src/polyrenderer/drawers/poly_drawer8.h new file mode 100644 index 000000000..1db272885 --- /dev/null +++ b/src/polyrenderer/drawers/poly_drawer8.h @@ -0,0 +1,295 @@ +/* +** Polygon Doom software renderer +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ + +#pragma once + +#include "screen_triangle.h" + +namespace TriScreenDrawerModes +{ + template<typename SamplerT> // SamplerT::Mode selects the sampling path; returns a palette index + FORCEINLINE unsigned int Sample8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation) + { + uint8_t texel; + if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) + { + return color; // these samplers never read the texture here + } + else if (SamplerT::Mode == (int)Samplers::Translated) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + return translation[texPixels[texelX * texHeight + texelY]]; // texels are addressed as texelX * texHeight plus texelY + } + else + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + texel = texPixels[texelX * texHeight + texelY]; + } + + if (SamplerT::Mode == (int)Samplers::Skycap) + { + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(v >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; + + if (a == 256) + return texel; + + uint32_t capcolor = GPalette.BaseColors[color].d; + uint32_t texelrgb = GPalette.BaseColors[texel].d; + uint32_t r = RPART(texelrgb); + uint32_t g = GPART(texelrgb); + uint32_t b = BPART(texelrgb); + uint32_t capcolor_red = RPART(capcolor); + uint32_t capcolor_green = GPART(capcolor); + uint32_t capcolor_blue = BPART(capcolor); + r = (r * a + capcolor_red * inv_a + 127) >> 8; + g = (g * a + capcolor_green * inv_a + 127) >> 8; + b = (b * a + capcolor_blue * inv_a + 127) >> 8; + return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; // map the blended RGB back to a palette index + } + else + { + return texel; + } + } + + template<typename SamplerT> // SamplerT::Mode selects how the shade value is produced + FORCEINLINE unsigned int SampleShade8(int32_t u, int32_t v, const 
uint8_t *texPixels, int texWidth, int texHeight, int x, int y) + { + if (SamplerT::Mode == (int)Samplers::Shaded) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY]; + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else if (SamplerT::Mode == (int)Samplers::Stencil) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0; // any non-zero texel counts as fully covered + } + else if (SamplerT::Mode == (int)Samplers::Fuzz) + { + using namespace swrenderer; + + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0; + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + return sampleshadeout; + } + else + { + return 0; // the remaining samplers carry no shade value + } + } + + template<typename BlendT> // BlendT::Mode selects the blend equation applied after light shading + FORCEINLINE uint8_t ShadeAndBlend8(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha) + { + lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00; + uint8_t shadedfg = colormaps[lightshade + fgcolor]; + + if (BlendT::Mode == (int)BlendModes::Opaque) + { + return shadedfg; + } + else if (BlendT::Mode == (int)BlendModes::Masked) + { + return (fgcolor != 0) ? 
shadedfg : bgcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) + { + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7)); + int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7)); + int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7)); + fg_r = MIN(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255); + fg_g = MIN(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255); + fg_b = MIN(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255); + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; + return (fgcolor != 0) ? shadedfg : bgcolor; + } + else if (BlendT::Mode == (int)BlendModes::Shaded) + { + fgshade = (fgshade * srcalpha + 128) >> 8; + uint32_t alpha = fgshade; + uint32_t inv_alpha = 256 - fgshade; + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + + fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8; + fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8; + fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8; + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; + return (alpha != 0) ? 
shadedfg : bgcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddClampShaded) + { + fgshade = (fgshade * srcalpha + 128) >> 8; + uint32_t alpha = fgshade; + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + + fg_r = MIN(bg_r + ((fg_r * alpha + 127) >> 8), 255); + fg_g = MIN(bg_g + ((fg_g * alpha + 127) >> 8), 255); + fg_b = MIN(bg_b + ((fg_b * alpha + 127) >> 8), 255); + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; + + return (alpha != 0) ? shadedfg : bgcolor; + } + else + { + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + + if (BlendT::Mode == (int)BlendModes::AddClamp) + { + fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255); + fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255); + fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255); + } + else if (BlendT::Mode == (int)BlendModes::SubClamp) + { + fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0); + fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0); + fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0); + } + else if (BlendT::Mode == (int)BlendModes::RevSubClamp) + { + fg_r = MAX(int32_t(bg_r * destalpha - fg_r * srcalpha + 127) >> 8, 0); // reverse subtract: background keeps destalpha, foreground keeps srcalpha, only the operand order flips + fg_g = MAX(int32_t(bg_g * destalpha - fg_g * srcalpha + 127) >> 8, 0); + fg_b = MAX(int32_t(bg_b * destalpha - fg_b * srcalpha + 127) >> 8, 0); + } + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; 
+ return (fgcolor != 0) ? shadedfg : bgcolor; + } + } +} + +template +class RectScreenDrawer8 +{ +public: + static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) + { + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + auto colormaps = args->BaseColormap(); + uint32_t srcalpha = args->SrcAlpha(); + uint32_t destalpha = args->DestAlpha(); + + // Setup step variables + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + // Sampling stuff + uint32_t color = args->Color(); + const uint8_t * RESTRICT translation = args->Translation(); + const uint8_t * RESTRICT texPixels = args->TexturePixels(); + uint32_t texWidth = args->TextureWidth(); + uint32_t texHeight = args->TextureHeight(); + + // Setup light + uint32_t lightshade = args->Light(); + lightshade += lightshade >> 7; // 255 -> 256 + if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256; + + int count = x1 - x0; + + uint32_t posV = startV; + for (int y = y0; y < y1; y++, posV += stepV) + { + int coreBlock = y / 8; + if (coreBlock % thread->num_cores != thread->core) + { + continue; + } + + uint8_t *dest = ((uint8_t*)destOrg) + y * destPitch + x0; + + uint32_t posU = startU; + for (int i = 0; i < count; i++) + { + uint8_t bgcolor = *dest; + if (SamplerT::Mode == 
(int)Samplers::FogBoundary) color = bgcolor; + uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation); // template arguments must be explicit, they cannot be deduced from the call arguments + uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, x0 + i, y); + *dest = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); + + posU += stepU; + dest++; + } + } + } +}; diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 992a43d76..5661f5c83 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -52,7 +52,7 @@ bool PolyTriangleDrawer::IsBgra() return isBgraRenderTarget; } -void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas) +void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers) { uint8_t *dest = (uint8_t*)canvas->GetPixels(); int dest_width = canvas->GetWidth(); @@ -74,7 +74,7 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx); dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety); - queue->Push(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra); + queue->Push(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra, span_drawers); } void PolyTriangleDrawer::SetTransform(const DrawerCommandQueuePtr &queue, const Mat4f *objectToClip) @@ -99,7 +99,7 @@ void PolyTriangleDrawer::SetWeaponScene(const DrawerCommandQueuePtr &queue, bool ///////////////////////////////////////////////////////////////////////////// -void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, 
bool new_dest_bgra) +void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, bool new_dest_bgra, bool new_span_drawers) { viewport_x = x; viewport_y = y; @@ -110,6 +110,7 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui dest_height = new_dest_height; dest_pitch = new_dest_pitch; dest_bgra = new_dest_bgra; + span_drawers = new_span_drawers; ccw = true; weaponScene = false; } @@ -131,7 +132,9 @@ void PolyTriangleThreadData::DrawElements(const PolyDrawArgs &drawargs) args.clipbottom = dest_height; args.uniforms = &drawargs; args.destBgra = dest_bgra; - args.stencilbuffer = PolyStencilBuffer::Instance()->Values(); + args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); + args.stencilValues = PolyStencilBuffer::Instance()->Values(); + args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); args.zbuffer = PolyZBuffer::Instance()->Values(); args.depthOffset = weaponScene ? 1.0f : 0.0f; @@ -188,7 +191,9 @@ void PolyTriangleThreadData::DrawArrays(const PolyDrawArgs &drawargs) args.clipbottom = dest_height; args.uniforms = &drawargs; args.destBgra = dest_bgra; - args.stencilbuffer = PolyStencilBuffer::Instance()->Values(); + args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); + args.stencilValues = PolyStencilBuffer::Instance()->Values(); + args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); args.zbuffer = PolyZBuffer::Instance()->Values(); args.depthOffset = weaponScene ? 
1.0f : 0.0f; @@ -374,7 +379,10 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *vert, boo args->v3 = &clippedvert[i - 2]; if (IsFrontfacing(args) == ccw && args->CalculateGradients()) { - ScreenTriangle::Draw(args, this); + if (!span_drawers) + ScreenTriangle::Draw(args, this); + else + ScreenTriangle::DrawSWRender(args, this); } } } @@ -387,7 +395,10 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *vert, boo args->v3 = &clippedvert[i]; if (IsFrontfacing(args) != ccw && args->CalculateGradients()) { - ScreenTriangle::Draw(args, this); + if (!span_drawers) + ScreenTriangle::Draw(args, this); + else + ScreenTriangle::DrawSWRender(args, this); } } } @@ -620,14 +631,14 @@ void PolySetWeaponSceneCommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -PolySetViewportCommand::PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra) - : x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra) +PolySetViewportCommand::PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers) + : x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra), span_drawers(span_drawers) { } void PolySetViewportCommand::Execute(DrawerThread *thread) { - PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra); + PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra, span_drawers); } ///////////////////////////////////////////////////////////////////////////// diff --git 
a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index c1e4871db..b10888455 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -33,7 +33,7 @@ class PolyTriangleDrawer { public: static void ClearBuffers(DCanvas *canvas); - static void SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas); + static void SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers); static void SetCullCCW(const DrawerCommandQueuePtr &queue, bool ccw); static void SetTwoSided(const DrawerCommandQueuePtr &queue, bool twosided); static void SetWeaponScene(const DrawerCommandQueuePtr &queue, bool enable); @@ -47,7 +47,7 @@ class PolyTriangleThreadData public: PolyTriangleThreadData(int32_t core, int32_t num_cores) : core(core), num_cores(num_cores) { } - void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra); + void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers); void SetTransform(const Mat4f *objectToClip); void SetCullCCW(bool value) { ccw = value; } void SetTwoSided(bool value) { twosided = value; } @@ -88,6 +88,7 @@ private: bool twosided = false; bool weaponScene = false; const Mat4f *objectToClip = nullptr; + bool span_drawers = false; enum { max_additional_vertices = 16 }; }; @@ -143,7 +144,7 @@ private: class PolySetViewportCommand : public DrawerCommand { public: - PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra); + PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers); void Execute(DrawerThread *thread) override; 
FString DebugInfo() override { return "PolySetViewport"; } @@ -158,6 +159,7 @@ private: int dest_height; int dest_pitch; bool dest_bgra; + bool span_drawers; }; class DrawPolyTrianglesCommand : public DrawerCommand diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index 4614dec64..a8a4fcd54 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -36,8 +36,1099 @@ #include "poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "screen_triangle.h" +#ifndef NO_SSE +#include "poly_drawer32_sse2.h" +#else +#include "poly_drawer32.h" +#endif +#include "poly_drawer8.h" #include "x86.h" +class TriangleBlock // rasterizes one triangle in q by q pixel blocks, tracking per-block coverage in Mask0 and Mask1 +{ +public: + TriangleBlock(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); + void Render(); + +private: + void RenderSubdivide(int x0, int y0, int x1, int y1); + + enum class CoverageModes { Full, Partial }; + struct CoverageFull { static const int Mode = (int)CoverageModes::Full; }; + struct CoveragePartial { static const int Mode = (int)CoverageModes::Partial; }; + + template<typename CoverageModeT> // instantiated with CoverageFull or CoveragePartial + void RenderBlock(int x0, int y0, int x1, int y1); + + const TriDrawTriangleArgs *args; + PolyTriangleThreadData *thread; + + // Block size, standard 8x8 (must be power of two) + static const int q = 8; + + // Deltas + int DX12, DX23, DX31; + int DY12, DY23, DY31; + + // Fixed-point deltas + int FDX12, FDX23, FDX31; + int FDY12, FDY23, FDY31; + + // Half-edge constants + int C1, C2, C3; + + // Stencil buffer + int stencilPitch; + uint8_t * RESTRICT stencilValues; + uint32_t * RESTRICT stencilMasks; + uint8_t stencilTestValue; + uint32_t stencilWriteValue; + + // Viewport clipping + int clipright; + int clipbottom; + + // Depth buffer + float * RESTRICT zbuffer; + int32_t zbufferPitch; + + // Triangle bounding block + int minx, miny; + int maxx, maxy; + + // Active block + int X, Y; + uint32_t Mask0, Mask1; + +#ifndef NO_SSE + __m128i mFDY12Offset; + 
__m128i mFDY23Offset; + __m128i mFDY31Offset; + __m128i mFDY12x4; + __m128i mFDY23x4; + __m128i mFDY31x4; + __m128i mFDX12; + __m128i mFDX23; + __m128i mFDX31; + __m128i mC1; + __m128i mC2; + __m128i mC3; + __m128i mDX12; + __m128i mDY12; + __m128i mDX23; + __m128i mDY23; + __m128i mDX31; + __m128i mDY31; +#endif + + enum class CoverageResult + { + full, + partial, + none + }; + CoverageResult AreaCoverageTest(int x0, int y0, int x1, int y1); + + void CoverageTest(); + void StencilEqualTest(); + void StencilGreaterEqualTest(); + void DepthTest(const TriDrawTriangleArgs *args); + void ClipTest(); + void StencilWrite(); + void DepthWrite(const TriDrawTriangleArgs *args); +}; + +TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) : args(args), thread(thread) +{ + const ShadedTriVertex &v1 = *args->v1; + const ShadedTriVertex &v2 = *args->v2; + const ShadedTriVertex &v3 = *args->v3; + + clipright = args->clipright; + clipbottom = args->clipbottom; + + stencilPitch = args->stencilPitch; + stencilValues = args->stencilValues; + stencilMasks = args->stencilMasks; + stencilTestValue = args->uniforms->StencilTestValue(); + stencilWriteValue = args->uniforms->StencilWriteValue(); + + zbuffer = args->zbuffer; + zbufferPitch = args->stencilPitch; + + // 28.4 fixed-point coordinates +#ifdef NO_SSE + const int Y1 = (int)round(16.0f * v1.y); + const int Y2 = (int)round(16.0f * v2.y); + const int Y3 = (int)round(16.0f * v3.y); + + const int X1 = (int)round(16.0f * v1.x); + const int X2 = (int)round(16.0f * v2.x); + const int X3 = (int)round(16.0f * v3.x); +#else + int tempround[4 * 3]; + __m128 m16 = _mm_set1_ps(16.0f); + __m128 mhalf = _mm_set1_ps(65536.5f); + __m128i m65536 = _mm_set1_epi32(65536); + _mm_storeu_si128((__m128i*)tempround, _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v1), m16), mhalf)), m65536)); + _mm_storeu_si128((__m128i*)(tempround + 4), 
_mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v2), m16), mhalf)), m65536)); + _mm_storeu_si128((__m128i*)(tempround + 8), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v3), m16), mhalf)), m65536)); + const int X1 = tempround[0]; + const int X2 = tempround[4]; + const int X3 = tempround[8]; + const int Y1 = tempround[1]; + const int Y2 = tempround[5]; + const int Y3 = tempround[9]; +#endif + + // Deltas + DX12 = X1 - X2; + DX23 = X2 - X3; + DX31 = X3 - X1; + + DY12 = Y1 - Y2; + DY23 = Y2 - Y3; + DY31 = Y3 - Y1; + + // Fixed-point deltas + FDX12 = DX12 << 4; + FDX23 = DX23 << 4; + FDX31 = DX31 << 4; + + FDY12 = DY12 << 4; + FDY23 = DY23 << 4; + FDY31 = DY31 << 4; + + // Bounding rectangle + minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); + maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); + miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); + maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); + if (minx >= maxx || miny >= maxy) + { + return; + } + + // Start and end in corner of 8x8 block + minx &= ~(q - 1); + miny &= ~(q - 1); + maxx |= q - 1; + maxy |= q - 1; + + // Half-edge constants + C1 = DY12 * X1 - DX12 * Y1; + C2 = DY23 * X2 - DX23 * Y2; + C3 = DY31 * X3 - DX31 * Y3; + + // Correct for fill convention + if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; + if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; + if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; + +#ifndef NO_SSE + mFDY12Offset = _mm_setr_epi32(0, FDY12, FDY12 * 2, FDY12 * 3); + mFDY23Offset = _mm_setr_epi32(0, FDY23, FDY23 * 2, FDY23 * 3); + mFDY31Offset = _mm_setr_epi32(0, FDY31, FDY31 * 2, FDY31 * 3); + mFDY12x4 = _mm_set1_epi32(FDY12 * 4); + mFDY23x4 = _mm_set1_epi32(FDY23 * 4); + mFDY31x4 = _mm_set1_epi32(FDY31 * 4); + mFDX12 = _mm_set1_epi32(FDX12); + mFDX23 = _mm_set1_epi32(FDX23); + mFDX31 = _mm_set1_epi32(FDX31); + mC1 = _mm_set1_epi32(C1); + mC2 = _mm_set1_epi32(C2); + mC3 = _mm_set1_epi32(C3); + mDX12 = 
_mm_set1_epi32(DX12); + mDY12 = _mm_set1_epi32(DY12); + mDX23 = _mm_set1_epi32(DX23); + mDY23 = _mm_set1_epi32(DY23); + mDX31 = _mm_set1_epi32(DX31); + mDY31 = _mm_set1_epi32(DY31); +#endif +} + +void TriangleBlock::Render() +{ + if (minx < maxx && miny < maxy) RenderSubdivide(minx / q, miny / q, (maxx + 1) / q, (maxy + 1) / q); // an empty bounding box means the constructor returned before setting the edge constants, so there is nothing valid to test or draw +} + +void TriangleBlock::RenderSubdivide(int x0, int y0, int x1, int y1) // arguments are in block units, hence the multiplications by q +{ + CoverageResult result = AreaCoverageTest(x0 * q, y0 * q, x1 * q, y1 * q); + if (result == CoverageResult::full) + { + RenderBlock<CoverageFull>(x0 * q, y0 * q, x1 * q, y1 * q); // whole area covered: skip the per-block coverage test + } + else if (result == CoverageResult::partial) + { + bool doneX = x1 - x0 <= 8; + bool doneY = y1 - y0 <= 8; + if (doneX && doneY) + { + RenderBlock<CoveragePartial>(x0 * q, y0 * q, x1 * q, y1 * q); // small enough: test coverage block by block + } + else + { + int midx = (x0 + x1) >> 1; + int midy = (y0 + y1) >> 1; + if (doneX) + { + RenderSubdivide(x0, y0, x1, midy); + RenderSubdivide(x0, midy, x1, y1); + } + else if (doneY) + { + RenderSubdivide(x0, y0, midx, y1); + RenderSubdivide(midx, y0, x1, y1); + } + else + { + RenderSubdivide(x0, y0, midx, midy); + RenderSubdivide(midx, y0, x1, midy); + RenderSubdivide(x0, midy, midx, y1); + RenderSubdivide(midx, midy, x1, y1); + } + } + } +} + +template<typename CoverageModeT> +void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1) +{ + // First block line for this thread + int core = thread->core; + int num_cores = thread->num_cores; + int core_skip = (num_cores - ((y0 / q) - core) % num_cores) % num_cores; + int start_miny = y0 + core_skip * q; + + bool depthTest = args->uniforms->DepthTest(); + bool writeColor = args->uniforms->WriteColor(); + bool writeStencil = args->uniforms->WriteStencil(); + bool writeDepth = args->uniforms->WriteDepth(); + + int bmode = (int)args->uniforms->BlendMode(); + auto drawFunc = args->destBgra ? 
ScreenTriangle::SpanDrawers32[bmode] : ScreenTriangle::SpanDrawers8[bmode]; + + // Loop through blocks + for (int y = start_miny; y < y1; y += q * num_cores) + { + for (int x = x0; x < x1; x += q) + { + X = x; + Y = y; + + if (CoverageModeT::Mode == (int)CoverageModes::Full) + { + Mask0 = 0xffffffff; + Mask1 = 0xffffffff; + } + else + { + CoverageTest(); + if (Mask0 == 0 && Mask1 == 0) + continue; + } + + ClipTest(); + if (Mask0 == 0 && Mask1 == 0) + continue; + + StencilEqualTest(); + if (Mask0 == 0 && Mask1 == 0) + continue; + + if (depthTest) + { + DepthTest(args); + if (Mask0 == 0 && Mask1 == 0) + continue; + } + + if (writeColor) + { + if (Mask0 == 0xffffffff) + { + drawFunc(Y, X, X + 8, args); + drawFunc(Y + 1, X, X + 8, args); + drawFunc(Y + 2, X, X + 8, args); + drawFunc(Y + 3, X, X + 8, args); + } + else if (Mask0 != 0) + { + uint32_t mask = Mask0; + for (int j = 0; j < 4; j++) + { + int start = 0; + int i; + for (i = 0; i < 8; i++) + { + if (!(mask & 0x80000000)) + { + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + start = i + 1; + } + mask <<= 1; + } + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + } + } + + if (Mask1 == 0xffffffff) + { + drawFunc(Y + 4, X, X + 8, args); + drawFunc(Y + 5, X, X + 8, args); + drawFunc(Y + 6, X, X + 8, args); + drawFunc(Y + 7, X, X + 8, args); + } + else if (Mask1 != 0) + { + uint32_t mask = Mask1; + for (int j = 4; j < 8; j++) + { + int start = 0; + int i; + for (i = 0; i < 8; i++) + { + if (!(mask & 0x80000000)) + { + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + start = i + 1; + } + mask <<= 1; + } + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + } + } + } + + if (writeStencil) + StencilWrite(); + if (writeDepth) + DepthWrite(args); + } + } +} + +#ifdef NO_SSE + +void TriangleBlock::DepthTest(const TriDrawTriangleArgs *args) +{ + int block = (X >> 3) + (Y >> 3) * zbufferPitch; + float *depth = zbuffer + block * 64; + + const ShadedTriVertex &v1 = *args->v1; + + float 
stepXW = args->gradientX.W; + float stepYW = args->gradientY.W; + float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + float posXW = posYW; + for (int ix = 0; ix < 8; ix++) + { + bool covered = *depth <= posXW; + mask0 <<= 1; + mask0 |= (uint32_t)covered; + depth++; + posXW += stepXW; + } + posYW += stepYW; + } + + for (int iy = 0; iy < 4; iy++) + { + float posXW = posYW; + for (int ix = 0; ix < 8; ix++) + { + bool covered = *depth <= posXW; + mask1 <<= 1; + mask1 |= (uint32_t)covered; + depth++; + posXW += stepXW; + } + posYW += stepYW; + } + + Mask0 = Mask0 & mask0; + Mask1 = Mask1 & mask1; +} + +#else + +void TriangleBlock::DepthTest(const TriDrawTriangleArgs *args) +{ + int block = (X >> 3) + (Y >> 3) * zbufferPitch; + float *depth = zbuffer + block * 64; + + const ShadedTriVertex &v1 = *args->v1; + + float stepXW = args->gradientX.W; + float stepYW = args->gradientY.W; + float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; + + __m128 mposYW = _mm_setr_ps(posYW, posYW + stepXW, posYW + stepXW + stepXW, posYW + stepXW + stepXW + stepXW); + __m128 mstepXW = _mm_set1_ps(stepXW * 4.0f); + __m128 mstepYW = _mm_set1_ps(stepYW); + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + __m128 mposXW = mposYW; + for (int ix = 0; ix < 2; ix++) + { + __m128 covered = _mm_cmplt_ps(_mm_loadu_ps(depth), mposXW); + mask0 <<= 4; + mask0 |= _mm_movemask_ps(_mm_shuffle_ps(covered, covered, _MM_SHUFFLE(0, 1, 2, 3))); + depth += 4; + mposXW = _mm_add_ps(mposXW, mstepXW); + } + mposYW = _mm_add_ps(mposYW, mstepYW); + } + + for (int iy = 0; iy < 4; iy++) + { + __m128 mposXW = mposYW; + for (int ix = 0; ix < 2; ix++) + { + __m128 covered = _mm_cmplt_ps(_mm_loadu_ps(depth), mposXW); + mask1 <<= 4; + mask1 |= _mm_movemask_ps(_mm_shuffle_ps(covered, covered, _MM_SHUFFLE(0, 1, 2, 3))); + depth += 4; + mposXW = 
_mm_add_ps(mposXW, mstepXW); + } + mposYW = _mm_add_ps(mposYW, mstepYW); + } + + Mask0 = Mask0 & mask0; + Mask1 = Mask1 & mask1; +} + +#endif + +void TriangleBlock::ClipTest() +{ + static const uint32_t clipxmask[8] = + { + 0, + 0x80808080, + 0xc0c0c0c0, + 0xe0e0e0e0, + 0xf0f0f0f0, + 0xf8f8f8f8, + 0xfcfcfcfc, + 0xfefefefe + }; + + static const uint32_t clipymask[8] = + { + 0, + 0xff000000, + 0xffff0000, + 0xffffff00, + 0xffffffff, + 0xffffffff, + 0xffffffff, + 0xffffffff + }; + + uint32_t xmask = (X + 8 <= clipright) ? 0xffffffff : clipxmask[clipright - X]; + uint32_t ymask0 = (Y + 4 <= clipbottom) ? 0xffffffff : clipymask[clipbottom - Y]; + uint32_t ymask1 = (Y + 8 <= clipbottom) ? 0xffffffff : clipymask[clipbottom - Y - 4]; + + Mask0 = Mask0 & xmask & ymask0; + Mask1 = Mask1 & xmask & ymask1; +} + +#ifdef NO_SSE + +void TriangleBlock::StencilEqualTest() +{ + // Stencil test the whole block, if possible + int block = (X >> 3) + (Y >> 3) * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; + if (skipBlock) + { + Mask0 = 0; + Mask1 = 0; + } + else if (!blockIsSingleStencil) + { + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = stencilBlock[ix + iy * q] == stencilTestValue; + mask0 <<= 1; + mask0 |= (uint32_t)passStencilTest; + } + } + + for (int iy = 4; iy < q; iy++) + { + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = stencilBlock[ix + iy * q] == stencilTestValue; + mask1 <<= 1; + mask1 |= (uint32_t)passStencilTest; + } + } + + Mask0 = Mask0 & mask0; + Mask1 = Mask1 & mask1; + } +} + +#else + +void TriangleBlock::StencilEqualTest() +{ + // Stencil test the whole block, if possible + int block = (X >> 3) + (Y >> 3) * 
stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; + if (skipBlock) + { + Mask0 = 0; + Mask1 = 0; + } + else if (!blockIsSingleStencil) + { + __m128i mstencilTestValue = _mm_set1_epi16(stencilTestValue); + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 2; iy++) + { + __m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock); + + __m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); + __m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); + __m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); + __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); + mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); + mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); + __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mask0 <<= 16; + mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); + + stencilBlock += 16; + } + + for (int iy = 0; iy < 2; iy++) + { + __m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock); + + __m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); + __m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); + __m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); + __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), 
_mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); + mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); + mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); + __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mask1 <<= 16; + mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); + + stencilBlock += 16; + } + + Mask0 = Mask0 & mask0; + Mask1 = Mask1 & mask1; + } +} + +#endif + +void TriangleBlock::StencilGreaterEqualTest() +{ + // Stencil test the whole block, if possible + int block = (X >> 3) + (Y >> 3) * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t *stencilBlockMask = &stencilMasks[block]; + bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) < stencilTestValue; + if (skipBlock) + { + Mask0 = 0; + Mask1 = 0; + } + else if (!blockIsSingleStencil) + { + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = stencilBlock[ix + iy * q] >= stencilTestValue; + mask0 <<= 1; + mask0 |= (uint32_t)passStencilTest; + } + } + + for (int iy = 4; iy < q; iy++) + { + for (int ix = 0; ix < q; ix++) + { + bool passStencilTest = stencilBlock[ix + iy * q] >= stencilTestValue; + mask1 <<= 1; + mask1 |= (uint32_t)passStencilTest; + } + } + + Mask0 = Mask0 & mask0; + Mask1 = Mask1 & mask1; + } +} + +TriangleBlock::CoverageResult TriangleBlock::AreaCoverageTest(int x0, int y0, int x1, int y1) +{ + // Corners of block + x0 = x0 << 4; + x1 = (x1 - 1) << 4; + y0 = y0 << 4; + y1 = (y1 - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + 
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + if (a == 0 || b == 0 || c == 0) // Skip block when outside an edge + { + return CoverageResult::none; + } + else if (a == 0xf && b == 0xf && c == 0xf) // Accept whole block when totally covered + { + return CoverageResult::full; + } + else // Partially covered block + { + return CoverageResult::partial; + } +} + +#ifdef NO_SSE + +void TriangleBlock::CoverageTest() +{ + // Corners of block + int x0 = X << 4; + int x1 = (X + q - 1) << 4; + int y0 = Y << 4; + int y1 = (Y + q - 1) << 4; + + // Evaluate half-space functions + bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; + bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; + bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; + bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; + int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); + + bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; + bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; + bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; + bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; + int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); + + bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; + bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; + bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; + bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; + int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); + + if (a == 0 || b == 0 || c == 0) // Skip block when outside an edge + { + Mask0 = 0; + Mask1 = 0; + } + 
else if (a == 0xf && b == 0xf && c == 0xf) // Accept whole block when totally covered + { + Mask0 = 0xffffffff; + Mask1 = 0xffffffff; + } + else // Partially covered block + { + x0 = X << 4; + x1 = (X + q - 1) << 4; + int CY1 = C1 + DX12 * y0 - DY12 * x0; + int CY2 = C2 + DX23 * y0 - DY23 * x0; + int CY3 = C3 + DX31 * y0 - DY31 * x0; + + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + for (int iy = 0; iy < 4; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool covered = CX1 > 0 && CX2 > 0 && CX3 > 0; + mask0 <<= 1; + mask0 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + for (int iy = 4; iy < q; iy++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + + for (int ix = 0; ix < q; ix++) + { + bool covered = CX1 > 0 && CX2 > 0 && CX3 > 0; + mask1 <<= 1; + mask1 |= (uint32_t)covered; + + CX1 -= FDY12; + CX2 -= FDY23; + CX3 -= FDY31; + } + + CY1 += FDX12; + CY2 += FDX23; + CY3 += FDX31; + } + + Mask0 = mask0; + Mask1 = mask1; + } +} + +#else + +void TriangleBlock::CoverageTest() +{ + // Corners of block + int x0 = X << 4; + int x1 = (X + q - 1) << 4; + int y0 = Y << 4; + int y1 = (Y + q - 1) << 4; + + __m128i mY = _mm_set_epi32(y0, y0, y1, y1); + __m128i mX = _mm_set_epi32(x0, x0, x1, x1); + + // Evaluate half-space functions + __m128i mCY1 = _mm_sub_epi32( + _mm_add_epi32(mC1, _mm_shuffle_epi32(_mm_mul_epu32(mDX12, mY), _MM_SHUFFLE(0, 0, 2, 2))), + _mm_shuffle_epi32(_mm_mul_epu32(mDY12, mX), _MM_SHUFFLE(0, 2, 0, 2))); + __m128i mA = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); + + __m128i mCY2 = _mm_sub_epi32( + _mm_add_epi32(mC2, _mm_shuffle_epi32(_mm_mul_epu32(mDX23, mY), _MM_SHUFFLE(0, 0, 2, 2))), + _mm_shuffle_epi32(_mm_mul_epu32(mDY23, mX), _MM_SHUFFLE(0, 2, 0, 2))); + __m128i mB = _mm_cmpgt_epi32(mCY2, _mm_setzero_si128()); + + __m128i mCY3 = _mm_sub_epi32( + _mm_add_epi32(mC3, _mm_shuffle_epi32(_mm_mul_epu32(mDX31, mY), 
_MM_SHUFFLE(0, 0, 2, 2))), + _mm_shuffle_epi32(_mm_mul_epu32(mDY31, mX), _MM_SHUFFLE(0, 2, 0, 2))); + __m128i mC = _mm_cmpgt_epi32(mCY3, _mm_setzero_si128()); + + int abc = _mm_movemask_epi8(_mm_packs_epi16(_mm_packs_epi32(mA, mB), _mm_packs_epi32(mC, _mm_setzero_si128()))); + + if ((abc & 0xf) == 0 || (abc & 0xf0) == 0 || (abc & 0xf00) == 0) // Skip block when outside an edge + { + Mask0 = 0; + Mask1 = 0; + } + else if (abc == 0xfff) // Accept whole block when totally covered + { + Mask0 = 0xffffffff; + Mask1 = 0xffffffff; + } + else // Partially covered block + { + uint32_t mask0 = 0; + uint32_t mask1 = 0; + + mCY1 = _mm_sub_epi32(_mm_shuffle_epi32(mCY1, _MM_SHUFFLE(0, 0, 0, 0)), mFDY12Offset); + mCY2 = _mm_sub_epi32(_mm_shuffle_epi32(mCY2, _MM_SHUFFLE(0, 0, 0, 0)), mFDY23Offset); + mCY3 = _mm_sub_epi32(_mm_shuffle_epi32(mCY3, _MM_SHUFFLE(0, 0, 0, 0)), mFDY31Offset); + for (int iy = 0; iy < 2; iy++) + { + __m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); + __m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); + mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); + mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); + mCY1 = _mm_add_epi32(mCY1, mFDX12); + mCY2 = _mm_add_epi32(mCY2, mFDX23); + mCY3 = _mm_add_epi32(mCY3, mFDX31); + __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); + mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); + mtest1 = 
_mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); + mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); + mCY1 = _mm_add_epi32(mCY1, mFDX12); + mCY2 = _mm_add_epi32(mCY2, mFDX23); + mCY3 = _mm_add_epi32(mCY3, mFDX31); + __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mask0 <<= 16; + mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); + } + + for (int iy = 0; iy < 2; iy++) + { + __m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); + __m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); + mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); + mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); + mCY1 = _mm_add_epi32(mCY1, mFDX12); + mCY2 = _mm_add_epi32(mCY2, mFDX23); + mCY3 = _mm_add_epi32(mCY3, mFDX31); + __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); + mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); + mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); + mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); + mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); + mCY1 = _mm_add_epi32(mCY1, mFDX12); + mCY2 = _mm_add_epi32(mCY2, mFDX23); + mCY3 = _mm_add_epi32(mCY3, mFDX31); + __m128i second = 
_mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); + + mask1 <<= 16; + mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); + } + + Mask0 = mask0; + Mask1 = mask1; + } +} + +#endif + +void TriangleBlock::StencilWrite() +{ + int block = (X >> 3) + (Y >> 3) * stencilPitch; + uint8_t *stencilBlock = &stencilValues[block * 64]; + uint32_t &stencilBlockMask = stencilMasks[block]; + uint32_t writeValue = stencilWriteValue; + + if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) + { + stencilBlockMask = 0xffffff00 | writeValue; + } + else + { + uint32_t mask0 = Mask0; + uint32_t mask1 = Mask1; + + bool isSingleValue = (stencilBlockMask & 0xffffff00) == 0xffffff00; + if (isSingleValue) + { + uint8_t value = stencilBlockMask & 0xff; + for (int v = 0; v < 64; v++) + stencilBlock[v] = value; + stencilBlockMask = 0; + } + + int count = 0; + for (int v = 0; v < 32; v++) + { + if ((mask0 & (1 << 31)) || stencilBlock[v] == writeValue) + { + stencilBlock[v] = writeValue; + count++; + } + mask0 <<= 1; + } + for (int v = 32; v < 64; v++) + { + if ((mask1 & (1 << 31)) || stencilBlock[v] == writeValue) + { + stencilBlock[v] = writeValue; + count++; + } + mask1 <<= 1; + } + + if (count == 64) + stencilBlockMask = 0xffffff00 | writeValue; + } +} + +#ifdef NO_SSE + +void TriangleBlock::DepthWrite(const TriDrawTriangleArgs *args) +{ + int block = (X >> 3) + (Y >> 3) * zbufferPitch; + float *depth = zbuffer + block * 64; + + const ShadedTriVertex &v1 = *args->v1; + + float stepXW = args->gradientX.W; + float stepYW = args->gradientY.W; + float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; + + if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) + { + for (int iy = 0; iy < 8; iy++) + { + float posXW = posYW; + for (int ix = 0; ix < 8; ix++) + { + *(depth++) = posXW; + posXW += stepXW; + } + posYW += stepYW; + } + } + else + { + uint32_t mask0 = Mask0; + uint32_t mask1 = Mask1; + + for (int iy 
= 0; iy < 4; iy++) + { + float posXW = posYW; + for (int ix = 0; ix < 8; ix++) + { + if (mask0 & (1 << 31)) + *depth = posXW; + posXW += stepXW; + mask0 <<= 1; + depth++; + } + posYW += stepYW; + } + + for (int iy = 0; iy < 4; iy++) + { + float posXW = posYW; + for (int ix = 0; ix < 8; ix++) + { + if (mask1 & (1 << 31)) + *depth = posXW; + posXW += stepXW; + mask1 <<= 1; + depth++; + } + posYW += stepYW; + } + } +} + +#else + +void TriangleBlock::DepthWrite(const TriDrawTriangleArgs *args) +{ + int block = (X >> 3) + (Y >> 3) * zbufferPitch; + float *depth = zbuffer + block * 64; + + const ShadedTriVertex &v1 = *args->v1; + + float stepXW = args->gradientX.W; + float stepYW = args->gradientY.W; + float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; + + __m128 mposYW = _mm_setr_ps(posYW, posYW + stepXW, posYW + stepXW + stepXW, posYW + stepXW + stepXW + stepXW); + __m128 mstepXW = _mm_set1_ps(stepXW * 4.0f); + __m128 mstepYW = _mm_set1_ps(stepYW); + + if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) + { + for (int iy = 0; iy < 8; iy++) + { + __m128 mposXW = mposYW; + _mm_storeu_ps(depth, mposXW); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); + _mm_storeu_ps(depth, mposXW); depth += 4; + mposYW = _mm_add_ps(mposYW, mstepYW); + } + } + else + { + __m128i mxormask = _mm_set1_epi32(0xffffffff); + __m128i topfour = _mm_setr_epi32(1 << 31, 1 << 30, 1 << 29, 1 << 28); + + __m128i mmask0 = _mm_set1_epi32(Mask0); + __m128i mmask1 = _mm_set1_epi32(Mask1); + + for (int iy = 0; iy < 4; iy++) + { + __m128 mposXW = mposYW; + _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask0 = _mm_slli_epi32(mmask0, 4); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); + _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask0 = 
_mm_slli_epi32(mmask0, 4); depth += 4; + mposYW = _mm_add_ps(mposYW, mstepYW); + } + + for (int iy = 0; iy < 4; iy++) + { + __m128 mposXW = mposYW; + _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask1 = _mm_slli_epi32(mmask1, 4); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); + _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask1 = _mm_slli_epi32(mmask1, 4); depth += 4; + mposYW = _mm_add_ps(mposYW, mstepYW); + } + } +} + +#endif + +void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) +{ + TriangleBlock block(args, thread); + block.Render(); +} + static void SortVertices(const TriDrawTriangleArgs *args, ShadedTriVertex **sortedVertices) { sortedVertices[0] = args->v1; @@ -52,7 +1143,7 @@ static void SortVertices(const TriDrawTriangleArgs *args, ShadedTriVertex **sort std::swap(sortedVertices[1], sortedVertices[2]); } -void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) +void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) { // Sort vertices by Y position ShadedTriVertex *sortedVertices[3]; @@ -139,22 +1230,13 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat float v1Y = args->v1->y; float v1W = args->v1->w; - bool depthTest = args->uniforms->DepthTest(); - bool stencilTest = true; - bool writeColor = args->uniforms->WriteColor(); - bool writeStencil = args->uniforms->WriteStencil(); - bool writeDepth = args->uniforms->WriteDepth(); - uint8_t stencilTestValue = args->uniforms->StencilTestValue(); - uint8_t stencilWriteValue = args->uniforms->StencilWriteValue(); - int num_cores = thread->num_cores; for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores) { int x = 
leftEdge[y]; int xend = rightEdge[y]; - float *zbufferLine = args->zbuffer + args->pitch * y; - uint8_t *stencilLine = args->stencilbuffer + args->pitch * y; + float *zbufferLine = args->zbuffer + args->stencilPitch * 8 * y; float startX = x + (0.5f - v1X); float startY = y + (0.5f - v1Y); @@ -167,211 +1249,59 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat { int xstart = x; - if (depthTest && stencilTest) + int xendsse = x + ((xend - x) & ~3); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && x < xendsse) { - int xendsse = x + ((xend - x) / 4); - __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && - stencilLine[x] == stencilTestValue && - stencilLine[x + 1] == stencilTestValue && - stencilLine[x + 2] == stencilTestValue && - stencilLine[x + 3] == stencilTestValue && - x < xendsse) - { - if (writeDepth) - _mm_storeu_ps(zbufferLine + x, mposXW); - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; - } - posXW = _mm_cvtss_f32(mposXW); + _mm_storeu_ps(zbufferLine + x, mposXW); + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); - while (zbufferLine[x] <= posXW && stencilLine[x] == stencilTestValue && x < xend) - { - if (writeDepth) - zbufferLine[x] = posXW; - posXW += stepXW; - x++; - } - } - else if (depthTest) + while (zbufferLine[x] <= posXW && x < xend) { - int xendsse = x + ((xend - x) / 4); - __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && x < xendsse) - { - if (writeDepth) - _mm_storeu_ps(zbufferLine + x, mposXW); - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; - } - posXW = _mm_cvtss_f32(mposXW); - - while (zbufferLine[x] <= posXW && x < xend) - { - if (writeDepth) - zbufferLine[x] = posXW; - posXW 
+= stepXW; - x++; - } - } - else if (stencilTest) - { - while (stencilLine[x] == stencilTestValue && x < xend) - x++; - } - else - { - x = xend; + zbufferLine[x] = posXW; + posXW += stepXW; + x++; } if (x > xstart) + drawfunc(y, xstart, x, args); + + xendsse = x + ((xend - x) & ~3); + mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 && x < xendsse) { - if (writeColor) - drawfunc(y, xstart, x, args); - - if (writeStencil) - { - for (int i = xstart; i < x; i++) - stencilLine[i] = stencilWriteValue; - } - - if (!depthTest && writeDepth) - { - for (int i = xstart; i < x; i++) - { - zbufferLine[i] = posXW; - posXW += stepXW; - } - } + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; } + posXW = _mm_cvtss_f32(mposXW); - if (depthTest && stencilTest) + while (zbufferLine[x] > posXW && x < xend) { - int xendsse = x + ((xend - x) / 4); - __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while ((_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 || - stencilLine[x] != stencilTestValue || - stencilLine[x + 1] != stencilTestValue || - stencilLine[x + 2] != stencilTestValue || - stencilLine[x + 3] != stencilTestValue) && - x < xendsse) - { - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; - } - posXW = _mm_cvtss_f32(mposXW); - - while ((zbufferLine[x] > posXW || stencilLine[x] != stencilTestValue) && x < xend) - { - posXW += stepXW; - x++; - } - } - else if (depthTest) - { - int xendsse = x + ((xend - x) / 4); - __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 && x < xendsse) - { - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; - } - posXW = _mm_cvtss_f32(mposXW); - - while (zbufferLine[x] > posXW && x < xend) - { - posXW += stepXW; - x++; - } - } - else if (stencilTest) - { - while (stencilLine[x] != stencilTestValue && x < xend) - { - posXW += stepXW; - x++; 
- } + posXW += stepXW; + x++; } } #else while (x < xend) { int xstart = x; - - if (depthTest && stencilTest) + while (zbufferLine[x] <= posXW && x < xend) { - while (zbufferLine[x] <= posXW && stencilLine[x] == stencilTestValue && x < xend) - { - if (writeDepth) - zbufferLine[x] = posXW; - posXW += stepXW; - x++; - } - } - else if (depthTest) - { - while (zbufferLine[x] <= posXW && x < xend) - { - if (writeDepth) - zbufferLine[x] = posXW; - posXW += stepXW; - x++; - } - } - else if (stencilTest) - { - while (stencilLine[x] == stencilTestValue && x < xend) - x++; - } - else - { - x = xend; + zbufferLine[x] = posXW; + posXW += stepXW; + x++; } if (x > xstart) - { - if (writeColor) - drawfunc(y, xstart, x, args); + drawfunc(y, xstart, x, args); - if (writeStencil) - { - for (int i = xstart; i < x; i++) - stencilLine[i] = stencilWriteValue; - } - - if (!depthTest && writeDepth) - { - for (int i = xstart; i < x; i++) - { - zbufferLine[i] = posXW; - posXW += stepXW; - } - } - } - - if (depthTest && stencilTest) + while (zbufferLine[x] > posXW && x < xend) { - while ((zbufferLine[x] > posXW || stencilLine[x] != stencilTestValue) && x < xend) - { - posXW += stepXW; - x++; - } - } - else if (depthTest) - { - while (zbufferLine[x] > posXW && x < xend) - { - posXW += stepXW; - x++; - } - } - else if (stencilTest) - { - while (stencilLine[x] != stencilTestValue && x < xend) - { - posXW += stepXW; - x++; - } + posXW += stepXW; + x++; } } #endif @@ -1278,516 +2208,6 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) } } -template -void DrawRect8(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) -{ - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= 
x0 || y1 <= y0) - return; - - const uint8_t *colormaps, *texPixels, *translation; - int texWidth, texHeight; - uint32_t fillcolor; - int alpha; - uint32_t light; - - texPixels = args->TexturePixels(); - translation = args->Translation(); - texWidth = args->TextureWidth(); - texHeight = args->TextureHeight(); - fillcolor = args->Color(); - alpha = args->Alpha(); - colormaps = args->BaseColormap(); - light = args->Light(); - light += light >> 7; // 255 -> 256 - light = ((256 - light) * NUMCOLORMAPS) & 0xffffff00; - - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - uint32_t posV = startV; - int num_cores = thread->num_cores; - int skip = thread->skipped_by_thread(y0); - posV += skip * stepV; - stepV *= num_cores; - for (int y = y0 + skip; y < y1; y += num_cores, posV += stepV) - { - uint8_t *destLine = ((uint8_t*)destOrg) + y * destPitch; - - uint32_t posU = startU; - for (int x = x0; x < x1; x++) - { - int fg = 0; - int fgalpha = 255; - - if (ModeT::SWFlags & SWSTYLEF_Fill) - { - fg = fillcolor; - } - else if (ModeT::BlendOp != STYLEOP_Fuzz) - { - uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; - fg = texPixels[texelX * texHeight + texelY]; - - if (ModeT::SWFlags & SWSTYLEF_Translated) - fg = translation[fg]; - - fgalpha = (fg != 0) ? 255 : 0; - } - - if (ModeT::BlendOp == STYLEOP_Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 
256 : 0; - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - - uint32_t a = 256 - sampleshadeout; - - uint32_t dest = GPalette.BaseColors[destLine[x]].d; - uint32_t r = (RPART(dest) * a) >> 8; - uint32_t g = (GPART(dest) * a) >> 8; - uint32_t b = (BPART(dest) * a) >> 8; - destLine[x] = RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; - } - else - { - if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) - { - if (ModeT::Flags & STYLEF_RedIsAlpha) - fgalpha = fg; - fg = fillcolor; - } - - if (!(ModeT::Flags & STYLEF_Alpha1)) - { - fgalpha = (fgalpha * alpha) >> 8; - } - - uint8_t shadedfg = colormaps[light + fg]; - - if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) - { - destLine[x] = shadedfg; - } - else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One) - { - uint32_t src = GPalette.BaseColors[shadedfg]; - uint32_t dest = GPalette.BaseColors[destLine[x]]; - - if (ModeT::BlendOp == STYLEOP_Add) - { - uint32_t out_r = MIN(RPART(dest) + RPART(src), 255); - uint32_t out_g = MIN(GPART(dest) + GPART(src), 255); - uint32_t out_b = MIN(BPART(dest) + BPART(src), 255); - destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; - } - else if (ModeT::BlendOp == STYLEOP_RevSub) - { - uint32_t out_r = MAX(RPART(dest) - RPART(src), 0); - uint32_t out_g = MAX(GPART(dest) - GPART(src), 0); - uint32_t out_b = MAX(BPART(dest) - BPART(src), 0); - destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; - } - else //if (ModeT::BlendOp == STYLEOP_Sub) - { - uint32_t out_r = MAX(RPART(src) - 
RPART(dest), 0); - uint32_t out_g = MAX(GPART(src) - GPART(dest), 0); - uint32_t out_b = MAX(BPART(src) - BPART(dest), 0); - destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; - } - } - else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor) - { - uint32_t src = GPalette.BaseColors[shadedfg]; - uint32_t dest = GPalette.BaseColors[destLine[x]]; - - uint32_t sfactor_r = RPART(src); sfactor_r += sfactor_r >> 7; // 255 -> 256 - uint32_t sfactor_g = GPART(src); sfactor_g += sfactor_g >> 7; // 255 -> 256 - uint32_t sfactor_b = BPART(src); sfactor_b += sfactor_b >> 7; // 255 -> 256 - uint32_t sfactor_a = fgalpha; sfactor_a += sfactor_a >> 7; // 255 -> 256 - uint32_t dfactor_r = 256 - sfactor_r; - uint32_t dfactor_g = 256 - sfactor_g; - uint32_t dfactor_b = 256 - sfactor_b; - uint32_t out_r = (RPART(dest) * dfactor_r + RPART(src) * sfactor_r + 128) >> 8; - uint32_t out_g = (GPART(dest) * dfactor_g + GPART(src) * sfactor_g + 128) >> 8; - uint32_t out_b = (BPART(dest) * dfactor_b + BPART(src) * sfactor_b + 128) >> 8; - - destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; - } - else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) - { - destLine[x] = shadedfg; - } - else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) - { - uint32_t src = GPalette.BaseColors[shadedfg]; - uint32_t dest = GPalette.BaseColors[destLine[x]]; - - uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 - uint32_t dfactor = 256 - sfactor; - uint32_t src_r = RPART(src) * sfactor; - uint32_t src_g = GPART(src) * sfactor; - uint32_t src_b = BPART(src) * sfactor; - uint32_t dest_r = RPART(dest); - uint32_t dest_g = GPART(dest); - uint32_t dest_b = BPART(dest); - if (ModeT::BlendDest == STYLEALPHA_One) - { - dest_r <<= 8; - dest_g <<= 8; - dest_b <<= 8; - } - else - { - uint32_t dfactor = 256 - sfactor; - dest_r *= dfactor; - 
dest_g *= dfactor; - dest_b *= dfactor; - } - - uint32_t out_r, out_g, out_b; - if (ModeT::BlendOp == STYLEOP_Add) - { - if (ModeT::BlendDest == STYLEALPHA_One) - { - out_r = MIN((dest_r + src_r + 128) >> 8, 255); - out_g = MIN((dest_g + src_g + 128) >> 8, 255); - out_b = MIN((dest_b + src_b + 128) >> 8, 255); - } - else - { - out_r = (dest_r + src_r + 128) >> 8; - out_g = (dest_g + src_g + 128) >> 8; - out_b = (dest_b + src_b + 128) >> 8; - } - } - else if (ModeT::BlendOp == STYLEOP_RevSub) - { - out_r = MAX(static_cast(dest_r - src_r + 128) >> 8, 0); - out_g = MAX(static_cast(dest_g - src_g + 128) >> 8, 0); - out_b = MAX(static_cast(dest_b - src_b + 128) >> 8, 0); - } - else //if (ModeT::BlendOp == STYLEOP_Sub) - { - out_r = MAX(static_cast(src_r - dest_r + 128) >> 8, 0); - out_g = MAX(static_cast(src_g - dest_g + 128) >> 8, 0); - out_b = MAX(static_cast(src_b - dest_b + 128) >> 8, 0); - } - - destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; - } - } - - posU += stepU; - } - } -} - -template -void DrawRectOpt32(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) -{ - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= x0 || y1 <= y0) - return; - - const uint32_t *texPixels, *translation; - int texWidth, texHeight; - uint32_t fillcolor; - int alpha; - uint32_t light; - uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate; - - texPixels = (const uint32_t*)args->TexturePixels(); - translation = (const uint32_t*)args->Translation(); - texWidth = args->TextureWidth(); - texHeight = args->TextureHeight(); - fillcolor = args->Color(); - alpha = args->Alpha(); - 
light = args->Light(); - light += light >> 7; // 255 -> 256 - - if (OptT::Flags & SWOPT_ColoredFog) - { - shade_fade_r = args->ShadeFadeRed(); - shade_fade_g = args->ShadeFadeGreen(); - shade_fade_b = args->ShadeFadeBlue(); - shade_light_r = args->ShadeLightRed(); - shade_light_g = args->ShadeLightGreen(); - shade_light_b = args->ShadeLightBlue(); - desaturate = args->ShadeDesaturate(); - inv_desaturate = 256 - desaturate; - } - - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - uint32_t posV = startV; - int num_cores = thread->num_cores; - int skip = thread->skipped_by_thread(y0); - posV += skip * stepV; - stepV *= num_cores; - for (int y = y0 + skip; y < y1; y += num_cores, posV += stepV) - { - uint32_t *destLine = ((uint32_t*)destOrg) + y * destPitch; - - uint32_t posU = startU; - for (int x = x0; x < x1; x++) - { - uint32_t fg = 0; - - if (ModeT::SWFlags & SWSTYLEF_Fill) - { - fg = fillcolor; - } - else if (ModeT::SWFlags & SWSTYLEF_FogBoundary) - { - fg = destLine[x]; - } - else if (ModeT::BlendOp != STYLEOP_Fuzz) - { - uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; - - if (ModeT::SWFlags & SWSTYLEF_Translated) - { - fg = translation[((const uint8_t*)texPixels)[texelX * texHeight + texelY]]; - } - else if (ModeT::Flags & STYLEF_RedIsAlpha) - { - fg = ((const uint8_t*)texPixels)[texelX * texHeight + texelY]; - } - else - { - fg = texPixels[texelX * texHeight + texelY]; - } - } - - if (ModeT::BlendOp == STYLEOP_Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; - 
uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - - uint32_t a = 256 - sampleshadeout; - - uint32_t dest = destLine[x]; - uint32_t out_r = (RPART(dest) * a) >> 8; - uint32_t out_g = (GPART(dest) * a) >> 8; - uint32_t out_b = (BPART(dest) * a) >> 8; - destLine[x] = MAKEARGB(255, out_r, out_g, out_b); - } - else - { - if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) - { - if (ModeT::Flags & STYLEF_RedIsAlpha) - fg = (fg << 24) | (fillcolor & 0x00ffffff); - else - fg = (fg & 0xff000000) | (fillcolor & 0x00ffffff); - } - - uint32_t fgalpha = fg >> 24; - - if (!(ModeT::Flags & STYLEF_Alpha1)) - { - fgalpha = (fgalpha * alpha) >> 8; - } - - int lightshade = light; - - uint32_t lit_r = 0, lit_g = 0, lit_b = 0; - - uint32_t shadedfg_r, shadedfg_g, shadedfg_b; - if (OptT::Flags & SWOPT_ColoredFog) - { - uint32_t fg_r = RPART(fg); - uint32_t fg_g = GPART(fg); - uint32_t fg_b = BPART(fg); - uint32_t intensity = ((fg_r * 77 + fg_g * 143 + fg_b * 37) >> 8) * desaturate; - shadedfg_r = (((shade_fade_r + ((fg_r * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_r) >> 8; - shadedfg_g = (((shade_fade_g + ((fg_g * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_g) >> 8; - shadedfg_b = (((shade_fade_b + ((fg_b * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_b) >> 8; - } - else - { - shadedfg_r = (RPART(fg) * lightshade) >> 8; - shadedfg_g = (GPART(fg) * 
lightshade) >> 8; - shadedfg_b = (BPART(fg) * lightshade) >> 8; - } - - if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) - { - destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); - } - else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One) - { - uint32_t dest = destLine[x]; - - if (ModeT::BlendOp == STYLEOP_Add) - { - uint32_t out_r = MIN(RPART(dest) + shadedfg_r, 255); - uint32_t out_g = MIN(GPART(dest) + shadedfg_g, 255); - uint32_t out_b = MIN(BPART(dest) + shadedfg_b, 255); - destLine[x] = MAKEARGB(255, out_r, out_g, out_b); - } - else if (ModeT::BlendOp == STYLEOP_RevSub) - { - uint32_t out_r = MAX(RPART(dest) - shadedfg_r, 0); - uint32_t out_g = MAX(GPART(dest) - shadedfg_g, 0); - uint32_t out_b = MAX(BPART(dest) - shadedfg_b, 0); - destLine[x] = MAKEARGB(255, out_r, out_g, out_b); - } - else //if (ModeT::BlendOp == STYLEOP_Sub) - { - uint32_t out_r = MAX(shadedfg_r - RPART(dest), 0); - uint32_t out_g = MAX(shadedfg_g - GPART(dest), 0); - uint32_t out_b = MAX(shadedfg_b - BPART(dest), 0); - destLine[x] = MAKEARGB(255, out_r, out_g, out_b); - } - } - else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor) - { - uint32_t dest = destLine[x]; - - uint32_t sfactor_r = shadedfg_r; sfactor_r += sfactor_r >> 7; // 255 -> 256 - uint32_t sfactor_g = shadedfg_g; sfactor_g += sfactor_g >> 7; // 255 -> 256 - uint32_t sfactor_b = shadedfg_b; sfactor_b += sfactor_b >> 7; // 255 -> 256 - uint32_t sfactor_a = fgalpha; sfactor_a += sfactor_a >> 7; // 255 -> 256 - uint32_t dfactor_r = 256 - sfactor_r; - uint32_t dfactor_g = 256 - sfactor_g; - uint32_t dfactor_b = 256 - sfactor_b; - uint32_t out_r = (RPART(dest) * dfactor_r + shadedfg_r * sfactor_r + 128) >> 8; - uint32_t out_g = (GPART(dest) * dfactor_g + shadedfg_g * sfactor_g + 128) >> 8; - uint32_t out_b = (BPART(dest) * dfactor_b + shadedfg_b * sfactor_b + 128) >> 8; - - destLine[x] = MAKEARGB(255, out_r, out_g, out_b); - } - else if (ModeT::BlendSrc 
== STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) - { - destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); - } - else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) - { - uint32_t dest = destLine[x]; - - uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 - uint32_t src_r = shadedfg_r * sfactor; - uint32_t src_g = shadedfg_g * sfactor; - uint32_t src_b = shadedfg_b * sfactor; - uint32_t dest_r = RPART(dest); - uint32_t dest_g = GPART(dest); - uint32_t dest_b = BPART(dest); - if (ModeT::BlendDest == STYLEALPHA_One) - { - dest_r <<= 8; - dest_g <<= 8; - dest_b <<= 8; - } - else - { - uint32_t dfactor = 256 - sfactor; - dest_r *= dfactor; - dest_g *= dfactor; - dest_b *= dfactor; - } - - uint32_t out_r, out_g, out_b; - if (ModeT::BlendOp == STYLEOP_Add) - { - if (ModeT::BlendDest == STYLEALPHA_One) - { - out_r = MIN((dest_r + src_r + 128) >> 8, 255); - out_g = MIN((dest_g + src_g + 128) >> 8, 255); - out_b = MIN((dest_b + src_b + 128) >> 8, 255); - } - else - { - out_r = (dest_r + src_r + 128) >> 8; - out_g = (dest_g + src_g + 128) >> 8; - out_b = (dest_b + src_b + 128) >> 8; - } - } - else if (ModeT::BlendOp == STYLEOP_RevSub) - { - out_r = MAX(static_cast(dest_r - src_r + 128) >> 8, 0); - out_g = MAX(static_cast(dest_g - src_g + 128) >> 8, 0); - out_b = MAX(static_cast(dest_b - src_b + 128) >> 8, 0); - } - else //if (ModeT::BlendOp == STYLEOP_Sub) - { - out_r = MAX(static_cast(src_r - dest_r + 128) >> 8, 0); - out_g = MAX(static_cast(src_g - dest_g + 128) >> 8, 0); - out_b = MAX(static_cast(src_b - dest_b + 128) >> 8, 0); - } - - destLine[x] = MAKEARGB(255, out_r, out_g, out_b); - } - } - - posU += stepU; - } - } -} - -template -void DrawRect32(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) -{ - using namespace TriScreenDrawerModes; - - if (args->SimpleShade()) - DrawRectOpt32(destOrg, 
destWidth, destHeight, destPitch, args, thread); - else - DrawRectOpt32(destOrg, destWidth, destHeight, destPitch, args, thread); -} - void(*ScreenTriangle::SpanDrawers8[])(int, int, int, const TriDrawTriangleArgs *) = { &DrawSpan8, @@ -1854,66 +2274,58 @@ void(*ScreenTriangle::SpanDrawers32[])(int, int, int, const TriDrawTriangleArgs void(*ScreenTriangle::RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, PolyTriangleThreadData *) = { - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8, - &DrawRect8 + &RectScreenDrawer8::Execute, // TextureOpaque + &RectScreenDrawer8::Execute, // TextureMasked + &RectScreenDrawer8::Execute, // TextureAdd + &RectScreenDrawer8::Execute, // TextureSub + &RectScreenDrawer8::Execute, // TextureRevSub + &RectScreenDrawer8::Execute, // TextureAddSrcColor + &RectScreenDrawer8::Execute, // TranslatedOpaque + &RectScreenDrawer8::Execute, // TranslatedMasked + &RectScreenDrawer8::Execute, // TranslatedAdd + &RectScreenDrawer8::Execute, // TranslatedSub + &RectScreenDrawer8::Execute, // TranslatedRevSub + &RectScreenDrawer8::Execute, // TranslatedAddSrcColor + &RectScreenDrawer8::Execute, // Shaded + &RectScreenDrawer8::Execute, // AddShaded + &RectScreenDrawer8::Execute, // Stencil + &RectScreenDrawer8::Execute, // AddStencil + &RectScreenDrawer8::Execute, // FillOpaque + &RectScreenDrawer8::Execute, // FillAdd + &RectScreenDrawer8::Execute, // FillSub + &RectScreenDrawer8::Execute, // FillRevSub + &RectScreenDrawer8::Execute, // FillAddSrcColor + &RectScreenDrawer8::Execute, // Skycap + &RectScreenDrawer8::Execute, // Fuzz + &RectScreenDrawer8::Execute // FogBoundary }; 
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, PolyTriangleThreadData *) = { - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32, - &DrawRect32 + &RectScreenDrawer32::Execute, // TextureOpaque + &RectScreenDrawer32::Execute, // TextureMasked + &RectScreenDrawer32::Execute, // TextureAdd + &RectScreenDrawer32::Execute, // TextureSub + &RectScreenDrawer32::Execute, // TextureRevSub + &RectScreenDrawer32::Execute, // TextureAddSrcColor + &RectScreenDrawer32::Execute, // TranslatedOpaque + &RectScreenDrawer32::Execute, // TranslatedMasked + &RectScreenDrawer32::Execute, // TranslatedAdd + &RectScreenDrawer32::Execute, // TranslatedSub + &RectScreenDrawer32::Execute, // TranslatedRevSub + &RectScreenDrawer32::Execute, // TranslatedAddSrcColor + &RectScreenDrawer32::Execute, // Shaded + &RectScreenDrawer32::Execute, // AddShaded + &RectScreenDrawer32::Execute, // Stencil + &RectScreenDrawer32::Execute, // AddStencil + &RectScreenDrawer32::Execute, // FillOpaque + &RectScreenDrawer32::Execute, // FillAdd + &RectScreenDrawer32::Execute, // FillSub + &RectScreenDrawer32::Execute, // FillRevSub + &RectScreenDrawer32::Execute, // FillAddSrcColor + &RectScreenDrawer32::Execute, // Skycap + &RectScreenDrawer32::Execute, // Fuzz + &RectScreenDrawer32::Execute, // FogBoundary }; int ScreenTriangle::FuzzStart = 0; diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index a52d897c2..036e4a55b 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -52,7 +52,9 @@ struct TriDrawTriangleArgs 
ShadedTriVertex *v3; int32_t clipright; int32_t clipbottom; - uint8_t *stencilbuffer; + uint8_t *stencilValues; + uint32_t *stencilMasks; + int32_t stencilPitch; float *zbuffer; const PolyDrawArgs *uniforms; bool destBgra; @@ -136,10 +138,39 @@ enum class TriBlendMode AddShadedTranslated }; +enum class RectBlendMode +{ + TextureOpaque, + TextureMasked, + TextureAdd, + TextureSub, + TextureRevSub, + TextureAddSrcColor, + TranslatedOpaque, + TranslatedMasked, + TranslatedAdd, + TranslatedSub, + TranslatedRevSub, + TranslatedAddSrcColor, + Shaded, + AddShaded, + Stencil, + AddStencil, + FillOpaque, + FillAdd, + FillSub, + FillRevSub, + FillAddSrcColor, + Skycap, + Fuzz, + FogBoundary +}; + class ScreenTriangle { public: static void Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); + static void DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); static void(*SpanDrawers8[])(int y, int x0, int x1, const TriDrawTriangleArgs *args); static void(*SpanDrawers32[])(int y, int x0, int x1, const TriDrawTriangleArgs *args); @@ -191,6 +222,35 @@ namespace TriScreenDrawerModes struct StyleAddStencilTranslated { static const int BlendOp = STYLEOP_Add, BlendSrc = STYLEALPHA_Src, BlendDest = STYLEALPHA_One, Flags = STYLEF_ColorIsFixed, SWFlags = SWSTYLEF_Translated; }; struct StyleAddShadedTranslated { static const int BlendOp = STYLEOP_Add, BlendSrc = STYLEALPHA_Src, BlendDest = STYLEALPHA_One, Flags = STYLEF_RedIsAlpha | STYLEF_ColorIsFixed, SWFlags = SWSTYLEF_Translated; }; + enum class BlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp, AddSrcColorOneMinusSrcColor, Shaded, AddClampShaded }; + struct OpaqueBlend { static const int Mode = (int)BlendModes::Opaque; }; + struct MaskedBlend { static const int Mode = (int)BlendModes::Masked; }; + struct AddClampBlend { static const int Mode = (int)BlendModes::AddClamp; }; + struct SubClampBlend { static const int Mode = (int)BlendModes::SubClamp; }; + struct RevSubClampBlend { 
static const int Mode = (int)BlendModes::RevSubClamp; }; + struct AddSrcColorBlend { static const int Mode = (int)BlendModes::AddSrcColorOneMinusSrcColor; }; + struct ShadedBlend { static const int Mode = (int)BlendModes::Shaded; }; + struct AddClampShadedBlend { static const int Mode = (int)BlendModes::AddClampShaded; }; + + enum class FilterModes { Nearest, Linear }; + struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; + struct LinearFilter { static const int Mode = (int)FilterModes::Linear; }; + + enum class ShadeMode { None, Simple, Advanced }; + struct NoShade { static const int Mode = (int)ShadeMode::None; }; + struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; }; + struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; }; + + enum class Samplers { Texture, Fill, Shaded, Stencil, Translated, Skycap, Fuzz, FogBoundary }; + struct TextureSampler { static const int Mode = (int)Samplers::Texture; }; + struct FillSampler { static const int Mode = (int)Samplers::Fill; }; + struct ShadedSampler { static const int Mode = (int)Samplers::Shaded; }; + struct StencilSampler { static const int Mode = (int)Samplers::Stencil; }; + struct TranslatedSampler { static const int Mode = (int)Samplers::Translated; }; + struct SkycapSampler { static const int Mode = (int)Samplers::Skycap; }; + struct FuzzSampler { static const int Mode = (int)Samplers::Fuzz; }; + struct FogBoundarySampler { static const int Mode = (int)Samplers::FogBoundary; }; + enum SWOptFlags { SWOPT_DynLights = 1, diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 76d8a937e..6fee5d841 100644 --- a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -201,11 +201,11 @@ void PolyRenderer::SetSceneViewport() height = (screenblocks*SCREENHEIGHT / 10) & ~7; int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); - 
PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget); + PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget, false); } else // Rendering to camera texture { - PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, 0, 0, RenderTarget->GetWidth(), RenderTarget->GetHeight(), RenderTarget); + PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, 0, 0, RenderTarget->GetWidth(), RenderTarget->GetHeight(), RenderTarget, false); } } diff --git a/src/polyrenderer/scene/poly_model.cpp b/src/polyrenderer/scene/poly_model.cpp index 1a5aa842f..344749659 100644 --- a/src/polyrenderer/scene/poly_model.cpp +++ b/src/polyrenderer/scene/poly_model.cpp @@ -51,7 +51,7 @@ PolyModelRenderer::PolyModelRenderer(PolyRenderThread *thread, const Mat4f &worl { } -void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) +void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -59,14 +59,12 @@ void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, co if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void PolyModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); ModelActor = nullptr; } @@ -100,7 +98,7 @@ VSMatrix PolyModelRenderer::GetViewToWorldMatrix() 
return objectToWorld; } -void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) +void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -109,7 +107,6 @@ void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectT if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, mirrored); } void PolyModelRenderer::EndDrawHUDModel(AActor *actor) @@ -119,7 +116,6 @@ void PolyModelRenderer::EndDrawHUDModel(AActor *actor) if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); } void PolyModelRenderer::SetInterpolation(double interpolation) diff --git a/src/polyrenderer/scene/poly_model.h b/src/polyrenderer/scene/poly_model.h index 6e6ec57bc..cbfafce59 100644 --- a/src/polyrenderer/scene/poly_model.h +++ b/src/polyrenderer/scene/poly_model.h @@ -36,13 +36,13 @@ public: ModelRendererType GetType() const override { return PolyModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; void 
EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/posix/cocoa/i_common.h b/src/posix/cocoa/i_common.h index f60d82ced..be395cf76 100644 --- a/src/posix/cocoa/i_common.h +++ b/src/posix/cocoa/i_common.h @@ -37,6 +37,22 @@ #import +struct RenderBufferOptions +{ + float pixelScale; + + float shiftX; + float shiftY; + + float width; + float height; + + bool dirty; +}; + +extern RenderBufferOptions rbOpts; + + // Version of AppKit framework we are interested in // The following values are needed to build with earlier SDKs diff --git a/src/posix/cocoa/i_input.mm b/src/posix/cocoa/i_input.mm index 78841d906..745b0c6dc 100644 --- a/src/posix/cocoa/i_input.mm +++ b/src/posix/cocoa/i_input.mm @@ -484,14 +484,11 @@ void NSEventToGameMousePosition(NSEvent* inEvent, event_t* outEvent) const NSPoint viewPos = [view convertPointToBacking:windowRect.origin]; const CGFloat frameHeight = I_GetContentViewSize(window).height; - outEvent->data1 = static_cast( viewPos.x); - outEvent->data2 = static_cast(frameHeight - viewPos.y); + const CGFloat posX = ( viewPos.x - rbOpts.shiftX) / rbOpts.pixelScale; + const CGFloat posY = (frameHeight - viewPos.y - rbOpts.shiftY) / rbOpts.pixelScale; - // Compensate letterbox adjustment done by cross-platform code - // More elegant solution is a bit problematic due to HiDPI/Retina support - outEvent->data2 += (screen->GetTrueHeight() - screen->VideoHeight) / 2; - - screen->ScaleCoordsFromWindow(outEvent->data1, outEvent->data2); + outEvent->data1 = static_cast(posX); + outEvent->data2 = static_cast(posY); } void ProcessMouseMoveInMenu(NSEvent* theEvent) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 8593f86e7..5104a043c 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -115,6 +115,9 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | 
CVAR_ EXTERN_CVAR(Bool, gl_smooth_rendered) +RenderBufferOptions rbOpts; + + // --------------------------------------------------------------------------- @@ -541,6 +544,20 @@ void CocoaVideo::SetFullscreenMode(const int width, const int height) ? [screen convertRectToBacking:screenFrame] : screenFrame; + const float displayWidth = displayRect.size.width; + const float displayHeight = displayRect.size.height; + + const float pixelScaleFactorX = displayWidth / static_cast(width ); + const float pixelScaleFactorY = displayHeight / static_cast(height); + + rbOpts.pixelScale = MIN(pixelScaleFactorX, pixelScaleFactorY); + + rbOpts.width = width * rbOpts.pixelScale; + rbOpts.height = height * rbOpts.pixelScale; + + rbOpts.shiftX = (displayWidth - rbOpts.width ) / 2.0f; + rbOpts.shiftY = (displayHeight - rbOpts.height) / 2.0f; + if (!m_fullscreen) { [m_window setLevel:LEVEL_FULLSCREEN]; @@ -554,6 +571,14 @@ void CocoaVideo::SetFullscreenMode(const int width, const int height) void CocoaVideo::SetWindowedMode(const int width, const int height) { + rbOpts.pixelScale = 1.0f; + + rbOpts.width = static_cast(width ); + rbOpts.height = static_cast(height); + + rbOpts.shiftX = 0.0f; + rbOpts.shiftY = 0.0f; + const NSSize windowPixelSize = NSMakeSize(width, height); const NSSize windowSize = vid_hidpi ? 
[[m_window contentView] convertSizeFromBacking:windowPixelSize] @@ -596,6 +621,8 @@ void CocoaVideo::SetMode(const int width, const int height, const bool fullscree SetWindowedMode(width, height); } + rbOpts.dirty = true; + const NSSize viewSize = I_GetContentViewSize(m_window); glViewport(0, 0, static_cast(viewSize.width), static_cast(viewSize.height)); diff --git a/src/r_data/models/models.cpp b/src/r_data/models/models.cpp index 316281f65..b8b324524 100644 --- a/src/r_data/models/models.cpp +++ b/src/r_data/models/models.cpp @@ -94,7 +94,7 @@ void FModelRenderer::RenderModel(float x, float y, float z, FSpriteModelFrame *s if (smf->flags & MDL_ROTATING) { - if (smf->rotationSpeed > 0.0000000001 || smf->rotationSpeed < -0.0000000001) + if (smf->rotationSpeed > 0.0000000001) { double turns = (I_GetTime() + I_GetTimeFrac()) / (200.0 / smf->rotationSpeed); turns -= floor(turns); @@ -171,9 +171,7 @@ void FModelRenderer::RenderModel(float x, float y, float z, FSpriteModelFrame *s float stretch = (smf->modelIDs[0] != -1 ? 
Models[smf->modelIDs[0]]->getAspectFactor() : 1.f) / level.info->pixelstretch; objectToWorldMatrix.scale(1, stretch, 1); - float orientation = scaleFactorX * scaleFactorY * scaleFactorZ; - - BeginDrawModel(actor, smf, objectToWorldMatrix, orientation < 0); + BeginDrawModel(actor, smf, objectToWorldMatrix); RenderFrameModels(smf, actor->state, actor->tics, actor->GetClass(), translation); EndDrawModel(actor, smf); } @@ -209,9 +207,7 @@ void FModelRenderer::RenderHUDModel(DPSprite *psp, float ofsX, float ofsY) objectToWorldMatrix.rotate(smf->pitchoffset, 0, 0, 1); objectToWorldMatrix.rotate(-smf->rolloffset, 1, 0, 0); - float orientation = smf->xscale * smf->yscale * smf->zscale; - - BeginDrawHUDModel(playermo, objectToWorldMatrix, orientation < 0); + BeginDrawHUDModel(playermo, objectToWorldMatrix); RenderFrameModels(smf, psp->GetState(), psp->GetTics(), playermo->player->ReadyWeapon->GetClass(), 0); EndDrawHUDModel(playermo); } diff --git a/src/r_data/models/models.h b/src/r_data/models/models.h index bb5fb3b84..b0d0f0230 100644 --- a/src/r_data/models/models.h +++ b/src/r_data/models/models.h @@ -64,7 +64,7 @@ public: virtual ModelRendererType GetType() const = 0; - virtual void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) = 0; + virtual void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) = 0; virtual void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) = 0; virtual IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) = 0; @@ -74,7 +74,7 @@ public: virtual VSMatrix GetViewToWorldMatrix() = 0; - virtual void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) = 0; + virtual void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) = 0; virtual void EndDrawHUDModel(AActor *actor) = 0; virtual void SetInterpolation(double interpolation) = 0; diff --git a/src/resourcefiles/file_zip.cpp 
b/src/resourcefiles/file_zip.cpp index b3bfa7a2d..e0cf4df91 100644 --- a/src/resourcefiles/file_zip.cpp +++ b/src/resourcefiles/file_zip.cpp @@ -40,7 +40,6 @@ #include "v_text.h" #include "w_wad.h" #include "w_zip.h" -#include "i_system.h" #include "ancientzip.h" #define BUFREADCOMMENT (0x400) diff --git a/src/scripting/thingdef_data.cpp b/src/scripting/thingdef_data.cpp index 1416cbc7a..002c0a81a 100644 --- a/src/scripting/thingdef_data.cpp +++ b/src/scripting/thingdef_data.cpp @@ -316,7 +316,6 @@ static FFlagDef ActorFlagDefs[]= DEFINE_FLAG(MF7, FORCEINFIGHTING, AActor, flags7), DEFINE_FLAG(MF8, FRIGHTENING, AActor, flags8), - DEFINE_FLAG(MF8, BLOCKASPLAYER, AActor, flags8), // Effect flags DEFINE_FLAG(FX, VISIBILITYPULSE, AActor, effects), diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 8e260daa7..9049c949a 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -272,7 +272,7 @@ namespace swrenderer void Execute(DrawerThread *thread) override { auto zbuffer = PolyZBuffer::Instance(); - int pitch = PolyStencilBuffer::Instance()->Width(); + int pitch = PolyStencilBuffer::Instance()->BlockWidth() * 8; float *values = zbuffer->Values() + y * pitch + x; int cnt = count; @@ -316,7 +316,7 @@ namespace swrenderer return; auto zbuffer = PolyZBuffer::Instance(); - int pitch = PolyStencilBuffer::Instance()->Width(); + int pitch = PolyStencilBuffer::Instance()->BlockWidth() * 8; float *values = zbuffer->Values() + y * pitch; int end = x2; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index d51eef63a..a52d271a3 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -259,7 +259,7 @@ namespace swrenderer thread->OpaquePass->ResetFakingUnderwater(); // [RH] Hack to make windows into underwater areas possible thread->Portal->SetMainPortal(); - PolyTriangleDrawer::SetViewport(thread->DrawQueue, viewwindowx, viewwindowy, viewwidth, 
viewheight, thread->Viewport->RenderTarget); + PolyTriangleDrawer::SetViewport(thread->DrawQueue, viewwindowx, viewwindowy, viewwidth, viewheight, thread->Viewport->RenderTarget, true); // Cull things outside the range seen by this thread VisibleSegmentRenderer visitor; diff --git a/src/swrenderer/things/r_model.cpp b/src/swrenderer/things/r_model.cpp index d3eac67e0..025263e34 100644 --- a/src/swrenderer/things/r_model.cpp +++ b/src/swrenderer/things/r_model.cpp @@ -82,7 +82,7 @@ namespace swrenderer { } - void SWModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) + void SWModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -123,14 +123,12 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void SWModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); ModelActor = nullptr; } @@ -183,7 +181,7 @@ namespace swrenderer return objectToWorld; } - void SWModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) + void SWModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -194,7 +192,6 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, mirrored); 
} void SWModelRenderer::EndDrawHUDModel(AActor *actor) @@ -204,7 +201,6 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); - PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); } void SWModelRenderer::SetInterpolation(double interpolation) diff --git a/src/swrenderer/things/r_model.h b/src/swrenderer/things/r_model.h index ee6608358..004636e69 100644 --- a/src/swrenderer/things/r_model.h +++ b/src/swrenderer/things/r_model.h @@ -56,13 +56,13 @@ namespace swrenderer ModelRendererType GetType() const override { return SWModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/v_2ddrawer.cpp b/src/v_2ddrawer.cpp index bbf9105fe..ada1b5878 100644 --- a/src/v_2ddrawer.cpp +++ b/src/v_2ddrawer.cpp @@ -323,16 +323,12 @@ void F2DDrawer::AddPoly(FTexture *texture, FVector2 *points, int npoints, // Convert a light level into an unbounded colormap index (shade). // Why the +12? I wish I knew, but experimentation indicates it // is necessary in order to best reproduce Doom's original lighting. 
- double fadelevel; - - if (vid_rendermode != 4 || (level.lightmode >= 2 && level.lightmode != 4)) + double map = (NUMCOLORMAPS * 2.) - ((lightlevel + 12) * (NUMCOLORMAPS / 128.)); + double fadelevel = clamp((map - 12) / NUMCOLORMAPS, 0.0, 1.0); + // handle the brighter light modes of the hardware renderer. + if (vid_rendermode == 4 && (level.lightmode < 2 || level.lightmode == 4)) { - double map = (NUMCOLORMAPS * 2.) - ((lightlevel + 12) * (NUMCOLORMAPS / 128.)); - fadelevel = clamp((map - 12) / NUMCOLORMAPS, 0.0, 1.0); - } - else - { - fadelevel = 1. - clamp(lightlevel, 0, 255) / 255.f; + fadelevel = pow(fadelevel, 1.3); } RenderCommand poly; diff --git a/src/v_video.cpp b/src/v_video.cpp index 0f754d4c3..1441a1966 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -1005,7 +1005,7 @@ void DFrameBuffer::WriteSavePic(player_t *player, FileWriter *file, int width, i // //========================================================================== -void DFrameBuffer::SetViewportRects(IntRect *bounds) +void DFrameBuffer::SetOutputViewport(IntRect *bounds) { if (bounds) { diff --git a/src/v_video.h b/src/v_video.h index 66e38fca5..f88a65e65 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -501,7 +501,7 @@ public: // Calculate gamma table void CalcGamma(float gamma, uint8_t gammalookup[256]); - virtual void SetViewportRects(IntRect *bounds); + virtual void SetOutputViewport(IntRect *bounds); int ScreenToWindowX(int x); int ScreenToWindowY(int y); diff --git a/wadsrc/static/zscript/doom/weaponfist.txt b/wadsrc/static/zscript/doom/weaponfist.txt index ae0eedddc..6f106e14a 100644 --- a/wadsrc/static/zscript/doom/weaponfist.txt +++ b/wadsrc/static/zscript/doom/weaponfist.txt @@ -65,7 +65,7 @@ extend class Actor damage *= 10; double ang = angle + Random2[Punch]() * (5.625 / 256); - double pitch = AimLineAttack (ang, DEFMELEERANGE, null, 0., ALF_CHECK3D); + double pitch = AimLineAttack (ang, DEFMELEERANGE); LineAttack (ang, DEFMELEERANGE, pitch, damage, 'Melee', 
"BulletPuff", LAF_ISMELEEATTACK, t); diff --git a/wadsrc/static/zscript/hexen/clericmace.txt b/wadsrc/static/zscript/hexen/clericmace.txt index adce6431a..2198e3cc4 100644 --- a/wadsrc/static/zscript/hexen/clericmace.txt +++ b/wadsrc/static/zscript/hexen/clericmace.txt @@ -66,7 +66,7 @@ class CWeapMace : ClericWeapon for (int j = 1; j >= -1; j -= 2) { double ang = angle + j*i*(45. / 16); - double slope = AimLineAttack(ang, 2 * DEFMELEERANGE, t, 0., ALF_CHECK3D); + double slope = AimLineAttack(ang, 2 * DEFMELEERANGE, t); if (t.linetarget) { LineAttack(ang, 2 * DEFMELEERANGE, slope, damage, 'Melee', "HammerPuff", true, t); @@ -81,7 +81,7 @@ class CWeapMace : ClericWeapon // didn't find any creatures, so try to strike any walls weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); + double slope = AimLineAttack (angle, DEFMELEERANGE); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', "HammerPuff"); } } diff --git a/wadsrc/static/zscript/hexen/fighteraxe.txt b/wadsrc/static/zscript/hexen/fighteraxe.txt index a89bd7e5b..61e509744 100644 --- a/wadsrc/static/zscript/hexen/fighteraxe.txt +++ b/wadsrc/static/zscript/hexen/fighteraxe.txt @@ -245,7 +245,7 @@ class FWeapAxe : FighterWeapon for (int j = 1; j >= -1; j -= 2) { double ang = angle + j*i*(45. 
/ 16); - double slope = AimLineAttack(ang, AXERANGE, t, 0., ALF_CHECK3D); + double slope = AimLineAttack(ang, AXERANGE, t); if (t.linetarget) { LineAttack(ang, AXERANGE, slope, damage, 'Melee', pufftype, true, t); @@ -273,7 +273,7 @@ class FWeapAxe : FighterWeapon // didn't find any creatures, so try to strike any walls self.weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); + double slope = AimLineAttack (angle, DEFMELEERANGE); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', pufftype, true); } } diff --git a/wadsrc/static/zscript/hexen/fighterfist.txt b/wadsrc/static/zscript/hexen/fighterfist.txt index dd5f02095..4975bbd62 100644 --- a/wadsrc/static/zscript/hexen/fighterfist.txt +++ b/wadsrc/static/zscript/hexen/fighterfist.txt @@ -56,7 +56,7 @@ class FWeapFist : FighterWeapon Class pufftype; FTranslatedLineTarget t; - double slope = AimLineAttack (angle, 2*DEFMELEERANGE, t, 0., ALF_CHECK3D); + double slope = AimLineAttack (angle, 2*DEFMELEERANGE, t); if (t.linetarget != null) { if (++weaponspecial >= 3) @@ -117,7 +117,7 @@ class FWeapFist : FighterWeapon // didn't find any creatures, so try to strike any walls weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); + double slope = AimLineAttack (angle, DEFMELEERANGE); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', "PunchPuff", true); }