diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 59dd637a2..4645cfcd5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -310,6 +310,7 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) set( CMAKE_CXX_FLAGS "${SANITIZER_FLAG} ${CMAKE_CXX_FLAGS}" ) set( CMAKE_C_FLAGS "${SANITIZER_FLAG} ${CMAKE_C_FLAGS}" ) + set( CMAKE_EXE_LINKER_FLAGS "${SANITIZER_FLAG} ${CMAKE_EXE_LINKER_FLAGS}" ) if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "4.5") set( CMAKE_C_FLAGS "-Wno-unused-result ${CMAKE_C_FLAGS}" ) diff --git a/src/actor.h b/src/actor.h index 863e4fb46..2199c205e 100644 --- a/src/actor.h +++ b/src/actor.h @@ -401,6 +401,7 @@ enum ActorFlag8 { MF8_FRIGHTENING = 0x00000001, // for those moments when halloween just won't do MF8_INSCROLLSEC = 0x00000002, // actor is partially inside a scrolling sector + MF8_BLOCKASPLAYER = 0x00000004, // actor is blocked by player-blocking lines even if not a player }; // --- mobj.renderflags --- diff --git a/src/d_dehacked.cpp b/src/d_dehacked.cpp index 25b42632e..9688d3cab 100644 --- a/src/d_dehacked.cpp +++ b/src/d_dehacked.cpp @@ -64,6 +64,7 @@ #include "v_text.h" #include "backend/vmbuilder.h" #include "types.h" +#include "m_argv.h" // [SO] Just the way Randy said to do it :) // [RH] Made this CVAR_SERVERINFO @@ -813,6 +814,16 @@ void SetDehParams(FState *state, int codepointer) sfunc->ImplicitArgs = numargs; state->SetAction(sfunc); sfunc->PrintableName.Format("Dehacked.%s.%d.%d", MBFCodePointers[codepointer].name.GetChars(), value1, value2); + + if (Args->CheckParm("-dumpdisasm")) + { + FILE *dump = fopen("disasm.txt", "a"); + if (dump != nullptr) + { + DumpFunction(dump, sfunc, sfunc->PrintableName.GetChars(), (int)sfunc->PrintableName.Len()); + fclose(dump); // close only a stream that was actually opened; fclose(nullptr) is undefined + } + } } } @@ -2103,12 +2114,15 @@ static int PatchCodePtrs (int dummy) symname.Format("A_%s", Line2); // Let's consider as aliases some redundant MBF pointer + bool ismbfcp = false; for (unsigned int i = 0; i <
MBFCodePointers.Size(); i++) { if (!symname.CompareNoCase(MBFCodePointers[i].alias)) { symname = MBFCodePointers[i].name; DPrintf(DMSG_SPAMMY, "%s --> %s\n", MBFCodePointers[i].alias, MBFCodePointers[i].name.GetChars()); + ismbfcp = true; + break; } } @@ -2119,7 +2133,7 @@ static int PatchCodePtrs (int dummy) { Printf(TEXTCOLOR_RED "Frame %d: Unknown code pointer '%s'\n", frame, Line2); } - else + else if (!ismbfcp) // MBF special code pointers will produce errors here because they will receive some args and won't match the conditions here. { TArray &args = sym->Variants[0].ArgFlags; unsigned numargs = sym->GetImplicitArgs(); diff --git a/src/g_game.cpp b/src/g_game.cpp index 1c5388ac1..a73758f1c 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -82,9 +82,6 @@ static FRandom pr_dmspawn ("DMSpawn"); static FRandom pr_pspawn ("PlayerSpawn"); -const int SAVEPICWIDTH = 216; -const int SAVEPICHEIGHT = 162; - bool G_CheckDemoStatus (void); void G_ReadDemoTiccmd (ticcmd_t *cmd, int player); void G_WriteDemoTiccmd (ticcmd_t *cmd, int player, int buf); diff --git a/src/g_game.h b/src/g_game.h index 2ac019059..7ac49a413 100644 --- a/src/g_game.h +++ b/src/g_game.h @@ -102,5 +102,7 @@ class AInventory; extern const AInventory *SendItemUse, *SendItemDrop; extern int SendItemDropAmount; +const int SAVEPICWIDTH = 216; +const int SAVEPICHEIGHT = 162; #endif diff --git a/src/gl/models/gl_models.cpp b/src/gl/models/gl_models.cpp index 31b7208bc..8669d7bab 100644 --- a/src/gl/models/gl_models.cpp +++ b/src/gl/models/gl_models.cpp @@ -54,7 +54,7 @@ VSMatrix FGLModelRenderer::GetViewToWorldMatrix() return objectToWorldMatrix; } -void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) +void FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) { glDepthFunc(GL_LEQUAL); gl_RenderState.EnableTexture(true); @@ -65,7 +65,7 @@ void 
FGLModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, con if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) && !(smf->flags & MDL_DONTCULLBACKFACES)) { glEnable(GL_CULL_FACE); - glFrontFace(GL_CW); + glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CCW : GL_CW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; @@ -81,7 +81,7 @@ void FGLModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) glDisable(GL_CULL_FACE); } -void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) +void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) { glDepthFunc(GL_LEQUAL); @@ -91,7 +91,7 @@ void FGLModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectTo if (!(actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])) { glEnable(GL_CULL_FACE); - glFrontFace(GLPortal::isMirrored()? GL_CW : GL_CCW); + glFrontFace((mirrored ^ GLPortal::isMirrored()) ? GL_CW : GL_CCW); } gl_RenderState.mModelMatrix = objectToWorldMatrix; diff --git a/src/gl/models/gl_models.h b/src/gl/models/gl_models.h index 821705c34..0fb82f9f4 100644 --- a/src/gl/models/gl_models.h +++ b/src/gl/models/gl_models.h @@ -37,13 +37,13 @@ public: FGLModelRenderer(int mli) : modellightindex(mli) {} ModelRendererType GetType() const override { return GLModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; + void 
BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/gl/renderer/gl_renderer.cpp b/src/gl/renderer/gl_renderer.cpp index 154fae2d6..c753e261e 100644 --- a/src/gl/renderer/gl_renderer.cpp +++ b/src/gl/renderer/gl_renderer.cpp @@ -35,6 +35,7 @@ #include "p_effect.h" #include "d_player.h" #include "a_dynlight.h" +#include "g_game.h" #include "swrenderer/r_swscene.h" #include "hwrenderer/utility/hw_clock.h" @@ -96,6 +97,8 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) mLights = nullptr; mTonemapPalette = nullptr; mBuffers = nullptr; + mScreenBuffers = nullptr; + mSaveBuffers = nullptr; mPresentShader = nullptr; mPresent3dCheckerShader = nullptr; mPresent3dColumnShader = nullptr; @@ -122,7 +125,9 @@ FGLRenderer::FGLRenderer(OpenGLFrameBuffer *fb) void FGLRenderer::Initialize(int width, int height) { - mBuffers = new FGLRenderBuffers(); + mScreenBuffers = new FGLRenderBuffers(); + mSaveBuffers = new FGLRenderBuffers(); + mBuffers = mScreenBuffers; mLinearDepthShader = new FLinearDepthShader(); mDepthBlurShader = new FDepthBlurShader(); mSSAOShader = new FSSAOShader(); @@ -400,7 +405,9 @@ void FGLRenderer::WriteSavePic(player_t *player, FileWriter *file, int width, in void FGLRenderer::BeginFrame() { - buffersActive = GLRenderer->mBuffers->Setup(screen->mScreenViewport.width, screen->mScreenViewport.height, screen->mSceneViewport.width, screen->mSceneViewport.height); + buffersActive = GLRenderer->mScreenBuffers->Setup(screen->mScreenViewport.width, screen->mScreenViewport.height, screen->mSceneViewport.width, screen->mSceneViewport.height); + if (buffersActive) + buffersActive = GLRenderer->mSaveBuffers->Setup(SAVEPICWIDTH, SAVEPICHEIGHT, SAVEPICWIDTH, SAVEPICHEIGHT); } 
//=========================================================================== diff --git a/src/gl/renderer/gl_renderer.h b/src/gl/renderer/gl_renderer.h index 7cdf3b01d..22b77acef 100644 --- a/src/gl/renderer/gl_renderer.h +++ b/src/gl/renderer/gl_renderer.h @@ -100,6 +100,8 @@ public: int mOldFBID; FGLRenderBuffers *mBuffers; + FGLRenderBuffers *mScreenBuffers; + FGLRenderBuffers *mSaveBuffers; FLinearDepthShader *mLinearDepthShader; FSSAOShader *mSSAOShader; FDepthBlurShader *mDepthBlurShader; diff --git a/src/gl/scene/gl_scene.cpp b/src/gl/scene/gl_scene.cpp index 91d4e4235..d4aea2768 100644 --- a/src/gl/scene/gl_scene.cpp +++ b/src/gl/scene/gl_scene.cpp @@ -395,7 +395,6 @@ void GLSceneDrawer::RenderTranslucent(FDrawInfo *di) { RenderAll.Clock(); - glDepthMask(false); gl_RenderState.SetCameraPos(r_viewpoint.Pos.X, r_viewpoint.Pos.Y, r_viewpoint.Pos.Z); // final pass: translucent stuff @@ -404,6 +403,7 @@ void GLSceneDrawer::RenderTranslucent(FDrawInfo *di) gl_RenderState.EnableBrightmap(true); di->drawlists[GLDL_TRANSLUCENTBORDER].Draw(di, GLPASS_TRANSLUCENT); + glDepthMask(false); di->DrawSorted(GLDL_TRANSLUCENT); gl_RenderState.EnableBrightmap(false); @@ -656,7 +656,7 @@ sector_t * GLSceneDrawer::RenderViewpoint (AActor * camera, IntRect * bounds, fl SetFixedColormap(camera->player); // reiterate color map for each eye, so night vision goggles work in both eyes const s3d::EyePose * eye = stereo3dMode.getEyePose(eye_ix); eye->SetUp(); - screen->SetOutputViewport(bounds); + screen->SetViewportRects(bounds); Set3DViewport(mainview); GLRenderer->mDrawingScene2D = true; GLRenderer->mCurrentFoV = fov; @@ -713,20 +713,24 @@ sector_t * GLSceneDrawer::RenderViewpoint (AActor * camera, IntRect * bounds, fl void GLSceneDrawer::WriteSavePic (player_t *player, FileWriter *file, int width, int height) { IntRect bounds; + bounds.left = 0; + bounds.top = 0; + bounds.width = width; + bounds.height = height; + + // if GLRenderer->mVBO is persistently mapped we must be sure the 
GPU finished reading from it before we fill it with new data. + glFinish(); + + // Switch to render buffers dimensioned for the savepic + GLRenderer->mBuffers = GLRenderer->mSaveBuffers; P_FindParticleSubsectors(); // make sure that all recently spawned particles have a valid subsector. - bounds.left=0; - bounds.top=0; - bounds.width=width; - bounds.height=height; - glFlush(); SetFixedColormap(player); gl_RenderState.SetVertexBuffer(GLRenderer->mVBO); GLRenderer->mVBO->Reset(); if (!gl.legacyMode) GLRenderer->mLights->Clear(); - sector_t *viewsector = RenderViewpoint(players[consoleplayer].camera, &bounds, - r_viewpoint.FieldOfView.Degrees, 1.6f, 1.6f, true, false); + sector_t *viewsector = RenderViewpoint(players[consoleplayer].camera, &bounds, r_viewpoint.FieldOfView.Degrees, 1.6f, 1.6f, true, false); glDisable(GL_STENCIL_TEST); gl_RenderState.SetFixedColormap(CM_DEFAULT); gl_RenderState.SetSoftLightLevel(-1); @@ -737,12 +741,16 @@ void GLSceneDrawer::WriteSavePic (player_t *player, FileWriter *file, int width, screen->Draw2D(); } GLRenderer->CopyToBackbuffer(&bounds, false); - glFlush(); - screen->SetOutputViewport(nullptr); + // strictly speaking not needed as the glReadPixels should block until the scene is rendered, but this is to safeguard against shitty drivers + glFinish(); uint8_t * scr = (uint8_t *)M_Malloc(width * height * 3); glReadPixels(0,0,width, height,GL_RGB,GL_UNSIGNED_BYTE,scr); M_CreatePNG (file, scr + ((height-1) * width * 3), NULL, SS_RGB, width, height, -width * 3, Gamma); M_Free(scr); + + // Switch back the screen render buffers + screen->SetViewportRects(nullptr); + GLRenderer->mBuffers = GLRenderer->mScreenBuffers; } diff --git a/src/gl/system/gl_framebuffer.cpp b/src/gl/system/gl_framebuffer.cpp index 81cbd7eeb..02326afd1 100644 --- a/src/gl/system/gl_framebuffer.cpp +++ b/src/gl/system/gl_framebuffer.cpp @@ -136,7 +136,7 @@ void OpenGLFrameBuffer::InitializeState() glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); 
GLRenderer->Initialize(GetWidth(), GetHeight()); - SetOutputViewport(nullptr); + SetViewportRects(nullptr); } //========================================================================== @@ -164,14 +164,11 @@ void OpenGLFrameBuffer::Update() int clientHeight = ViewportScaledHeight(initialWidth, initialHeight); if (clientWidth > 0 && clientHeight > 0 && (Width != clientWidth || Height != clientHeight)) { - // Do not call Resize here because it's only for software canvases Width = clientWidth; Height = clientHeight; V_OutputResized(Width, Height); GLRenderer->mVBO->OutputResized(Width, Height); } - - SetOutputViewport(nullptr); } //=========================================================================== @@ -392,10 +389,11 @@ bool OpenGLFrameBuffer::RenderBuffersEnabled() return FGLRenderBuffers::IsEnabled(); } -void OpenGLFrameBuffer::SetOutputViewport(IntRect *bounds) +void OpenGLFrameBuffer::SetViewportRects(IntRect *bounds) { - Super::SetOutputViewport(bounds); - s3d::Stereo3DMode::getCurrentMode().AdjustViewports(); + Super::SetViewportRects(bounds); + if (!bounds) + s3d::Stereo3DMode::getCurrentMode().AdjustViewports(); } @@ -431,6 +429,7 @@ void OpenGLFrameBuffer::SetClearColor(int color) void OpenGLFrameBuffer::BeginFrame() { + SetViewportRects(nullptr); if (GLRenderer != nullptr) GLRenderer->BeginFrame(); } diff --git a/src/gl/system/gl_framebuffer.h b/src/gl/system/gl_framebuffer.h index 898820cc9..9a92c1f44 100644 --- a/src/gl/system/gl_framebuffer.h +++ b/src/gl/system/gl_framebuffer.h @@ -42,7 +42,7 @@ public: void ResetFixedColormap() override; void BeginFrame() override; bool RenderBuffersEnabled() override; - void SetOutputViewport(IntRect *bounds) override; + void SetViewportRects(IntRect *bounds) override; void BlurScene(float amount) override; // Retrieves a buffer containing image data for a screenshot. 
diff --git a/src/memarena.cpp b/src/memarena.cpp index d19521edf..2c0336a5e 100644 --- a/src/memarena.cpp +++ b/src/memarena.cpp @@ -55,13 +55,14 @@ struct FMemArena::Block // // RoundPointer // -// Rounds a pointer up to a pointer-sized boundary. +// Rounds a pointer up to the size of the largest integral type. // //========================================================================== static inline void *RoundPointer(void *ptr) { - return (void *)(((size_t)ptr + sizeof(void*) - 1) & ~(sizeof(void*) - 1)); + const auto roundsize = std::max(sizeof(void*), sizeof(double)); + return (void *)(((size_t)ptr + roundsize - 1) & ~(roundsize - 1)); } //========================================================================== diff --git a/src/p_actionfunctions.cpp b/src/p_actionfunctions.cpp index 2af237b8a..a7a12f9f1 100644 --- a/src/p_actionfunctions.cpp +++ b/src/p_actionfunctions.cpp @@ -1997,7 +1997,7 @@ DEFINE_ACTION_FUNCTION(AStateProvider, A_CustomPunch) angle = self->Angles.Yaw + pr_cwpunch.Random2() * (5.625 / 256); if (range == 0) range = DEFMELEERANGE; - pitch = P_AimLineAttack (self, angle, range, &t); + pitch = P_AimLineAttack (self, angle, range, &t, 0., ALF_CHECK3D); // only use ammo when actually hitting something! 
if ((flags & CPF_USEAMMO) && t.linetarget && weapon && ACTION_CALL_FROM_PSPRITE()) diff --git a/src/p_map.cpp b/src/p_map.cpp index 89eae7859..0e4f61df8 100644 --- a/src/p_map.cpp +++ b/src/p_map.cpp @@ -930,7 +930,7 @@ bool PIT_CheckLine(FMultiBlockLinesIterator &mit, FMultiBlockLinesIterator::Chec bool NotBlocked = ((tm.thing->flags3 & MF3_NOBLOCKMONST) || ((i_compatflags & COMPATF_NOBLOCKFRIENDS) && (tm.thing->flags & MF_FRIENDLY))); - if (!(Projectile) || (ld->flags & (ML_BLOCKEVERYTHING | ML_BLOCKPROJECTILE))) + if (!(Projectile) || (ld->flags & (ML_BLOCKEVERYTHING | ML_BLOCKPROJECTILE | ML_BLOCK_PLAYERS))) { if (ld->flags & ML_RAILING) { @@ -938,7 +938,7 @@ bool PIT_CheckLine(FMultiBlockLinesIterator &mit, FMultiBlockLinesIterator::Chec } else if ((ld->flags & (ML_BLOCKING | ML_BLOCKEVERYTHING)) || // explicitly blocking everything (!(NotBlocked) && (ld->flags & ML_BLOCKMONSTERS)) || // block monsters only - (tm.thing->player != NULL && (ld->flags & ML_BLOCK_PLAYERS)) || // block players + (((tm.thing->player != NULL) || (tm.thing->flags8 & MF8_BLOCKASPLAYER)) && (ld->flags & ML_BLOCK_PLAYERS)) || // block players and actors flagged BLOCKASPLAYER, but only on player-blocking lines ((Projectile) && (ld->flags & ML_BLOCKPROJECTILE)) || // block projectiles ((tm.thing->flags & MF_FLOAT) && (ld->flags & ML_BLOCK_FLOATERS))) // block floaters { @@ -3100,7 +3100,7 @@ void FSlide::SlideTraverse(const DVector2 &start, const DVector2 &end) { goto isblocking; } - if (li->flags & ML_BLOCK_PLAYERS && slidemo->player != NULL) + if (li->flags & ML_BLOCK_PLAYERS && ((slidemo->player != NULL) || (slidemo->flags8 & MF8_BLOCKASPLAYER))) { goto isblocking; } @@ -3998,6 +3998,70 @@ struct aim_t SetResult(thing_other, newtrace.thing_other); } + //============================================================================ + // + // Finds where the trace exits an actor to check for hits from above/below + // + //============================================================================ + + double ExitPoint(AActor *thing) + { + // The added check
at the exit point only has some value if a 3D distance check is involved + if (!(flags & ALF_CHECK3D)) return -1; + + divline_t trace = { startpos.X, startpos.Y, aimtrace.X, aimtrace.Y }; + divline_t line; + + for (int i = 0; i < 4; ++i) + { + switch (i) + { + case 0: // Top edge + line.y = thing->Y() + thing->radius; + if (trace.y > line.y) continue; + line.x = thing->X() + thing->radius; + line.dx = -thing->radius * 2; + line.dy = 0; + break; + + case 1: // Right edge + line.x = thing->X() + thing->radius; + if (trace.x > line.x) continue; + line.y = thing->Y() - thing->radius; + line.dx = 0; + line.dy = thing->radius * 2; + break; + + case 2: // Bottom edge + line.y = thing->Y() - thing->radius; + if (trace.y < line.y) continue; + line.x = thing->X() - thing->radius; + line.dx = thing->radius * 2; + line.dy = 0; + break; + + case 3: // Left edge + line.x = thing->X() - thing->radius; + if (trace.x < line.x) continue; + line.y = thing->Y() + thing->radius; + line.dx = 0; + line.dy = thing->radius * -2; + break; + } + + // If it is, see if the trace crosses it + if (P_PointOnDivlineSide(line.x, line.y, &trace) != + P_PointOnDivlineSide(line.x + line.dx, line.y + line.dy, &trace)) + { + // It's a hit + double frac = P_InterceptVector(&trace, &line); + if (frac > 1.) 
frac = 1.; + return frac; + } + } + + return -1.; + } //============================================================================ // @@ -4047,9 +4111,7 @@ struct aim_t intercept_t *in; if (aimdebug) - Printf("Start AimTraverse, start = %f,%f,%f, vect = %f,%f\n", - startpos.X / 65536., startpos.Y / 65536., startpos.Z / 65536., - aimtrace.X / 65536., aimtrace.Y / 65536.); + Printf("Start AimTraverse, start = %f,%f,%f, vect = %f,%f\n", startpos.X, startpos.Y, startpos.Z, aimtrace.X, aimtrace.Y); while ((in = it.Next())) { @@ -4195,12 +4257,38 @@ struct aim_t thingtoppitch = -VecToAngle(dist, th->Top() - shootz); if (thingtoppitch > bottompitch) - continue; // shot over the thing + { + // Check for a hit from above + if (shootz > th->Top()) + { + double exitfrac = ExitPoint(th); + if (exitfrac > 0.) + { + double exitdist = attackrange * exitfrac; + thingtoppitch = -VecToAngle(exitdist, th->Top() - shootz); + if (thingtoppitch > bottompitch) continue; + } + } + else continue; // shot over the thing + } thingbottompitch = -VecToAngle(dist, th->Z() - shootz); if (thingbottompitch < toppitch) + { + // Check for a hit from below + if (shootz < th->Z()) + { + double exitfrac = ExitPoint(th); + if (exitfrac > 0.) 
+ { + double exitdist = attackrange * exitfrac; + thingbottompitch = -VecToAngle(exitdist, th->Z() - shootz); + if (thingbottompitch < toppitch) continue; + } + } - continue; // shot under the thing + else continue; // shot under the thing + } if (crossedffloors) { diff --git a/src/p_mobj.cpp b/src/p_mobj.cpp index a00cb7f10..2f8b89a84 100644 --- a/src/p_mobj.cpp +++ b/src/p_mobj.cpp @@ -6504,9 +6504,6 @@ DEFINE_ACTION_FUNCTION(AActor, GetFloorTerrain) bool P_HitWater (AActor * thing, sector_t * sec, const DVector3 &pos, bool checkabove, bool alert, bool force) { - if (thing->flags3 & MF3_DONTSPLASH) - return false; - if (thing->player && (thing->player->cheats & CF_PREDICTING)) return false; @@ -6595,48 +6592,51 @@ foundone: if (thing->Mass < 10) smallsplash = true; - if (smallsplash && splash->SmallSplash) + if (!(thing->flags3 & MF3_DONTSPLASH)) { - mo = Spawn (splash->SmallSplash, pos, ALLOW_REPLACE); - if (mo) mo->Floorclip += splash->SmallSplashClip; - } - else - { - if (splash->SplashChunk) + if (smallsplash && splash->SmallSplash) { - mo = Spawn (splash->SplashChunk, pos, ALLOW_REPLACE); - mo->target = thing; - if (splash->ChunkXVelShift != 255) + mo = Spawn(splash->SmallSplash, pos, ALLOW_REPLACE); + if (mo) mo->Floorclip += splash->SmallSplashClip; + } + else + { + if (splash->SplashChunk) { - mo->Vel.X = (pr_chunk.Random2() << splash->ChunkXVelShift) / 65536.; + mo = Spawn(splash->SplashChunk, pos, ALLOW_REPLACE); + mo->target = thing; + if (splash->ChunkXVelShift != 255) + { + mo->Vel.X = (pr_chunk.Random2() << splash->ChunkXVelShift) / 65536.; + } + if (splash->ChunkYVelShift != 255) + { + mo->Vel.Y = (pr_chunk.Random2() << splash->ChunkYVelShift) / 65536.; + } + mo->Vel.Z = splash->ChunkBaseZVel + (pr_chunk() << splash->ChunkZVelShift) / 65536.; } - if (splash->ChunkYVelShift != 255) + if (splash->SplashBase) { - mo->Vel.Y = (pr_chunk.Random2() << splash->ChunkYVelShift) / 65536.; + mo = Spawn(splash->SplashBase, pos, ALLOW_REPLACE); + } + if (thing->player && !splash->NoAlert && alert) + {
P_NoiseAlert(thing, thing, true); } - mo->Vel.Z = splash->ChunkBaseZVel + (pr_chunk() << splash->ChunkZVelShift) / 65536.; } - if (splash->SplashBase) + if (mo) { - mo = Spawn (splash->SplashBase, pos, ALLOW_REPLACE); + S_Sound(mo, CHAN_ITEM, smallsplash ? + splash->SmallSplashSound : splash->NormalSplashSound, + 1, ATTN_IDLE); } - if (thing->player && !splash->NoAlert && alert) + else { - P_NoiseAlert (thing, thing, true); + S_Sound(pos, CHAN_ITEM, smallsplash ? + splash->SmallSplashSound : splash->NormalSplashSound, + 1, ATTN_IDLE); } } - if (mo) - { - S_Sound (mo, CHAN_ITEM, smallsplash ? - splash->SmallSplashSound : splash->NormalSplashSound, - 1, ATTN_IDLE); - } - else - { - S_Sound (pos, CHAN_ITEM, smallsplash ? - splash->SmallSplashSound : splash->NormalSplashSound, - 1, ATTN_IDLE); - } // Don't let deep water eat missiles return plane == &sec->floorplane ? Terrains[terrainnum].IsLiquid : false; @@ -6676,9 +6676,6 @@ bool P_HitFloor (AActor *thing) return false; } - if (thing->flags3 & MF3_DONTSPLASH) - return false; - // don't splash if landing on the edge above water/lava/etc.... DVector3 pos; for (m = thing->touching_sectorlist; m; m = m->m_tnext) diff --git a/src/po_man.cpp b/src/po_man.cpp index 76355d4db..ca12120de 100644 --- a/src/po_man.cpp +++ b/src/po_man.cpp @@ -1157,7 +1157,7 @@ bool FPolyObj::CheckMobjBlocking (side_t *sd) // [TN] Check wether this actor gets blocked by the line. 
if (ld->backsector != NULL && !(ld->flags & (ML_BLOCKING|ML_BLOCKEVERYTHING)) - && !(ld->flags & ML_BLOCK_PLAYERS && mobj->player) + && !(ld->flags & ML_BLOCK_PLAYERS && (mobj->player || (mobj->flags8 & MF8_BLOCKASPLAYER))) && !(ld->flags & ML_BLOCKMONSTERS && mobj->flags3 & MF3_ISMONSTER) && !((mobj->flags & MF_FLOAT) && (ld->flags & ML_BLOCK_FLOATERS)) && (!(ld->flags & ML_3DMIDTEX) || diff --git a/src/polyrenderer/drawers/poly_buffer.cpp b/src/polyrenderer/drawers/poly_buffer.cpp index 0e24509cc..2506bce7e 100644 --- a/src/polyrenderer/drawers/poly_buffer.cpp +++ b/src/polyrenderer/drawers/poly_buffer.cpp @@ -48,8 +48,7 @@ void PolyZBuffer::Resize(int newwidth, int newheight) { width = newwidth; height = newheight; - int count = BlockWidth() * BlockHeight(); - values.resize(count * 64); + values.resize(width * height); } ///////////////////////////////////////////////////////////////////////////// @@ -64,14 +63,6 @@ void PolyStencilBuffer::Clear(int newwidth, int newheight, uint8_t stencil_value { width = newwidth; height = newheight; - int count = BlockWidth() * BlockHeight(); - values.resize(count * 64); - masks.resize(count); - - uint8_t *v = Values(); - uint32_t *m = Masks(); - for (int i = 0; i < count; i++) - { - m[i] = 0xffffff00 | stencil_value; - } + values.resize(width * height); + memset(Values(), stencil_value, width * height); } diff --git a/src/polyrenderer/drawers/poly_buffer.h b/src/polyrenderer/drawers/poly_buffer.h index df79c6e71..af4a78b3a 100644 --- a/src/polyrenderer/drawers/poly_buffer.h +++ b/src/polyrenderer/drawers/poly_buffer.h @@ -33,8 +33,6 @@ public: void Resize(int newwidth, int newheight); int Width() const { return width; } int Height() const { return height; } - int BlockWidth() const { return (width + 7) / 8; } - int BlockHeight() const { return (height + 7) / 8; } float *Values() { return values.data(); } private: @@ -50,16 +48,10 @@ public: void Clear(int newwidth, int newheight, uint8_t stencil_value = 0); int Width() const 
{ return width; } int Height() const { return height; } - int BlockWidth() const { return (width + 7) / 8; } - int BlockHeight() const { return (height + 7) / 8; } uint8_t *Values() { return values.data(); } - uint32_t *Masks() { return masks.data(); } private: int width; int height; - - // 8x8 blocks of stencil values, plus a mask for each block indicating if values are the same for early out stencil testing std::vector values; - std::vector masks; }; diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 8aa79f70b..3e02940cd 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -226,6 +226,7 @@ void RectDrawArgs::SetTexture(FTexture *texture, FRenderStyle style) void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, FRenderStyle style) { + // Alphatexture overrides translations. if (translationID != 0xffffffff && translationID != 0 && !(style.Flags & STYLEF_RedIsAlpha)) { FRemapTable *table = TranslationToTable(translationID); @@ -299,61 +300,63 @@ void RectDrawArgs::Draw(PolyRenderThread *thread, double x0, double x1, double y thread->DrawQueue->Push(*this); } -void RectDrawArgs::SetStyle(FRenderStyle renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright) +void RectDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright) { SetTexture(tex, translationID, renderstyle); + SetColor(0xff000000 | fillcolor, fillcolor >> 24); if (renderstyle == LegacyRenderStyles[STYLE_Normal] || (r_drawfuzz == 0 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, 1.0, 0.0); + SetStyle(Translation() ? 
TriBlendMode::NormalTranslated : TriBlendMode::Normal, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_Add] && fullbright && alpha == 1.0 && !Translation()) { - SetStyle(RectBlendMode::TextureAddSrcColor, 1.0, 1.0); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Add]) - { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, alpha, 1.0); - } - else if (renderstyle == LegacyRenderStyles[STYLE_Subtract]) - { - SetStyle(Translation() ? RectBlendMode::TranslatedRevSub : RectBlendMode::TextureRevSub, alpha, 1.0); + SetStyle(TriBlendMode::SrcColor, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_SoulTrans]) { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, transsouls, 1.0 - transsouls); + SetStyle(Translation() ? TriBlendMode::AddTranslated : TriBlendMode::Add, transsouls); } else if (renderstyle == LegacyRenderStyles[STYLE_Fuzzy] || (r_drawfuzz == 1 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { SetColor(0xff000000, 0); - SetStyle(RectBlendMode::Fuzz); + SetStyle(TriBlendMode::Fuzzy); } else if (renderstyle == LegacyRenderStyles[STYLE_Shadow] || (r_drawfuzz == 2 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, 0.0, 160 / 255.0); + SetColor(0xff000000, 0); + SetStyle(Translation() ? TriBlendMode::TranslucentStencilTranslated : TriBlendMode::TranslucentStencil, 1.0 - 160 / 255.0); } - else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil]) + else if (renderstyle == LegacyRenderStyles[STYLE_Stencil]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::Stencil, alpha, 1.0 - alpha); + SetStyle(Translation() ? 
TriBlendMode::StencilTranslated : TriBlendMode::Stencil, alpha); } - else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil]) + else if (renderstyle == LegacyRenderStyles[STYLE_Translucent]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::AddStencil, alpha, 1.0); + SetStyle(Translation() ? TriBlendMode::TranslucentTranslated : TriBlendMode::Translucent, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Add]) + { + SetStyle(Translation() ? TriBlendMode::AddTranslated : TriBlendMode::Add, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_Shaded]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::Shaded, alpha, 1.0 - alpha); + SetStyle(Translation() ? TriBlendMode::ShadedTranslated : TriBlendMode::Shaded, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil]) + { + SetStyle(Translation() ? TriBlendMode::TranslucentStencilTranslated : TriBlendMode::TranslucentStencil, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Subtract]) + { + SetStyle(Translation() ? TriBlendMode::SubtractTranslated : TriBlendMode::Subtract, alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil]) + { + SetStyle(Translation() ? TriBlendMode::AddStencilTranslated : TriBlendMode::AddStencil, alpha); } else if (renderstyle == LegacyRenderStyles[STYLE_AddShaded]) { - SetColor(0xff000000 | fillcolor, fillcolor >> 24); - SetStyle(RectBlendMode::AddShaded, alpha, 1.0); - } - else - { - SetStyle(Translation() ? RectBlendMode::TranslatedAdd : RectBlendMode::TextureAdd, alpha, 1.0 - alpha); + SetStyle(Translation() ? 
TriBlendMode::AddShadedTranslated : TriBlendMode::AddShaded, alpha); } } diff --git a/src/polyrenderer/drawers/poly_draw_args.h b/src/polyrenderer/drawers/poly_draw_args.h index 88f174525..2d43ae64f 100644 --- a/src/polyrenderer/drawers/poly_draw_args.h +++ b/src/polyrenderer/drawers/poly_draw_args.h @@ -180,8 +180,8 @@ public: void SetTexture(FTexture *texture, FRenderStyle style); void SetTexture(FTexture *texture, uint32_t translationID, FRenderStyle style); void SetLight(FSWColormap *basecolormap, uint32_t lightlevel); - void SetStyle(RectBlendMode blendmode, double srcalpha = 1.0, double destalpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(srcalpha * 256.0 + 0.5); mDestAlpha = (uint32_t)(destalpha * 256.0 + 0.5); } - void SetStyle(FRenderStyle renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); + void SetStyle(TriBlendMode blendmode, double alpha = 1.0) { mBlendMode = blendmode; mAlpha = (uint32_t)(alpha * 256.0 + 0.5); } + void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); void SetColor(uint32_t bgra, uint8_t palindex); void Draw(PolyRenderThread *thread, double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1); @@ -191,10 +191,9 @@ public: int TextureHeight() const { return mTextureHeight; } const uint8_t *Translation() const { return mTranslation; } - RectBlendMode BlendMode() const { return mBlendMode; } + TriBlendMode BlendMode() const { return mBlendMode; } uint32_t Color() const { return mColor; } - uint32_t SrcAlpha() const { return mSrcAlpha; } - uint32_t DestAlpha() const { return mDestAlpha; } + uint32_t Alpha() const { return mAlpha; } uint32_t Light() const { return mLight; } const uint8_t *BaseColormap() const { return mColormaps; } @@ -225,11 +224,10 @@ private: int mTextureHeight = 0; const uint8_t *mTranslation = nullptr; const uint8_t *mColormaps = 
nullptr; - RectBlendMode mBlendMode = RectBlendMode::FillOpaque; + TriBlendMode mBlendMode = TriBlendMode::Fill; uint32_t mLight = 0; uint32_t mColor = 0; - uint32_t mSrcAlpha = 0; - uint32_t mDestAlpha = 0; + uint32_t mAlpha = 0; uint16_t mLightAlpha = 0; uint16_t mLightRed = 0; uint16_t mLightGreen = 0; diff --git a/src/polyrenderer/drawers/poly_drawer32.h b/src/polyrenderer/drawers/poly_drawer32.h deleted file mode 100644 index 4dae396de..000000000 --- a/src/polyrenderer/drawers/poly_drawer32.h +++ /dev/null @@ -1,476 +0,0 @@ -/* -** Polygon Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "screen_triangle.h" - -namespace TriScreenDrawerModes -{ - namespace - { - struct BgraColor - { - uint32_t b, g, r, a; - BgraColor() { } - BgraColor(uint32_t c) : b(BPART(c)), g(GPART(c)), r(RPART(c)), a(APART(c)) { } - BgraColor &operator=(uint32_t c) { b = BPART(c); g = GPART(c); r = RPART(c); a = APART(c); return *this; } - operator uint32_t() const { return MAKEARGB(a, r, g, b); } - }; - } - - template - FORCEINLINE unsigned int Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) - { - uint32_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texpal[texelX * texHeight + texelY]]; - } - else if (FilterModeT::Mode == (int)FilterModes::Nearest) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - else - { - u -= oneU >> 1; - v -= oneV >> 1; - - unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; - unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth; - unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; - unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; - unsigned int x0 = frac_x0 >> FRACBITS; - unsigned int x1 = frac_x1 >> FRACBITS; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 
= texPixels[x0 * texHeight + y0]; - unsigned int p01 = texPixels[x0 * texHeight + y1]; - unsigned int p10 = texPixels[x1 * texHeight + y0]; - unsigned int p11 = texPixels[x1 * texHeight + y1]; - - unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; - unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - uint32_t r = RPART(texel); - uint32_t g = GPART(texel); - uint32_t b = BPART(texel); - uint32_t fg_a = APART(texel); - uint32_t bg_red = RPART(color); - uint32_t bg_green = GPART(color); - uint32_t bg_blue = BPART(color); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; - return MAKEARGB(fg_a, r, g, b); - } - else - { - return texel; - } - } - - template - FORCEINLINE unsigned int SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int x, int y) - { - if (SamplerT::Mode == (int)Samplers::Shaded) - { - const uint8_t *texpal = (const uint8_t 
*)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - return sampleshadeout; - } - else - { - return 0; - } - } - - FORCEINLINE BgraColor VECTORCALL AddLights(BgraColor material, BgraColor fgcolor, BgraColor dynlight) - { - fgcolor.r = MIN(fgcolor.r + ((material.r * dynlight.r) >> 8), (uint32_t)255); - fgcolor.g = MIN(fgcolor.g + ((material.g * dynlight.g) >> 8), (uint32_t)255); - fgcolor.b = MIN(fgcolor.b + ((material.b * dynlight.b) >> 8), (uint32_t)255); - return fgcolor; - } - - FORCEINLINE BgraColor VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, FVector3 worldpos, FVector3 worldnormal, uint32_t 
dynlightcolor) - { - BgraColor lit = dynlightcolor; - - for (int i = 0; i != num_lights; i++) - { - FVector3 lightpos = { lights[i].x, lights[i].y, lights[i].z }; - float light_radius = lights[i].radius; - - bool is_attenuated = light_radius < 0.0f; - if (is_attenuated) - light_radius = -light_radius; - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - FVector3 L = lightpos - worldpos; - float dist2 = L | L; - float rcp_dist = 1.0f / sqrt(dist2); - float dist = dist2 * rcp_dist; - float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f); - - // The simple light type - float simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = max(dot(N,normalize(L)),0) * attenuation - float dotNL = worldnormal | (L * rcp_dist); - float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation; - - uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation); - - BgraColor light_color = lights[i].color; - lit.r += (light_color.r * attenuation) >> 8; - lit.g += (light_color.g * attenuation) >> 8; - lit.b += (light_color.b * attenuation) >> 8; - } - - lit.r = MIN(lit.r, (uint32_t)256); - lit.g = MIN(lit.g, (uint32_t)256); - lit.b = MIN(lit.b, (uint32_t)256); - return lit; - } - - template - FORCEINLINE BgraColor Shade32(BgraColor fgcolor, BgraColor mlight, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, BgraColor dynlight) - { - BgraColor material = fgcolor; - if (ShadeModeT::Mode == (int)ShadeMode::Simple) - { - fgcolor.r = (fgcolor.r * mlight.r) >> 8; - fgcolor.g = (fgcolor.g * mlight.g) >> 8; - fgcolor.b = (fgcolor.b * mlight.b) >> 8; - } - else if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - uint32_t intensity = ((fgcolor.r * 77 + fgcolor.g * 143 + fgcolor.b * 37) >> 8) * desaturate; - fgcolor.r = (((shade_fade.r + ((fgcolor.r * inv_desaturate + intensity) >> 8) * mlight.r) >> 8) 
* shade_light.r) >> 8; - fgcolor.g = (((shade_fade.g + ((fgcolor.g * inv_desaturate + intensity) >> 8) * mlight.g) >> 8) * shade_light.g) >> 8; - fgcolor.b = (((shade_fade.b + ((fgcolor.b * inv_desaturate + intensity) >> 8) * mlight.b) >> 8) * shade_light.b) >> 8; - } - return AddLights(material, fgcolor, dynlight); - } - - template - FORCEINLINE BgraColor Blend32(BgraColor fgcolor, BgraColor bgcolor, uint32_t ifgcolor, uint32_t ifgshade, uint32_t srcalpha, uint32_t destalpha) - { - if (BlendT::Mode == (int)BlendModes::Opaque) - { - fgcolor.a = 255; - return fgcolor; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - return (ifgcolor == 0) ? bgcolor : fgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - uint32_t srcred = fgcolor.r + (fgcolor.r >> 7); - uint32_t srcgreen = fgcolor.g + (fgcolor.g >> 7); - uint32_t srcblue = fgcolor.b + (fgcolor.b >> 7); - uint32_t inv_srcred = 256 - srcred; - uint32_t inv_srcgreen = 256 - srcgreen; - uint32_t inv_srcblue = 256 - srcblue; - - BgraColor outcolor; - outcolor.r = (fgcolor.r * srcred + bgcolor.r * inv_srcred) >> 8; - outcolor.g = (fgcolor.g * srcgreen + bgcolor.g * inv_srcgreen) >> 8; - outcolor.b = (fgcolor.b * srcblue + bgcolor.b * inv_srcblue) >> 8; - outcolor.a = 255; - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - uint32_t alpha = ifgshade; - uint32_t inv_alpha = 256 - alpha; - - BgraColor outcolor; - outcolor.r = (fgcolor.r * alpha + bgcolor.r * inv_alpha) >> 8; - outcolor.g = (fgcolor.g * alpha + bgcolor.g * inv_alpha) >> 8; - outcolor.b = (fgcolor.b * alpha + bgcolor.b * inv_alpha) >> 8; - outcolor.a = 255; - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - uint32_t alpha = ifgshade; - BgraColor outcolor; - outcolor.r = ((fgcolor.r * alpha) >> 8) + bgcolor.r; - outcolor.g = ((fgcolor.g * alpha) >> 8) + bgcolor.g; - outcolor.b = ((fgcolor.b * alpha) >> 8) + bgcolor.b; - outcolor.a = 255; - return 
outcolor; - } - else - { - uint32_t alpha = APART(ifgcolor); - alpha += alpha >> 7; // 255->256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bgalpha = (destalpha * alpha + (inv_alpha << 8) + 128) >> 8; - uint32_t fgalpha = (srcalpha * alpha + 128) >> 8; - - fgcolor.r *= fgalpha; - fgcolor.g *= fgalpha; - fgcolor.b *= fgalpha; - bgcolor.r *= bgalpha; - bgcolor.g *= bgalpha; - bgcolor.b *= bgalpha; - - BgraColor outcolor; - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - outcolor.r = MIN((fgcolor.r + bgcolor.r) >> 8, 255); - outcolor.g = MIN((fgcolor.g + bgcolor.g) >> 8, 255); - outcolor.b = MIN((fgcolor.b + bgcolor.b) >> 8, 255); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - outcolor.r = MAX(int32_t(fgcolor.r - bgcolor.r) >> 8, 0); - outcolor.g = MAX(int32_t(fgcolor.g - bgcolor.g) >> 8, 0); - outcolor.b = MAX(int32_t(fgcolor.b - bgcolor.b) >> 8, 0); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - outcolor.r = MAX(int32_t(bgcolor.r - fgcolor.r) >> 8, 0); - outcolor.g = MAX(int32_t(bgcolor.g - fgcolor.g) >> 8, 0); - outcolor.b = MAX(int32_t(bgcolor.b - fgcolor.b) >> 8, 0); - } - outcolor.a = 255; - return outcolor; - } - } -} - -template -class RectScreenDrawer32 -{ -public: - static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - if (SamplerT::Mode == (int)Samplers::Fuzz) - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - else if (args->SimpleShade()) - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - else - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - } - -private: - template - FORCEINLINE static void Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 
0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= x0 || y1 <= y0) - return; - - uint32_t srcalpha = args->SrcAlpha(); - uint32_t destalpha = args->DestAlpha(); - - // Setup step variables - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - // Sampling stuff - uint32_t color = args->Color(); - const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation(); - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels(); - uint32_t texWidth = args->TextureWidth(); - uint32_t texHeight = args->TextureHeight(); - uint32_t oneU, oneV; - if (SamplerT::Mode != (int)Samplers::Fill) - { - oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; - oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; - } - else - { - oneU = 0; - oneV = 0; - } - - // Setup light - uint32_t lightpos = args->Light(); - lightpos += lightpos >> 7; // 255 -> 256 - BgraColor mlight; - - BgraColor dynlight = 0; - - // Shade constants - int inv_desaturate; - BgraColor shade_fade_lit, shade_light; - int desaturate; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - uint32_t inv_light = 256 - lightpos; - shade_fade_lit.r = args->ShadeFadeRed() * inv_light; - shade_fade_lit.g = args->ShadeFadeGreen() * inv_light; - shade_fade_lit.b = args->ShadeFadeBlue() * inv_light; - shade_light.r = args->ShadeLightRed(); - shade_light.g = args->ShadeLightGreen(); - shade_light.b = args->ShadeLightBlue(); - desaturate = args->ShadeDesaturate(); 
- inv_desaturate = 256 - desaturate; - mlight.r = lightpos; - mlight.g = lightpos; - mlight.b = lightpos; - } - else - { - inv_desaturate = 0; - shade_fade_lit.r = 0; - shade_fade_lit.g = 0; - shade_fade_lit.b = 0; - shade_light.r = 0; - shade_light.g = 0; - shade_light.b = 0; - desaturate = 0; - mlight.r = lightpos; - mlight.g = lightpos; - mlight.b = lightpos; - } - - int count = x1 - x0; - - uint32_t posV = startV; - for (int y = y0; y < y1; y++, posV += stepV) - { - int coreBlock = y / 8; - if (coreBlock % thread->num_cores != thread->core) - { - continue; - } - - uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0; - - uint32_t posU = startU; - for (int i = 0; i < count; i++) - { - // Load bgcolor - BgraColor bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = *dest; - else - bgcolor = 0; - - // Sample fgcolor - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = *dest; - unsigned int ifgcolor = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - unsigned int ifgshade = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i, y); - posU += stepU; - - // Shade and blend - BgraColor fgcolor = Shade32(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - BgraColor outcolor = Blend32(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha); - - // Store result - *dest = outcolor; - dest++; - } - } - } -}; diff --git a/src/polyrenderer/drawers/poly_drawer32_sse2.h b/src/polyrenderer/drawers/poly_drawer32_sse2.h deleted file mode 100644 index 4685dd07c..000000000 --- a/src/polyrenderer/drawers/poly_drawer32_sse2.h +++ /dev/null @@ -1,518 +0,0 @@ -/* -** Polygon Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#pragma once - -#include "screen_triangle.h" - -namespace TriScreenDrawerModes -{ - template - FORCEINLINE unsigned int VECTORCALL Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) - { - uint32_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texpal[texelX * texHeight + texelY]]; - } - else if (FilterModeT::Mode == (int)FilterModes::Nearest) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - else - { - u -= oneU >> 1; - v -= oneV >> 1; - - unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; - unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * 
texWidth; - unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; - unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; - unsigned int x0 = frac_x0 >> FRACBITS; - unsigned int x1 = frac_x1 >> FRACBITS; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = texPixels[x0 * texHeight + y0]; - unsigned int p01 = texPixels[x0 * texHeight + y1]; - unsigned int p10 = texPixels[x1 * texHeight + y0]; - unsigned int p11 = texPixels[x1 * texHeight + y1]; - - unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; - unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - uint32_t r = RPART(texel); - uint32_t g = GPART(texel); - uint32_t b = BPART(texel); - uint32_t fg_a = APART(texel); - uint32_t bg_red = RPART(color); - uint32_t bg_green = GPART(color); - uint32_t bg_blue = BPART(color); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + 
bg_blue * inv_a + 127) >> 8; - return MAKEARGB(fg_a, r, g, b); - } - else - { - return texel; - } - } - - template - FORCEINLINE unsigned int VECTORCALL SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int x, int y) - { - if (SamplerT::Mode == (int)Samplers::Shaded) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - return sampleshadeout; - } - else - { - return 0; - } - } - - FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, __m128i dynlight) - { - fgcolor = _mm_add_epi16(fgcolor, 
_mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8)); - fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); - return fgcolor; - } - - FORCEINLINE __m128i VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal, uint32_t dynlightcolor) - { - __m128i lit = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()); - lit = _mm_shuffle_epi32(lit, _MM_SHUFFLE(1, 0, 1, 0)); - - for (int i = 0; i != num_lights; i++) - { - __m128 m256 = _mm_set1_ps(256.0f); - __m128 mSignBit = _mm_set1_ps(-0.0f); - - __m128 lightpos = _mm_loadu_ps(&lights[i].x); - __m128 light_radius = _mm_load_ss(&lights[i].radius); - - __m128 is_attenuated = _mm_cmpge_ss(light_radius, _mm_setzero_ps()); - is_attenuated = _mm_shuffle_ps(is_attenuated, is_attenuated, _MM_SHUFFLE(0, 0, 0, 0)); - light_radius = _mm_andnot_ps(mSignBit, light_radius); - - // L = light-pos - // dist = sqrt(dot(L, L)) - // distance_attenuation = 1 - MIN(dist * (1/radius), 1) - __m128 L = _mm_sub_ps(lightpos, worldpos); - __m128 dist2 = _mm_mul_ps(L, L); - dist2 = _mm_add_ss(dist2, _mm_add_ss(_mm_shuffle_ps(dist2, dist2, _MM_SHUFFLE(0, 0, 0, 1)), _mm_shuffle_ps(dist2, dist2, _MM_SHUFFLE(0, 0, 0, 2)))); - __m128 rcp_dist = _mm_rsqrt_ss(dist2); - __m128 dist = _mm_mul_ss(dist2, rcp_dist); - __m128 distance_attenuation = _mm_sub_ss(m256, _mm_min_ss(_mm_mul_ss(dist, light_radius), m256)); - distance_attenuation = _mm_shuffle_ps(distance_attenuation, distance_attenuation, _MM_SHUFFLE(0, 0, 0, 0)); - - // The simple light type - __m128 simple_attenuation = distance_attenuation; - - // The point light type - // diffuse = max(dot(N,normalize(L)),0) * attenuation - __m128 dotNL = _mm_mul_ps(worldnormal, _mm_mul_ps(L, _mm_shuffle_ps(rcp_dist, rcp_dist, _MM_SHUFFLE(0, 0, 0, 0)))); - dotNL = _mm_add_ss(dotNL, _mm_add_ss(_mm_shuffle_ps(dotNL, dotNL, _MM_SHUFFLE(0, 0, 0, 1)), _mm_shuffle_ps(dotNL, dotNL, _MM_SHUFFLE(0, 0, 0, 2)))); - dotNL = _mm_max_ss(dotNL, 
_mm_setzero_ps()); - __m128 point_attenuation = _mm_mul_ss(dotNL, distance_attenuation); - point_attenuation = _mm_shuffle_ps(point_attenuation, point_attenuation, _MM_SHUFFLE(0, 0, 0, 0)); - - __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); - attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 1, 1))); - - __m128i light_color = _mm_cvtsi32_si128(lights[i].color); - light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); - light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1, 0, 1, 0)); - - lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); - } - - return _mm_min_epi16(lit, _mm_set1_epi16(256)); - } - - template - FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i dynlight) - { - __m128i material = fgcolor; - if (ShadeModeT::Mode == (int)ShadeMode::Simple) - { - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - } - else if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - int blue0 = BPART(ifgcolor0); - int green0 = GPART(ifgcolor0); - int red0 = RPART(ifgcolor0); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor1); - int green1 = GPART(ifgcolor1); - int red1 = RPART(ifgcolor1); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = 
_mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - } - - return AddLights(material, fgcolor, dynlight); - } - - template - FORCEINLINE __m128i VECTORCALL Blend32(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) - { - if (BlendT::Mode == (int)BlendModes::Opaque) - { - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - __m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128()); - mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128()); - __m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor)); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - __m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7))); - __m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8)); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; - ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; - __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = 
_mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; - ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; - __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); - - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8); - __m128i outcolor = _mm_add_epi16(fgcolor, bgcolor); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else - { - uint32_t alpha0 = APART(ifgcolor0); - uint32_t alpha1 = APART(ifgcolor1); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo, out_hi; - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - out_lo = _mm_add_epi32(fg_lo, bg_lo); - out_hi = _mm_add_epi32(fg_hi, bg_hi); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - out_lo = _mm_sub_epi32(fg_lo, bg_lo); - out_hi 
= _mm_sub_epi32(fg_hi, bg_hi); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - out_lo = _mm_sub_epi32(bg_lo, fg_lo); - out_hi = _mm_sub_epi32(bg_hi, fg_hi); - } - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - } -} - -template -class RectScreenDrawer32 -{ -public: - static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - if (args->SimpleShade()) - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - else - { - Loop(destOrg, destWidth, destHeight, destPitch, args, thread); - } - } - -private: - template - FORCEINLINE static void VECTORCALL Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= x0 || y1 <= y0) - return; - - uint32_t srcalpha = args->SrcAlpha(); - uint32_t destalpha = args->DestAlpha(); - - // Setup step variables - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - // Sampling stuff - uint32_t color = 
args->Color(); - const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation(); - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels(); - uint32_t texWidth = args->TextureWidth(); - uint32_t texHeight = args->TextureHeight(); - uint32_t oneU, oneV; - if (SamplerT::Mode != (int)Samplers::Fill) - { - oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; - oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; - } - else - { - oneU = 0; - oneV = 0; - } - - // Shade constants - __m128i inv_desaturate, shade_fade, shade_light; - int desaturate; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - inv_desaturate = _mm_setr_epi16(256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate()); - shade_fade = _mm_set_epi16(args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue(), args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue()); - shade_light = _mm_set_epi16(args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue(), args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue()); - desaturate = args->ShadeDesaturate(); - } - else - { - inv_desaturate = _mm_setzero_si128(); - shade_fade = _mm_setzero_si128(); - shade_light = _mm_setzero_si128(); - desaturate = 0; - } - - // Setup light - uint32_t lightpos = args->Light(); - lightpos += lightpos >> 7; // 255 -> 256 - __m128i mlight = _mm_set_epi16(256, lightpos, lightpos, lightpos, 256, lightpos, lightpos, lightpos); - __m128i shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - __m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight); - shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light); - } - else - { - shade_fade_lit = 
_mm_setzero_si128(); - } - - int count = x1 - x0; - int sseCount = count / 2; - - uint32_t posV = startV; - for (int y = y0; y < y1; y++, posV += stepV) - { - int coreBlock = y / 8; - if (coreBlock % thread->num_cores != thread->core) - { - continue; - } - - uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0; - - uint32_t posU = startU; - for (int i = 0; i < sseCount; i++) - { - // Load bgcolor - __m128i bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)dest), _mm_setzero_si128()); - else - bgcolor = _mm_setzero_si128(); - - // Sample fgcolor - unsigned int ifgcolor[2], ifgshade[2]; - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[0]; - ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i * 2, y); - posU += stepU; - - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[1]; - ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + i * 2 + 1, y); - posU += stepU; - - // Shade and blend - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128()); - __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); - - // Store result - _mm_storel_epi64((__m128i*)dest, outcolor); - dest += 2; - } - - if (sseCount * 2 != count) - { - // Load bgcolor - __m128i bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dest), _mm_setzero_si128()); - else - bgcolor = _mm_setzero_si128(); - - // Sample fgcolor - unsigned int ifgcolor[2], ifgshade[2]; - if 
(SamplerT::Mode == (int)Samplers::FogBoundary) color = *dest; - ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, x0 + sseCount * 2, y); - ifgcolor[1] = 0; - ifgshade[1] = 0; - posU += stepU; - - // Shade and blend - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128()); - __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); - - // Store result - *dest = _mm_cvtsi128_si32(outcolor); - } - } - } -}; diff --git a/src/polyrenderer/drawers/poly_drawer8.h b/src/polyrenderer/drawers/poly_drawer8.h deleted file mode 100644 index 1db272885..000000000 --- a/src/polyrenderer/drawers/poly_drawer8.h +++ /dev/null @@ -1,295 +0,0 @@ -/* -** Polygon Doom software renderer -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. 
-** -*/ - -#pragma once - -#include "screen_triangle.h" - -namespace TriScreenDrawerModes -{ - template - FORCEINLINE unsigned int Sample8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation) - { - uint8_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz || SamplerT::Mode == (int)Samplers::FogBoundary) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texPixels[texelX * texHeight + texelY]]; - } - else - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - if (a == 256) - return texel; - - uint32_t capcolor = GPalette.BaseColors[color].d; - uint32_t texelrgb = GPalette.BaseColors[texel].d; - uint32_t r = RPART(texelrgb); - uint32_t g = GPART(texelrgb); - uint32_t b = BPART(texelrgb); - uint32_t capcolor_red = RPART(capcolor); - uint32_t capcolor_green = GPART(capcolor); - uint32_t capcolor_blue = BPART(capcolor); - r = (r * a + capcolor_red * inv_a + 127) >> 8; - g = (g * a + capcolor_green * inv_a + 127) >> 8; - b = (b * a + capcolor_blue * inv_a + 127) >> 8; - return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; - } - else - { - return texel; - } - } - - template - FORCEINLINE unsigned int SampleShade8(int32_t u, int32_t v, const 
uint8_t *texPixels, int texWidth, int texHeight, int x, int y) - { - if (SamplerT::Mode == (int)Samplers::Shaded) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0; - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - using namespace swrenderer; - - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0; - - fixed_t fuzzscale = (200 << FRACBITS) / viewheight; - - int scaled_x = (x * fuzzscale) >> FRACBITS; - int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; - - fixed_t fuzzcount = FUZZTABLE << FRACBITS; - fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; - unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; - - sampleshadeout = (sampleshadeout * alpha) >> 5; - return sampleshadeout; - } - else - { - return 0; - } - } - - template - FORCEINLINE uint8_t ShadeAndBlend8(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha) - { - lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00; - uint8_t shadedfg = colormaps[lightshade + fgcolor]; - - if (BlendT::Mode == (int)BlendModes::Opaque) - { - return shadedfg; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - return (fgcolor != 0) ? 
shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7)); - int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7)); - int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7)); - fg_r = MIN(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255); - fg_g = MIN(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255); - fg_b = MIN(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255); - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - return (fgcolor != 0) ? shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - fgshade = (fgshade * srcalpha + 128) >> 8; - uint32_t alpha = fgshade; - uint32_t inv_alpha = 256 - fgshade; - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8; - fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8; - fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8; - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - return (alpha != 0) ? 
shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - fgshade = (fgshade * srcalpha + 128) >> 8; - uint32_t alpha = fgshade; - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - fg_r = MIN(bg_r + ((fg_r * alpha + 127) >> 8), 255); - fg_g = MIN(bg_g + ((fg_g * alpha + 127) >> 8), 255); - fg_b = MIN(bg_b + ((fg_b * alpha + 127) >> 8), 255); - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - - return (alpha != 0) ? shadedfg : bgcolor; - } - else - { - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255); - fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255); - fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0); - fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0); - fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0); - fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0); - fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0); - } - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; 
- return (fgcolor != 0) ? shadedfg : bgcolor; - } - } -} - -template -class RectScreenDrawer8 -{ -public: - static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) - { - using namespace TriScreenDrawerModes; - - int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); - int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); - int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); - int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); - - if (x1 <= x0 || y1 <= y0) - return; - - auto colormaps = args->BaseColormap(); - uint32_t srcalpha = args->SrcAlpha(); - uint32_t destalpha = args->DestAlpha(); - - // Setup step variables - float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); - float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); - uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); - uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); - uint32_t stepU = (int32_t)(fstepU * 0x1000000); - uint32_t stepV = (int32_t)(fstepV * 0x1000000); - - // Sampling stuff - uint32_t color = args->Color(); - const uint8_t * RESTRICT translation = args->Translation(); - const uint8_t * RESTRICT texPixels = args->TexturePixels(); - uint32_t texWidth = args->TextureWidth(); - uint32_t texHeight = args->TextureHeight(); - - // Setup light - uint32_t lightshade = args->Light(); - lightshade += lightshade >> 7; // 255 -> 256 - if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256; - - int count = x1 - x0; - - uint32_t posV = startV; - for (int y = y0; y < y1; y++, posV += stepV) - { - int coreBlock = y / 8; - if (coreBlock % thread->num_cores != thread->core) - { - continue; - } - - uint8_t *dest = ((uint8_t*)destOrg) + y * destPitch + x0; - - uint32_t posU = startU; - for (int i = 0; i < count; i++) - { - uint8_t bgcolor = *dest; - if (SamplerT::Mode == 
(int)Samplers::FogBoundary) color = bgcolor; - uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight, x0 + i, y); - *dest = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); - - posU += stepU; - dest++; - } - } - } -}; diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 5661f5c83..992a43d76 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -52,7 +52,7 @@ bool PolyTriangleDrawer::IsBgra() return isBgraRenderTarget; } -void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers) +void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas) { uint8_t *dest = (uint8_t*)canvas->GetPixels(); int dest_width = canvas->GetWidth(); @@ -74,7 +74,7 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx); dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety); - queue->Push(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra, span_drawers); + queue->Push(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra); } void PolyTriangleDrawer::SetTransform(const DrawerCommandQueuePtr &queue, const Mat4f *objectToClip) @@ -99,7 +99,7 @@ void PolyTriangleDrawer::SetWeaponScene(const DrawerCommandQueuePtr &queue, bool ///////////////////////////////////////////////////////////////////////////// -void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, 
bool new_dest_bgra, bool new_span_drawers) +void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, bool new_dest_bgra) { viewport_x = x; viewport_y = y; @@ -110,7 +110,6 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui dest_height = new_dest_height; dest_pitch = new_dest_pitch; dest_bgra = new_dest_bgra; - span_drawers = new_span_drawers; ccw = true; weaponScene = false; } @@ -132,9 +131,7 @@ void PolyTriangleThreadData::DrawElements(const PolyDrawArgs &drawargs) args.clipbottom = dest_height; args.uniforms = &drawargs; args.destBgra = dest_bgra; - args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); - args.stencilValues = PolyStencilBuffer::Instance()->Values(); - args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); + args.stencilbuffer = PolyStencilBuffer::Instance()->Values(); args.zbuffer = PolyZBuffer::Instance()->Values(); args.depthOffset = weaponScene ? 1.0f : 0.0f; @@ -191,9 +188,7 @@ void PolyTriangleThreadData::DrawArrays(const PolyDrawArgs &drawargs) args.clipbottom = dest_height; args.uniforms = &drawargs; args.destBgra = dest_bgra; - args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); - args.stencilValues = PolyStencilBuffer::Instance()->Values(); - args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); + args.stencilbuffer = PolyStencilBuffer::Instance()->Values(); args.zbuffer = PolyZBuffer::Instance()->Values(); args.depthOffset = weaponScene ? 
1.0f : 0.0f; @@ -379,10 +374,7 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *vert, boo args->v3 = &clippedvert[i - 2]; if (IsFrontfacing(args) == ccw && args->CalculateGradients()) { - if (!span_drawers) - ScreenTriangle::Draw(args, this); - else - ScreenTriangle::DrawSWRender(args, this); + ScreenTriangle::Draw(args, this); } } } @@ -395,10 +387,7 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *vert, boo args->v3 = &clippedvert[i]; if (IsFrontfacing(args) != ccw && args->CalculateGradients()) { - if (!span_drawers) - ScreenTriangle::Draw(args, this); - else - ScreenTriangle::DrawSWRender(args, this); + ScreenTriangle::Draw(args, this); } } } @@ -631,14 +620,14 @@ void PolySetWeaponSceneCommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -PolySetViewportCommand::PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers) - : x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra), span_drawers(span_drawers) +PolySetViewportCommand::PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra) + : x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra) { } void PolySetViewportCommand::Execute(DrawerThread *thread) { - PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra, span_drawers); + PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra); } ///////////////////////////////////////////////////////////////////////////// diff --git 
a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index b10888455..c1e4871db 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -33,7 +33,7 @@ class PolyTriangleDrawer { public: static void ClearBuffers(DCanvas *canvas); - static void SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers); + static void SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas); static void SetCullCCW(const DrawerCommandQueuePtr &queue, bool ccw); static void SetTwoSided(const DrawerCommandQueuePtr &queue, bool twosided); static void SetWeaponScene(const DrawerCommandQueuePtr &queue, bool enable); @@ -47,7 +47,7 @@ class PolyTriangleThreadData public: PolyTriangleThreadData(int32_t core, int32_t num_cores) : core(core), num_cores(num_cores) { } - void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers); + void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra); void SetTransform(const Mat4f *objectToClip); void SetCullCCW(bool value) { ccw = value; } void SetTwoSided(bool value) { twosided = value; } @@ -88,7 +88,6 @@ private: bool twosided = false; bool weaponScene = false; const Mat4f *objectToClip = nullptr; - bool span_drawers = false; enum { max_additional_vertices = 16 }; }; @@ -144,7 +143,7 @@ private: class PolySetViewportCommand : public DrawerCommand { public: - PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, bool span_drawers); + PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra); void Execute(DrawerThread *thread) override; 
FString DebugInfo() override { return "PolySetViewport"; } @@ -159,7 +158,6 @@ private: int dest_height; int dest_pitch; bool dest_bgra; - bool span_drawers; }; class DrawPolyTrianglesCommand : public DrawerCommand diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index a8a4fcd54..4614dec64 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -36,1099 +36,8 @@ #include "poly_triangle.h" #include "swrenderer/drawers/r_draw_rgba.h" #include "screen_triangle.h" -#ifndef NO_SSE -#include "poly_drawer32_sse2.h" -#else -#include "poly_drawer32.h" -#endif -#include "poly_drawer8.h" #include "x86.h" -class TriangleBlock -{ -public: - TriangleBlock(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); - void Render(); - -private: - void RenderSubdivide(int x0, int y0, int x1, int y1); - - enum class CoverageModes { Full, Partial }; - struct CoverageFull { static const int Mode = (int)CoverageModes::Full; }; - struct CoveragePartial { static const int Mode = (int)CoverageModes::Partial; }; - - template - void RenderBlock(int x0, int y0, int x1, int y1); - - const TriDrawTriangleArgs *args; - PolyTriangleThreadData *thread; - - // Block size, standard 8x8 (must be power of two) - static const int q = 8; - - // Deltas - int DX12, DX23, DX31; - int DY12, DY23, DY31; - - // Fixed-point deltas - int FDX12, FDX23, FDX31; - int FDY12, FDY23, FDY31; - - // Half-edge constants - int C1, C2, C3; - - // Stencil buffer - int stencilPitch; - uint8_t * RESTRICT stencilValues; - uint32_t * RESTRICT stencilMasks; - uint8_t stencilTestValue; - uint32_t stencilWriteValue; - - // Viewport clipping - int clipright; - int clipbottom; - - // Depth buffer - float * RESTRICT zbuffer; - int32_t zbufferPitch; - - // Triangle bounding block - int minx, miny; - int maxx, maxy; - - // Active block - int X, Y; - uint32_t Mask0, Mask1; - -#ifndef NO_SSE - __m128i mFDY12Offset; - 
__m128i mFDY23Offset; - __m128i mFDY31Offset; - __m128i mFDY12x4; - __m128i mFDY23x4; - __m128i mFDY31x4; - __m128i mFDX12; - __m128i mFDX23; - __m128i mFDX31; - __m128i mC1; - __m128i mC2; - __m128i mC3; - __m128i mDX12; - __m128i mDY12; - __m128i mDX23; - __m128i mDY23; - __m128i mDX31; - __m128i mDY31; -#endif - - enum class CoverageResult - { - full, - partial, - none - }; - CoverageResult AreaCoverageTest(int x0, int y0, int x1, int y1); - - void CoverageTest(); - void StencilEqualTest(); - void StencilGreaterEqualTest(); - void DepthTest(const TriDrawTriangleArgs *args); - void ClipTest(); - void StencilWrite(); - void DepthWrite(const TriDrawTriangleArgs *args); -}; - -TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) : args(args), thread(thread) -{ - const ShadedTriVertex &v1 = *args->v1; - const ShadedTriVertex &v2 = *args->v2; - const ShadedTriVertex &v3 = *args->v3; - - clipright = args->clipright; - clipbottom = args->clipbottom; - - stencilPitch = args->stencilPitch; - stencilValues = args->stencilValues; - stencilMasks = args->stencilMasks; - stencilTestValue = args->uniforms->StencilTestValue(); - stencilWriteValue = args->uniforms->StencilWriteValue(); - - zbuffer = args->zbuffer; - zbufferPitch = args->stencilPitch; - - // 28.4 fixed-point coordinates -#ifdef NO_SSE - const int Y1 = (int)round(16.0f * v1.y); - const int Y2 = (int)round(16.0f * v2.y); - const int Y3 = (int)round(16.0f * v3.y); - - const int X1 = (int)round(16.0f * v1.x); - const int X2 = (int)round(16.0f * v2.x); - const int X3 = (int)round(16.0f * v3.x); -#else - int tempround[4 * 3]; - __m128 m16 = _mm_set1_ps(16.0f); - __m128 mhalf = _mm_set1_ps(65536.5f); - __m128i m65536 = _mm_set1_epi32(65536); - _mm_storeu_si128((__m128i*)tempround, _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v1), m16), mhalf)), m65536)); - _mm_storeu_si128((__m128i*)(tempround + 4), 
_mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v2), m16), mhalf)), m65536)); - _mm_storeu_si128((__m128i*)(tempround + 8), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v3), m16), mhalf)), m65536)); - const int X1 = tempround[0]; - const int X2 = tempround[4]; - const int X3 = tempround[8]; - const int Y1 = tempround[1]; - const int Y2 = tempround[5]; - const int Y3 = tempround[9]; -#endif - - // Deltas - DX12 = X1 - X2; - DX23 = X2 - X3; - DX31 = X3 - X1; - - DY12 = Y1 - Y2; - DY23 = Y2 - Y3; - DY31 = Y3 - Y1; - - // Fixed-point deltas - FDX12 = DX12 << 4; - FDX23 = DX23 << 4; - FDX31 = DX31 << 4; - - FDY12 = DY12 << 4; - FDY23 = DY23 << 4; - FDY31 = DY31 << 4; - - // Bounding rectangle - minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0); - maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1); - miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0); - maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1); - if (minx >= maxx || miny >= maxy) - { - return; - } - - // Start and end in corner of 8x8 block - minx &= ~(q - 1); - miny &= ~(q - 1); - maxx |= q - 1; - maxy |= q - 1; - - // Half-edge constants - C1 = DY12 * X1 - DX12 * Y1; - C2 = DY23 * X2 - DX23 * Y2; - C3 = DY31 * X3 - DX31 * Y3; - - // Correct for fill convention - if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++; - if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++; - if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++; - -#ifndef NO_SSE - mFDY12Offset = _mm_setr_epi32(0, FDY12, FDY12 * 2, FDY12 * 3); - mFDY23Offset = _mm_setr_epi32(0, FDY23, FDY23 * 2, FDY23 * 3); - mFDY31Offset = _mm_setr_epi32(0, FDY31, FDY31 * 2, FDY31 * 3); - mFDY12x4 = _mm_set1_epi32(FDY12 * 4); - mFDY23x4 = _mm_set1_epi32(FDY23 * 4); - mFDY31x4 = _mm_set1_epi32(FDY31 * 4); - mFDX12 = _mm_set1_epi32(FDX12); - mFDX23 = _mm_set1_epi32(FDX23); - mFDX31 = _mm_set1_epi32(FDX31); - mC1 = _mm_set1_epi32(C1); - mC2 = _mm_set1_epi32(C2); - mC3 = _mm_set1_epi32(C3); - mDX12 = 
_mm_set1_epi32(DX12); - mDY12 = _mm_set1_epi32(DY12); - mDX23 = _mm_set1_epi32(DX23); - mDY23 = _mm_set1_epi32(DY23); - mDX31 = _mm_set1_epi32(DX31); - mDY31 = _mm_set1_epi32(DY31); -#endif -} - -void TriangleBlock::Render() -{ - RenderSubdivide(minx / q, miny / q, (maxx + 1) / q, (maxy + 1) / q); -} - -void TriangleBlock::RenderSubdivide(int x0, int y0, int x1, int y1) -{ - CoverageResult result = AreaCoverageTest(x0 * q, y0 * q, x1 * q, y1 * q); - if (result == CoverageResult::full) - { - RenderBlock(x0 * q, y0 * q, x1 * q, y1 * q); - } - else if (result == CoverageResult::partial) - { - bool doneX = x1 - x0 <= 8; - bool doneY = y1 - y0 <= 8; - if (doneX && doneY) - { - RenderBlock(x0 * q, y0 * q, x1 * q, y1 * q); - } - else - { - int midx = (x0 + x1) >> 1; - int midy = (y0 + y1) >> 1; - if (doneX) - { - RenderSubdivide(x0, y0, x1, midy); - RenderSubdivide(x0, midy, x1, y1); - } - else if (doneY) - { - RenderSubdivide(x0, y0, midx, y1); - RenderSubdivide(midx, y0, x1, y1); - } - else - { - RenderSubdivide(x0, y0, midx, midy); - RenderSubdivide(midx, y0, x1, midy); - RenderSubdivide(x0, midy, midx, y1); - RenderSubdivide(midx, midy, x1, y1); - } - } - } -} - -template -void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1) -{ - // First block line for this thread - int core = thread->core; - int num_cores = thread->num_cores; - int core_skip = (num_cores - ((y0 / q) - core) % num_cores) % num_cores; - int start_miny = y0 + core_skip * q; - - bool depthTest = args->uniforms->DepthTest(); - bool writeColor = args->uniforms->WriteColor(); - bool writeStencil = args->uniforms->WriteStencil(); - bool writeDepth = args->uniforms->WriteDepth(); - - int bmode = (int)args->uniforms->BlendMode(); - auto drawFunc = args->destBgra ? 
ScreenTriangle::SpanDrawers32[bmode] : ScreenTriangle::SpanDrawers8[bmode]; - - // Loop through blocks - for (int y = start_miny; y < y1; y += q * num_cores) - { - for (int x = x0; x < x1; x += q) - { - X = x; - Y = y; - - if (CoverageModeT::Mode == (int)CoverageModes::Full) - { - Mask0 = 0xffffffff; - Mask1 = 0xffffffff; - } - else - { - CoverageTest(); - if (Mask0 == 0 && Mask1 == 0) - continue; - } - - ClipTest(); - if (Mask0 == 0 && Mask1 == 0) - continue; - - StencilEqualTest(); - if (Mask0 == 0 && Mask1 == 0) - continue; - - if (depthTest) - { - DepthTest(args); - if (Mask0 == 0 && Mask1 == 0) - continue; - } - - if (writeColor) - { - if (Mask0 == 0xffffffff) - { - drawFunc(Y, X, X + 8, args); - drawFunc(Y + 1, X, X + 8, args); - drawFunc(Y + 2, X, X + 8, args); - drawFunc(Y + 3, X, X + 8, args); - } - else if (Mask0 != 0) - { - uint32_t mask = Mask0; - for (int j = 0; j < 4; j++) - { - int start = 0; - int i; - for (i = 0; i < 8; i++) - { - if (!(mask & 0x80000000)) - { - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - start = i + 1; - } - mask <<= 1; - } - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - } - } - - if (Mask1 == 0xffffffff) - { - drawFunc(Y + 4, X, X + 8, args); - drawFunc(Y + 5, X, X + 8, args); - drawFunc(Y + 6, X, X + 8, args); - drawFunc(Y + 7, X, X + 8, args); - } - else if (Mask1 != 0) - { - uint32_t mask = Mask1; - for (int j = 4; j < 8; j++) - { - int start = 0; - int i; - for (i = 0; i < 8; i++) - { - if (!(mask & 0x80000000)) - { - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - start = i + 1; - } - mask <<= 1; - } - if (i > start) - drawFunc(Y + j, X + start, X + i, args); - } - } - } - - if (writeStencil) - StencilWrite(); - if (writeDepth) - DepthWrite(args); - } - } -} - -#ifdef NO_SSE - -void TriangleBlock::DepthTest(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float 
stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - bool covered = *depth <= posXW; - mask0 <<= 1; - mask0 |= (uint32_t)covered; - depth++; - posXW += stepXW; - } - posYW += stepYW; - } - - for (int iy = 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - bool covered = *depth <= posXW; - mask1 <<= 1; - mask1 |= (uint32_t)covered; - depth++; - posXW += stepXW; - } - posYW += stepYW; - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; -} - -#else - -void TriangleBlock::DepthTest(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - __m128 mposYW = _mm_setr_ps(posYW, posYW + stepXW, posYW + stepXW + stepXW, posYW + stepXW + stepXW + stepXW); - __m128 mstepXW = _mm_set1_ps(stepXW * 4.0f); - __m128 mstepYW = _mm_set1_ps(stepYW); - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - __m128 mposXW = mposYW; - for (int ix = 0; ix < 2; ix++) - { - __m128 covered = _mm_cmplt_ps(_mm_loadu_ps(depth), mposXW); - mask0 <<= 4; - mask0 |= _mm_movemask_ps(_mm_shuffle_ps(covered, covered, _MM_SHUFFLE(0, 1, 2, 3))); - depth += 4; - mposXW = _mm_add_ps(mposXW, mstepXW); - } - mposYW = _mm_add_ps(mposYW, mstepYW); - } - - for (int iy = 0; iy < 4; iy++) - { - __m128 mposXW = mposYW; - for (int ix = 0; ix < 2; ix++) - { - __m128 covered = _mm_cmplt_ps(_mm_loadu_ps(depth), mposXW); - mask1 <<= 4; - mask1 |= _mm_movemask_ps(_mm_shuffle_ps(covered, covered, _MM_SHUFFLE(0, 1, 2, 3))); - depth += 4; - mposXW = 
_mm_add_ps(mposXW, mstepXW); - } - mposYW = _mm_add_ps(mposYW, mstepYW); - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; -} - -#endif - -void TriangleBlock::ClipTest() -{ - static const uint32_t clipxmask[8] = - { - 0, - 0x80808080, - 0xc0c0c0c0, - 0xe0e0e0e0, - 0xf0f0f0f0, - 0xf8f8f8f8, - 0xfcfcfcfc, - 0xfefefefe - }; - - static const uint32_t clipymask[8] = - { - 0, - 0xff000000, - 0xffff0000, - 0xffffff00, - 0xffffffff, - 0xffffffff, - 0xffffffff, - 0xffffffff - }; - - uint32_t xmask = (X + 8 <= clipright) ? 0xffffffff : clipxmask[clipright - X]; - uint32_t ymask0 = (Y + 4 <= clipbottom) ? 0xffffffff : clipymask[clipbottom - Y]; - uint32_t ymask1 = (Y + 8 <= clipbottom) ? 0xffffffff : clipymask[clipbottom - Y - 4]; - - Mask0 = Mask0 & xmask & ymask0; - Mask1 = Mask1 & xmask & ymask1; -} - -#ifdef NO_SSE - -void TriangleBlock::StencilEqualTest() -{ - // Stencil test the whole block, if possible - int block = (X >> 3) + (Y >> 3) * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; - if (skipBlock) - { - Mask0 = 0; - Mask1 = 0; - } - else if (!blockIsSingleStencil) - { - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] == stencilTestValue; - mask0 <<= 1; - mask0 |= (uint32_t)passStencilTest; - } - } - - for (int iy = 4; iy < q; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] == stencilTestValue; - mask1 <<= 1; - mask1 |= (uint32_t)passStencilTest; - } - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; - } -} - -#else - -void TriangleBlock::StencilEqualTest() -{ - // Stencil test the whole block, if possible - int block = (X >> 3) + (Y >> 3) * 
stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; - if (skipBlock) - { - Mask0 = 0; - Mask1 = 0; - } - else if (!blockIsSingleStencil) - { - __m128i mstencilTestValue = _mm_set1_epi16(stencilTestValue); - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 2; iy++) - { - __m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock); - - __m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); - __m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - __m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); - mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask0 <<= 16; - mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - - stencilBlock += 16; - } - - for (int iy = 0; iy < 2; iy++) - { - __m128i mstencilBlock = _mm_loadu_si128((const __m128i *)stencilBlock); - - __m128i mstencilTest = _mm_cmpeq_epi16(_mm_unpacklo_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); - __m128i mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - __m128i mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), 
_mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mstencilTest = _mm_cmpeq_epi16(_mm_unpackhi_epi8(mstencilBlock, _mm_setzero_si128()), mstencilTestValue); - mstencilTest0 = _mm_unpacklo_epi16(mstencilTest, mstencilTest); - mstencilTest1 = _mm_unpackhi_epi16(mstencilTest, mstencilTest); - __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mstencilTest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mstencilTest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask1 <<= 16; - mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - - stencilBlock += 16; - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; - } -} - -#endif - -void TriangleBlock::StencilGreaterEqualTest() -{ - // Stencil test the whole block, if possible - int block = (X >> 3) + (Y >> 3) * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t *stencilBlockMask = &stencilMasks[block]; - bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) < stencilTestValue; - if (skipBlock) - { - Mask0 = 0; - Mask1 = 0; - } - else if (!blockIsSingleStencil) - { - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] >= stencilTestValue; - mask0 <<= 1; - mask0 |= (uint32_t)passStencilTest; - } - } - - for (int iy = 4; iy < q; iy++) - { - for (int ix = 0; ix < q; ix++) - { - bool passStencilTest = stencilBlock[ix + iy * q] >= stencilTestValue; - mask1 <<= 1; - mask1 |= (uint32_t)passStencilTest; - } - } - - Mask0 = Mask0 & mask0; - Mask1 = Mask1 & mask1; - } -} - -TriangleBlock::CoverageResult TriangleBlock::AreaCoverageTest(int x0, int y0, int x1, int y1) -{ - // Corners of block - x0 = x0 << 4; - x1 = (x1 - 1) << 4; - y0 = y0 << 4; - y1 = (y1 - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - 
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - if (a == 0 || b == 0 || c == 0) // Skip block when outside an edge - { - return CoverageResult::none; - } - else if (a == 0xf && b == 0xf && c == 0xf) // Accept whole block when totally covered - { - return CoverageResult::full; - } - else // Partially covered block - { - return CoverageResult::partial; - } -} - -#ifdef NO_SSE - -void TriangleBlock::CoverageTest() -{ - // Corners of block - int x0 = X << 4; - int x1 = (X + q - 1) << 4; - int y0 = Y << 4; - int y1 = (Y + q - 1) << 4; - - // Evaluate half-space functions - bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0; - bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0; - bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0; - bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0; - int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3); - - bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0; - bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0; - bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0; - bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0; - int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3); - - bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0; - bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0; - bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0; - bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0; - int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3); - - if (a == 0 || b == 0 || c == 0) // Skip block when outside an edge - { - Mask0 = 0; - Mask1 = 0; - } - 
else if (a == 0xf && b == 0xf && c == 0xf) // Accept whole block when totally covered - { - Mask0 = 0xffffffff; - Mask1 = 0xffffffff; - } - else // Partially covered block - { - x0 = X << 4; - x1 = (X + q - 1) << 4; - int CY1 = C1 + DX12 * y0 - DY12 * x0; - int CY2 = C2 + DX23 * y0 - DY23 * x0; - int CY3 = C3 + DX31 * y0 - DY31 * x0; - - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - for (int iy = 0; iy < 4; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool covered = CX1 > 0 && CX2 > 0 && CX3 > 0; - mask0 <<= 1; - mask0 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - for (int iy = 4; iy < q; iy++) - { - int CX1 = CY1; - int CX2 = CY2; - int CX3 = CY3; - - for (int ix = 0; ix < q; ix++) - { - bool covered = CX1 > 0 && CX2 > 0 && CX3 > 0; - mask1 <<= 1; - mask1 |= (uint32_t)covered; - - CX1 -= FDY12; - CX2 -= FDY23; - CX3 -= FDY31; - } - - CY1 += FDX12; - CY2 += FDX23; - CY3 += FDX31; - } - - Mask0 = mask0; - Mask1 = mask1; - } -} - -#else - -void TriangleBlock::CoverageTest() -{ - // Corners of block - int x0 = X << 4; - int x1 = (X + q - 1) << 4; - int y0 = Y << 4; - int y1 = (Y + q - 1) << 4; - - __m128i mY = _mm_set_epi32(y0, y0, y1, y1); - __m128i mX = _mm_set_epi32(x0, x0, x1, x1); - - // Evaluate half-space functions - __m128i mCY1 = _mm_sub_epi32( - _mm_add_epi32(mC1, _mm_shuffle_epi32(_mm_mul_epu32(mDX12, mY), _MM_SHUFFLE(0, 0, 2, 2))), - _mm_shuffle_epi32(_mm_mul_epu32(mDY12, mX), _MM_SHUFFLE(0, 2, 0, 2))); - __m128i mA = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - - __m128i mCY2 = _mm_sub_epi32( - _mm_add_epi32(mC2, _mm_shuffle_epi32(_mm_mul_epu32(mDX23, mY), _MM_SHUFFLE(0, 0, 2, 2))), - _mm_shuffle_epi32(_mm_mul_epu32(mDY23, mX), _MM_SHUFFLE(0, 2, 0, 2))); - __m128i mB = _mm_cmpgt_epi32(mCY2, _mm_setzero_si128()); - - __m128i mCY3 = _mm_sub_epi32( - _mm_add_epi32(mC3, _mm_shuffle_epi32(_mm_mul_epu32(mDX31, mY), 
_MM_SHUFFLE(0, 0, 2, 2))), - _mm_shuffle_epi32(_mm_mul_epu32(mDY31, mX), _MM_SHUFFLE(0, 2, 0, 2))); - __m128i mC = _mm_cmpgt_epi32(mCY3, _mm_setzero_si128()); - - int abc = _mm_movemask_epi8(_mm_packs_epi16(_mm_packs_epi32(mA, mB), _mm_packs_epi32(mC, _mm_setzero_si128()))); - - if ((abc & 0xf) == 0 || (abc & 0xf0) == 0 || (abc & 0xf00) == 0) // Skip block when outside an edge - { - Mask0 = 0; - Mask1 = 0; - } - else if (abc == 0xfff) // Accept whole block when totally covered - { - Mask0 = 0xffffffff; - Mask1 = 0xffffffff; - } - else // Partially covered block - { - uint32_t mask0 = 0; - uint32_t mask1 = 0; - - mCY1 = _mm_sub_epi32(_mm_shuffle_epi32(mCY1, _MM_SHUFFLE(0, 0, 0, 0)), mFDY12Offset); - mCY2 = _mm_sub_epi32(_mm_shuffle_epi32(mCY2, _MM_SHUFFLE(0, 0, 0, 0)), mFDY23Offset); - mCY3 = _mm_sub_epi32(_mm_shuffle_epi32(mCY3, _MM_SHUFFLE(0, 0, 0, 0)), mFDY31Offset); - for (int iy = 0; iy < 2; iy++) - { - __m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - __m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = 
_mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i second = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask0 <<= 16; - mask0 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - } - - for (int iy = 0; iy < 2; iy++) - { - __m128i mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - __m128i mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i first = _mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mtest0 = _mm_cmpgt_epi32(mCY1, _mm_setzero_si128()); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY2, _mm_setzero_si128()), mtest0); - mtest0 = _mm_and_si128(_mm_cmpgt_epi32(mCY3, _mm_setzero_si128()), mtest0); - mtest1 = _mm_cmpgt_epi32(_mm_sub_epi32(mCY1, mFDY12x4), _mm_setzero_si128()); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY2, mFDY23x4), _mm_setzero_si128()), mtest1); - mtest1 = _mm_and_si128(_mm_cmpgt_epi32(_mm_sub_epi32(mCY3, mFDY31x4), _mm_setzero_si128()), mtest1); - mCY1 = _mm_add_epi32(mCY1, mFDX12); - mCY2 = _mm_add_epi32(mCY2, mFDX23); - mCY3 = _mm_add_epi32(mCY3, mFDX31); - __m128i second = 
_mm_packs_epi32(_mm_shuffle_epi32(mtest1, _MM_SHUFFLE(0, 1, 2, 3)), _mm_shuffle_epi32(mtest0, _MM_SHUFFLE(0, 1, 2, 3))); - - mask1 <<= 16; - mask1 |= _mm_movemask_epi8(_mm_packs_epi16(second, first)); - } - - Mask0 = mask0; - Mask1 = mask1; - } -} - -#endif - -void TriangleBlock::StencilWrite() -{ - int block = (X >> 3) + (Y >> 3) * stencilPitch; - uint8_t *stencilBlock = &stencilValues[block * 64]; - uint32_t &stencilBlockMask = stencilMasks[block]; - uint32_t writeValue = stencilWriteValue; - - if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) - { - stencilBlockMask = 0xffffff00 | writeValue; - } - else - { - uint32_t mask0 = Mask0; - uint32_t mask1 = Mask1; - - bool isSingleValue = (stencilBlockMask & 0xffffff00) == 0xffffff00; - if (isSingleValue) - { - uint8_t value = stencilBlockMask & 0xff; - for (int v = 0; v < 64; v++) - stencilBlock[v] = value; - stencilBlockMask = 0; - } - - int count = 0; - for (int v = 0; v < 32; v++) - { - if ((mask0 & (1 << 31)) || stencilBlock[v] == writeValue) - { - stencilBlock[v] = writeValue; - count++; - } - mask0 <<= 1; - } - for (int v = 32; v < 64; v++) - { - if ((mask1 & (1 << 31)) || stencilBlock[v] == writeValue) - { - stencilBlock[v] = writeValue; - count++; - } - mask1 <<= 1; - } - - if (count == 64) - stencilBlockMask = 0xffffff00 | writeValue; - } -} - -#ifdef NO_SSE - -void TriangleBlock::DepthWrite(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) - { - for (int iy = 0; iy < 8; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - *(depth++) = posXW; - posXW += stepXW; - } - posYW += stepYW; - } - } - else - { - uint32_t mask0 = Mask0; - uint32_t mask1 = Mask1; - - for (int iy 
= 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - if (mask0 & (1 << 31)) - *depth = posXW; - posXW += stepXW; - mask0 <<= 1; - depth++; - } - posYW += stepYW; - } - - for (int iy = 0; iy < 4; iy++) - { - float posXW = posYW; - for (int ix = 0; ix < 8; ix++) - { - if (mask1 & (1 << 31)) - *depth = posXW; - posXW += stepXW; - mask1 <<= 1; - depth++; - } - posYW += stepYW; - } - } -} - -#else - -void TriangleBlock::DepthWrite(const TriDrawTriangleArgs *args) -{ - int block = (X >> 3) + (Y >> 3) * zbufferPitch; - float *depth = zbuffer + block * 64; - - const ShadedTriVertex &v1 = *args->v1; - - float stepXW = args->gradientX.W; - float stepYW = args->gradientY.W; - float posYW = v1.w + stepXW * (X - v1.x) + stepYW * (Y - v1.y) + args->depthOffset; - - __m128 mposYW = _mm_setr_ps(posYW, posYW + stepXW, posYW + stepXW + stepXW, posYW + stepXW + stepXW + stepXW); - __m128 mstepXW = _mm_set1_ps(stepXW * 4.0f); - __m128 mstepYW = _mm_set1_ps(stepYW); - - if (Mask0 == 0xffffffff && Mask1 == 0xffffffff) - { - for (int iy = 0; iy < 8; iy++) - { - __m128 mposXW = mposYW; - _mm_storeu_ps(depth, mposXW); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); - _mm_storeu_ps(depth, mposXW); depth += 4; - mposYW = _mm_add_ps(mposYW, mstepYW); - } - } - else - { - __m128i mxormask = _mm_set1_epi32(0xffffffff); - __m128i topfour = _mm_setr_epi32(1 << 31, 1 << 30, 1 << 29, 1 << 28); - - __m128i mmask0 = _mm_set1_epi32(Mask0); - __m128i mmask1 = _mm_set1_epi32(Mask1); - - for (int iy = 0; iy < 4; iy++) - { - __m128 mposXW = mposYW; - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask0 = _mm_slli_epi32(mmask0, 4); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask0, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask0 = 
_mm_slli_epi32(mmask0, 4); depth += 4; - mposYW = _mm_add_ps(mposYW, mstepYW); - } - - for (int iy = 0; iy < 4; iy++) - { - __m128 mposXW = mposYW; - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask1 = _mm_slli_epi32(mmask1, 4); depth += 4; mposXW = _mm_add_ps(mposXW, mstepXW); - _mm_maskmoveu_si128(_mm_castps_si128(mposXW), _mm_xor_si128(_mm_cmpeq_epi32(_mm_and_si128(mmask1, topfour), _mm_setzero_si128()), mxormask), (char*)depth); mmask1 = _mm_slli_epi32(mmask1, 4); depth += 4; - mposYW = _mm_add_ps(mposYW, mstepYW); - } - } -} - -#endif - -void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) -{ - TriangleBlock block(args, thread); - block.Render(); -} - static void SortVertices(const TriDrawTriangleArgs *args, ShadedTriVertex **sortedVertices) { sortedVertices[0] = args->v1; @@ -1143,7 +52,7 @@ static void SortVertices(const TriDrawTriangleArgs *args, ShadedTriVertex **sort std::swap(sortedVertices[1], sortedVertices[2]); } -void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) +void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread) { // Sort vertices by Y position ShadedTriVertex *sortedVertices[3]; @@ -1230,13 +139,22 @@ void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleT float v1Y = args->v1->y; float v1W = args->v1->w; + bool depthTest = args->uniforms->DepthTest(); + bool stencilTest = true; + bool writeColor = args->uniforms->WriteColor(); + bool writeStencil = args->uniforms->WriteStencil(); + bool writeDepth = args->uniforms->WriteDepth(); + uint8_t stencilTestValue = args->uniforms->StencilTestValue(); + uint8_t stencilWriteValue = args->uniforms->StencilWriteValue(); + int num_cores = thread->num_cores; for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores) { int x = 
leftEdge[y]; int xend = rightEdge[y]; - float *zbufferLine = args->zbuffer + args->stencilPitch * 8 * y; + float *zbufferLine = args->zbuffer + args->pitch * y; + uint8_t *stencilLine = args->stencilbuffer + args->pitch * y; float startX = x + (0.5f - v1X); float startY = y + (0.5f - v1Y); @@ -1249,59 +167,211 @@ void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleT { int xstart = x; - int xendsse = x + ((xend - x) & ~3); - __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && x < xendsse) + if (depthTest && stencilTest) { - _mm_storeu_ps(zbufferLine + x, mposXW); - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; - } - posXW = _mm_cvtss_f32(mposXW); + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && + stencilLine[x] == stencilTestValue && + stencilLine[x + 1] == stencilTestValue && + stencilLine[x + 2] == stencilTestValue && + stencilLine[x + 3] == stencilTestValue && + x < xendsse) + { + if (writeDepth) + _mm_storeu_ps(zbufferLine + x, mposXW); + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); - while (zbufferLine[x] <= posXW && x < xend) + while (zbufferLine[x] <= posXW && stencilLine[x] == stencilTestValue && x < xend) + { + if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (depthTest) { - zbufferLine[x] = posXW; - posXW += stepXW; - x++; + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && x < xendsse) + { + if (writeDepth) + _mm_storeu_ps(zbufferLine + x, mposXW); + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); + + while (zbufferLine[x] <= posXW && x < xend) + { 
+ if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] == stencilTestValue && x < xend) + x++; + } + else + { + x = xend; } if (x > xstart) - drawfunc(y, xstart, x, args); - - xendsse = x + ((xend - x) & ~3); - mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); - while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 && x < xendsse) { - mposXW = _mm_add_ps(mposXW, mstepXW); - x += 4; + if (writeColor) + drawfunc(y, xstart, x, args); + + if (writeStencil) + { + for (int i = xstart; i < x; i++) + stencilLine[i] = stencilWriteValue; + } + + if (!depthTest && writeDepth) + { + for (int i = xstart; i < x; i++) + { + zbufferLine[i] = posXW; + posXW += stepXW; + } + } } - posXW = _mm_cvtss_f32(mposXW); - while (zbufferLine[x] > posXW && x < xend) + if (depthTest && stencilTest) { - posXW += stepXW; - x++; + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while ((_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 || + stencilLine[x] != stencilTestValue || + stencilLine[x + 1] != stencilTestValue || + stencilLine[x + 2] != stencilTestValue || + stencilLine[x + 3] != stencilTestValue) && + x < xendsse) + { + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); + + while ((zbufferLine[x] > posXW || stencilLine[x] != stencilTestValue) && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (depthTest) + { + int xendsse = x + ((xend - x) / 4); + __m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW); + while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 && x < xendsse) + { + mposXW = _mm_add_ps(mposXW, mstepXW); + x += 4; + } + posXW = _mm_cvtss_f32(mposXW); + + while (zbufferLine[x] > posXW && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] != stencilTestValue && x < xend) + { 
+ posXW += stepXW; + x++; + } } } #else while (x < xend) { int xstart = x; - while (zbufferLine[x] <= posXW && x < xend) + + if (depthTest && stencilTest) { - zbufferLine[x] = posXW; - posXW += stepXW; - x++; + while (zbufferLine[x] <= posXW && stencilLine[x] == stencilTestValue && x < xend) + { + if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (depthTest) + { + while (zbufferLine[x] <= posXW && x < xend) + { + if (writeDepth) + zbufferLine[x] = posXW; + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] == stencilTestValue && x < xend) + x++; + } + else + { + x = xend; } if (x > xstart) - drawfunc(y, xstart, x, args); - - while (zbufferLine[x] > posXW && x < xend) { - posXW += stepXW; - x++; + if (writeColor) + drawfunc(y, xstart, x, args); + + if (writeStencil) + { + for (int i = xstart; i < x; i++) + stencilLine[i] = stencilWriteValue; + } + + if (!depthTest && writeDepth) + { + for (int i = xstart; i < x; i++) + { + zbufferLine[i] = posXW; + posXW += stepXW; + } + } + } + + if (depthTest && stencilTest) + { + while ((zbufferLine[x] > posXW || stencilLine[x] != stencilTestValue) && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (depthTest) + { + while (zbufferLine[x] > posXW && x < xend) + { + posXW += stepXW; + x++; + } + } + else if (stencilTest) + { + while (stencilLine[x] != stencilTestValue && x < xend) + { + posXW += stepXW; + x++; + } } } #endif @@ -2208,6 +1278,516 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) } } +template +void DrawRect8(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) +{ + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 
<= x0 || y1 <= y0) + return; + + const uint8_t *colormaps, *texPixels, *translation; + int texWidth, texHeight; + uint32_t fillcolor; + int alpha; + uint32_t light; + + texPixels = args->TexturePixels(); + translation = args->Translation(); + texWidth = args->TextureWidth(); + texHeight = args->TextureHeight(); + fillcolor = args->Color(); + alpha = args->Alpha(); + colormaps = args->BaseColormap(); + light = args->Light(); + light += light >> 7; // 255 -> 256 + light = ((256 - light) * NUMCOLORMAPS) & 0xffffff00; + + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + uint32_t posV = startV; + int num_cores = thread->num_cores; + int skip = thread->skipped_by_thread(y0); + posV += skip * stepV; + stepV *= num_cores; + for (int y = y0 + skip; y < y1; y += num_cores, posV += stepV) + { + uint8_t *destLine = ((uint8_t*)destOrg) + y * destPitch; + + uint32_t posU = startU; + for (int x = x0; x < x1; x++) + { + int fg = 0; + int fgalpha = 255; + + if (ModeT::SWFlags & SWSTYLEF_Fill) + { + fg = fillcolor; + } + else if (ModeT::BlendOp != STYLEOP_Fuzz) + { + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + fg = texPixels[texelX * texHeight + texelY]; + + if (ModeT::SWFlags & SWSTYLEF_Translated) + fg = translation[fg]; + + fgalpha = (fg != 0) ? 
255 : 0; + } + + if (ModeT::BlendOp == STYLEOP_Fuzz) + { + using namespace swrenderer; + + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0; + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + + uint32_t a = 256 - sampleshadeout; + + uint32_t dest = GPalette.BaseColors[destLine[x]].d; + uint32_t r = (RPART(dest) * a) >> 8; + uint32_t g = (GPART(dest) * a) >> 8; + uint32_t b = (BPART(dest) * a) >> 8; + destLine[x] = RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; + } + else + { + if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) + { + if (ModeT::Flags & STYLEF_RedIsAlpha) + fgalpha = fg; + fg = fillcolor; + } + + if (!(ModeT::Flags & STYLEF_Alpha1)) + { + fgalpha = (fgalpha * alpha) >> 8; + } + + uint8_t shadedfg = colormaps[light + fg]; + + if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) + { + destLine[x] = shadedfg; + } + else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One) + { + uint32_t src = GPalette.BaseColors[shadedfg]; + uint32_t dest = GPalette.BaseColors[destLine[x]]; + + if (ModeT::BlendOp == STYLEOP_Add) + { + uint32_t out_r = MIN(RPART(dest) + RPART(src), 255); + uint32_t out_g = MIN(GPART(dest) + GPART(src), 255); + uint32_t out_b = MIN(BPART(dest) + BPART(src), 255); + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + uint32_t out_r = MAX(RPART(dest) - 
RPART(src), 0); + uint32_t out_g = MAX(GPART(dest) - GPART(src), 0); + uint32_t out_b = MAX(BPART(dest) - BPART(src), 0); + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + uint32_t out_r = MAX(RPART(src) - RPART(dest), 0); + uint32_t out_g = MAX(GPART(src) - GPART(dest), 0); + uint32_t out_b = MAX(BPART(src) - BPART(dest), 0); + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + } + else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor) + { + uint32_t src = GPalette.BaseColors[shadedfg]; + uint32_t dest = GPalette.BaseColors[destLine[x]]; + + uint32_t sfactor_r = RPART(src); sfactor_r += sfactor_r >> 7; // 255 -> 256 + uint32_t sfactor_g = GPART(src); sfactor_g += sfactor_g >> 7; // 255 -> 256 + uint32_t sfactor_b = BPART(src); sfactor_b += sfactor_b >> 7; // 255 -> 256 + uint32_t sfactor_a = fgalpha; sfactor_a += sfactor_a >> 7; // 255 -> 256 + uint32_t dfactor_r = 256 - sfactor_r; + uint32_t dfactor_g = 256 - sfactor_g; + uint32_t dfactor_b = 256 - sfactor_b; + uint32_t out_r = (RPART(dest) * dfactor_r + RPART(src) * sfactor_r + 128) >> 8; + uint32_t out_g = (GPART(dest) * dfactor_g + GPART(src) * sfactor_g + 128) >> 8; + uint32_t out_b = (BPART(dest) * dfactor_b + BPART(src) * sfactor_b + 128) >> 8; + + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) + { + destLine[x] = shadedfg; + } + else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) + { + uint32_t src = GPalette.BaseColors[shadedfg]; + uint32_t dest = GPalette.BaseColors[destLine[x]]; + + uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 + uint32_t dfactor = 256 - sfactor; + uint32_t src_r = RPART(src) * sfactor; + uint32_t src_g = GPART(src) * sfactor; + 
uint32_t src_b = BPART(src) * sfactor; + uint32_t dest_r = RPART(dest); + uint32_t dest_g = GPART(dest); + uint32_t dest_b = BPART(dest); + if (ModeT::BlendDest == STYLEALPHA_One) + { + dest_r <<= 8; + dest_g <<= 8; + dest_b <<= 8; + } + else + { + uint32_t dfactor = 256 - sfactor; + dest_r *= dfactor; + dest_g *= dfactor; + dest_b *= dfactor; + } + + uint32_t out_r, out_g, out_b; + if (ModeT::BlendOp == STYLEOP_Add) + { + if (ModeT::BlendDest == STYLEALPHA_One) + { + out_r = MIN((dest_r + src_r + 128) >> 8, 255); + out_g = MIN((dest_g + src_g + 128) >> 8, 255); + out_b = MIN((dest_b + src_b + 128) >> 8, 255); + } + else + { + out_r = (dest_r + src_r + 128) >> 8; + out_g = (dest_g + src_g + 128) >> 8; + out_b = (dest_b + src_b + 128) >> 8; + } + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + out_r = MAX(static_cast(dest_r - src_r + 128) >> 8, 0); + out_g = MAX(static_cast(dest_g - src_g + 128) >> 8, 0); + out_b = MAX(static_cast(dest_b - src_b + 128) >> 8, 0); + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + out_r = MAX(static_cast(src_r - dest_r + 128) >> 8, 0); + out_g = MAX(static_cast(src_g - dest_g + 128) >> 8, 0); + out_b = MAX(static_cast(src_b - dest_b + 128) >> 8, 0); + } + + destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + } + + posU += stepU; + } + } +} + +template +void DrawRectOpt32(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) +{ + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + const uint32_t *texPixels, *translation; + int texWidth, texHeight; + uint32_t fillcolor; + int alpha; + uint32_t light; + uint32_t shade_fade_r, shade_fade_g, shade_fade_b, 
shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate; + + texPixels = (const uint32_t*)args->TexturePixels(); + translation = (const uint32_t*)args->Translation(); + texWidth = args->TextureWidth(); + texHeight = args->TextureHeight(); + fillcolor = args->Color(); + alpha = args->Alpha(); + light = args->Light(); + light += light >> 7; // 255 -> 256 + + if (OptT::Flags & SWOPT_ColoredFog) + { + shade_fade_r = args->ShadeFadeRed(); + shade_fade_g = args->ShadeFadeGreen(); + shade_fade_b = args->ShadeFadeBlue(); + shade_light_r = args->ShadeLightRed(); + shade_light_g = args->ShadeLightGreen(); + shade_light_b = args->ShadeLightBlue(); + desaturate = args->ShadeDesaturate(); + inv_desaturate = 256 - desaturate; + } + + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + uint32_t posV = startV; + int num_cores = thread->num_cores; + int skip = thread->skipped_by_thread(y0); + posV += skip * stepV; + stepV *= num_cores; + for (int y = y0 + skip; y < y1; y += num_cores, posV += stepV) + { + uint32_t *destLine = ((uint32_t*)destOrg) + y * destPitch; + + uint32_t posU = startU; + for (int x = x0; x < x1; x++) + { + uint32_t fg = 0; + + if (ModeT::SWFlags & SWSTYLEF_Fill) + { + fg = fillcolor; + } + else if (ModeT::SWFlags & SWSTYLEF_FogBoundary) + { + fg = destLine[x]; + } + else if (ModeT::BlendOp != STYLEOP_Fuzz) + { + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + + if (ModeT::SWFlags & SWSTYLEF_Translated) + { + fg = translation[((const uint8_t*)texPixels)[texelX * texHeight + texelY]]; + } + else if 
(ModeT::Flags & STYLEF_RedIsAlpha) + { + fg = ((const uint8_t*)texPixels)[texelX * texHeight + texelY]; + } + else + { + fg = texPixels[texelX * texHeight + texelY]; + } + } + + if (ModeT::BlendOp == STYLEOP_Fuzz) + { + using namespace swrenderer; + + uint32_t texelX = (((posU << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = (((posV << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + + uint32_t a = 256 - sampleshadeout; + + uint32_t dest = destLine[x]; + uint32_t out_r = (RPART(dest) * a) >> 8; + uint32_t out_g = (GPART(dest) * a) >> 8; + uint32_t out_b = (BPART(dest) * a) >> 8; + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else + { + if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) + { + if (ModeT::Flags & STYLEF_RedIsAlpha) + fg = (fg << 24) | (fillcolor & 0x00ffffff); + else + fg = (fg & 0xff000000) | (fillcolor & 0x00ffffff); + } + + uint32_t fgalpha = fg >> 24; + + if (!(ModeT::Flags & STYLEF_Alpha1)) + { + fgalpha = (fgalpha * alpha) >> 8; + } + + int lightshade = light; + + uint32_t lit_r = 0, lit_g = 0, lit_b = 0; + + uint32_t shadedfg_r, shadedfg_g, shadedfg_b; + if (OptT::Flags & SWOPT_ColoredFog) + { + uint32_t fg_r = RPART(fg); + uint32_t fg_g = GPART(fg); + uint32_t fg_b = BPART(fg); + uint32_t intensity = ((fg_r * 77 + fg_g * 143 + fg_b * 37) >> 8) * desaturate; + shadedfg_r = (((shade_fade_r + ((fg_r * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_r) >> 8; + shadedfg_g = 
(((shade_fade_g + ((fg_g * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_g) >> 8; + shadedfg_b = (((shade_fade_b + ((fg_b * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_b) >> 8; + } + else + { + shadedfg_r = (RPART(fg) * lightshade) >> 8; + shadedfg_g = (GPART(fg) * lightshade) >> 8; + shadedfg_b = (BPART(fg) * lightshade) >> 8; + } + + if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) + { + destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); + } + else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One) + { + uint32_t dest = destLine[x]; + + if (ModeT::BlendOp == STYLEOP_Add) + { + uint32_t out_r = MIN(RPART(dest) + shadedfg_r, 255); + uint32_t out_g = MIN(GPART(dest) + shadedfg_g, 255); + uint32_t out_b = MIN(BPART(dest) + shadedfg_b, 255); + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + uint32_t out_r = MAX(RPART(dest) - shadedfg_r, 0); + uint32_t out_g = MAX(GPART(dest) - shadedfg_g, 0); + uint32_t out_b = MAX(BPART(dest) - shadedfg_b, 0); + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + uint32_t out_r = MAX(shadedfg_r - RPART(dest), 0); + uint32_t out_g = MAX(shadedfg_g - GPART(dest), 0); + uint32_t out_b = MAX(shadedfg_b - BPART(dest), 0); + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + } + else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor) + { + uint32_t dest = destLine[x]; + + uint32_t sfactor_r = shadedfg_r; sfactor_r += sfactor_r >> 7; // 255 -> 256 + uint32_t sfactor_g = shadedfg_g; sfactor_g += sfactor_g >> 7; // 255 -> 256 + uint32_t sfactor_b = shadedfg_b; sfactor_b += sfactor_b >> 7; // 255 -> 256 + uint32_t sfactor_a = fgalpha; sfactor_a += sfactor_a >> 7; // 255 -> 256 + uint32_t dfactor_r = 256 - sfactor_r; + uint32_t dfactor_g = 256 - sfactor_g; + uint32_t dfactor_b = 256 - sfactor_b; + uint32_t out_r = 
(RPART(dest) * dfactor_r + shadedfg_r * sfactor_r + 128) >> 8; + uint32_t out_g = (GPART(dest) * dfactor_g + shadedfg_g * sfactor_g + 128) >> 8; + uint32_t out_b = (BPART(dest) * dfactor_b + shadedfg_b * sfactor_b + 128) >> 8; + + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) + { + destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); + } + else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) + { + uint32_t dest = destLine[x]; + + uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 + uint32_t src_r = shadedfg_r * sfactor; + uint32_t src_g = shadedfg_g * sfactor; + uint32_t src_b = shadedfg_b * sfactor; + uint32_t dest_r = RPART(dest); + uint32_t dest_g = GPART(dest); + uint32_t dest_b = BPART(dest); + if (ModeT::BlendDest == STYLEALPHA_One) + { + dest_r <<= 8; + dest_g <<= 8; + dest_b <<= 8; + } + else + { + uint32_t dfactor = 256 - sfactor; + dest_r *= dfactor; + dest_g *= dfactor; + dest_b *= dfactor; + } + + uint32_t out_r, out_g, out_b; + if (ModeT::BlendOp == STYLEOP_Add) + { + if (ModeT::BlendDest == STYLEALPHA_One) + { + out_r = MIN((dest_r + src_r + 128) >> 8, 255); + out_g = MIN((dest_g + src_g + 128) >> 8, 255); + out_b = MIN((dest_b + src_b + 128) >> 8, 255); + } + else + { + out_r = (dest_r + src_r + 128) >> 8; + out_g = (dest_g + src_g + 128) >> 8; + out_b = (dest_b + src_b + 128) >> 8; + } + } + else if (ModeT::BlendOp == STYLEOP_RevSub) + { + out_r = MAX(static_cast(dest_r - src_r + 128) >> 8, 0); + out_g = MAX(static_cast(dest_g - src_g + 128) >> 8, 0); + out_b = MAX(static_cast(dest_b - src_b + 128) >> 8, 0); + } + else //if (ModeT::BlendOp == STYLEOP_Sub) + { + out_r = MAX(static_cast(src_r - dest_r + 128) >> 8, 0); + out_g = MAX(static_cast(src_g - dest_g + 128) >> 8, 0); + out_b = MAX(static_cast(src_b - dest_b + 128) >> 8, 0); + } + + destLine[x] = 
MAKEARGB(255, out_r, out_g, out_b); + } + } + + posU += stepU; + } + } +} + +template +void DrawRect32(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, PolyTriangleThreadData *thread) +{ + using namespace TriScreenDrawerModes; + + if (args->SimpleShade()) + DrawRectOpt32(destOrg, destWidth, destHeight, destPitch, args, thread); + else + DrawRectOpt32(destOrg, destWidth, destHeight, destPitch, args, thread); +} + void(*ScreenTriangle::SpanDrawers8[])(int, int, int, const TriDrawTriangleArgs *) = { &DrawSpan8, @@ -2274,58 +1854,66 @@ void(*ScreenTriangle::SpanDrawers32[])(int, int, int, const TriDrawTriangleArgs void(*ScreenTriangle::RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, PolyTriangleThreadData *) = { - &RectScreenDrawer8::Execute, // TextureOpaque - &RectScreenDrawer8::Execute, // TextureMasked - &RectScreenDrawer8::Execute, // TextureAdd - &RectScreenDrawer8::Execute, // TextureSub - &RectScreenDrawer8::Execute, // TextureRevSub - &RectScreenDrawer8::Execute, // TextureAddSrcColor - &RectScreenDrawer8::Execute, // TranslatedOpaque - &RectScreenDrawer8::Execute, // TranslatedMasked - &RectScreenDrawer8::Execute, // TranslatedAdd - &RectScreenDrawer8::Execute, // TranslatedSub - &RectScreenDrawer8::Execute, // TranslatedRevSub - &RectScreenDrawer8::Execute, // TranslatedAddSrcColor - &RectScreenDrawer8::Execute, // Shaded - &RectScreenDrawer8::Execute, // AddShaded - &RectScreenDrawer8::Execute, // Stencil - &RectScreenDrawer8::Execute, // AddStencil - &RectScreenDrawer8::Execute, // FillOpaque - &RectScreenDrawer8::Execute, // FillAdd - &RectScreenDrawer8::Execute, // FillSub - &RectScreenDrawer8::Execute, // FillRevSub - &RectScreenDrawer8::Execute, // FillAddSrcColor - &RectScreenDrawer8::Execute, // Skycap - &RectScreenDrawer8::Execute, // Fuzz - &RectScreenDrawer8::Execute // FogBoundary + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + 
&DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8, + &DrawRect8 }; void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, PolyTriangleThreadData *) = { - &RectScreenDrawer32::Execute, // TextureOpaque - &RectScreenDrawer32::Execute, // TextureMasked - &RectScreenDrawer32::Execute, // TextureAdd - &RectScreenDrawer32::Execute, // TextureSub - &RectScreenDrawer32::Execute, // TextureRevSub - &RectScreenDrawer32::Execute, // TextureAddSrcColor - &RectScreenDrawer32::Execute, // TranslatedOpaque - &RectScreenDrawer32::Execute, // TranslatedMasked - &RectScreenDrawer32::Execute, // TranslatedAdd - &RectScreenDrawer32::Execute, // TranslatedSub - &RectScreenDrawer32::Execute, // TranslatedRevSub - &RectScreenDrawer32::Execute, // TranslatedAddSrcColor - &RectScreenDrawer32::Execute, // Shaded - &RectScreenDrawer32::Execute, // AddShaded - &RectScreenDrawer32::Execute, // Stencil - &RectScreenDrawer32::Execute, // AddStencil - &RectScreenDrawer32::Execute, // FillOpaque - &RectScreenDrawer32::Execute, // FillAdd - &RectScreenDrawer32::Execute, // FillSub - &RectScreenDrawer32::Execute, // FillRevSub - &RectScreenDrawer32::Execute, // FillAddSrcColor - &RectScreenDrawer32::Execute, // Skycap - &RectScreenDrawer32::Execute, // Fuzz - &RectScreenDrawer32::Execute, // FogBoundary + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32, + &DrawRect32 }; int 
ScreenTriangle::FuzzStart = 0; diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index 036e4a55b..a52d897c2 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -52,9 +52,7 @@ struct TriDrawTriangleArgs ShadedTriVertex *v3; int32_t clipright; int32_t clipbottom; - uint8_t *stencilValues; - uint32_t *stencilMasks; - int32_t stencilPitch; + uint8_t *stencilbuffer; float *zbuffer; const PolyDrawArgs *uniforms; bool destBgra; @@ -138,39 +136,10 @@ enum class TriBlendMode AddShadedTranslated }; -enum class RectBlendMode -{ - TextureOpaque, - TextureMasked, - TextureAdd, - TextureSub, - TextureRevSub, - TextureAddSrcColor, - TranslatedOpaque, - TranslatedMasked, - TranslatedAdd, - TranslatedSub, - TranslatedRevSub, - TranslatedAddSrcColor, - Shaded, - AddShaded, - Stencil, - AddStencil, - FillOpaque, - FillAdd, - FillSub, - FillRevSub, - FillAddSrcColor, - Skycap, - Fuzz, - FogBoundary -}; - class ScreenTriangle { public: static void Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); - static void DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread); static void(*SpanDrawers8[])(int y, int x0, int x1, const TriDrawTriangleArgs *args); static void(*SpanDrawers32[])(int y, int x0, int x1, const TriDrawTriangleArgs *args); @@ -222,35 +191,6 @@ namespace TriScreenDrawerModes struct StyleAddStencilTranslated { static const int BlendOp = STYLEOP_Add, BlendSrc = STYLEALPHA_Src, BlendDest = STYLEALPHA_One, Flags = STYLEF_ColorIsFixed, SWFlags = SWSTYLEF_Translated; }; struct StyleAddShadedTranslated { static const int BlendOp = STYLEOP_Add, BlendSrc = STYLEALPHA_Src, BlendDest = STYLEALPHA_One, Flags = STYLEF_RedIsAlpha | STYLEF_ColorIsFixed, SWFlags = SWSTYLEF_Translated; }; - enum class BlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp, AddSrcColorOneMinusSrcColor, Shaded, AddClampShaded }; - struct OpaqueBlend { 
static const int Mode = (int)BlendModes::Opaque; }; - struct MaskedBlend { static const int Mode = (int)BlendModes::Masked; }; - struct AddClampBlend { static const int Mode = (int)BlendModes::AddClamp; }; - struct SubClampBlend { static const int Mode = (int)BlendModes::SubClamp; }; - struct RevSubClampBlend { static const int Mode = (int)BlendModes::RevSubClamp; }; - struct AddSrcColorBlend { static const int Mode = (int)BlendModes::AddSrcColorOneMinusSrcColor; }; - struct ShadedBlend { static const int Mode = (int)BlendModes::Shaded; }; - struct AddClampShadedBlend { static const int Mode = (int)BlendModes::AddClampShaded; }; - - enum class FilterModes { Nearest, Linear }; - struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; - struct LinearFilter { static const int Mode = (int)FilterModes::Linear; }; - - enum class ShadeMode { None, Simple, Advanced }; - struct NoShade { static const int Mode = (int)ShadeMode::None; }; - struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; }; - struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; }; - - enum class Samplers { Texture, Fill, Shaded, Stencil, Translated, Skycap, Fuzz, FogBoundary }; - struct TextureSampler { static const int Mode = (int)Samplers::Texture; }; - struct FillSampler { static const int Mode = (int)Samplers::Fill; }; - struct ShadedSampler { static const int Mode = (int)Samplers::Shaded; }; - struct StencilSampler { static const int Mode = (int)Samplers::Stencil; }; - struct TranslatedSampler { static const int Mode = (int)Samplers::Translated; }; - struct SkycapSampler { static const int Mode = (int)Samplers::Skycap; }; - struct FuzzSampler { static const int Mode = (int)Samplers::Fuzz; }; - struct FogBoundarySampler { static const int Mode = (int)Samplers::FogBoundary; }; - enum SWOptFlags { SWOPT_DynLights = 1, diff --git a/src/polyrenderer/poly_renderer.cpp b/src/polyrenderer/poly_renderer.cpp index 6fee5d841..76d8a937e 100644 --- 
a/src/polyrenderer/poly_renderer.cpp +++ b/src/polyrenderer/poly_renderer.cpp @@ -201,11 +201,11 @@ void PolyRenderer::SetSceneViewport() height = (screenblocks*SCREENHEIGHT / 10) & ~7; int bottom = SCREENHEIGHT - (height + viewwindowy - ((height - viewheight) / 2)); - PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget, false); + PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, viewwindowx, SCREENHEIGHT - bottom - height, viewwidth, height, RenderTarget); } else // Rendering to camera texture { - PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, 0, 0, RenderTarget->GetWidth(), RenderTarget->GetHeight(), RenderTarget, false); + PolyTriangleDrawer::SetViewport(Threads.MainThread()->DrawQueue, 0, 0, RenderTarget->GetWidth(), RenderTarget->GetHeight(), RenderTarget); } } diff --git a/src/polyrenderer/scene/poly_model.cpp b/src/polyrenderer/scene/poly_model.cpp index 344749659..1a5aa842f 100644 --- a/src/polyrenderer/scene/poly_model.cpp +++ b/src/polyrenderer/scene/poly_model.cpp @@ -51,7 +51,7 @@ PolyModelRenderer::PolyModelRenderer(PolyRenderThread *thread, const Mat4f &worl { } -void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) +void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -59,12 +59,14 @@ void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, co if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void PolyModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { if (actor->RenderStyle == 
LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); ModelActor = nullptr; } @@ -98,7 +100,7 @@ VSMatrix PolyModelRenderer::GetViewToWorldMatrix() return objectToWorld; } -void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) +void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -107,6 +109,7 @@ void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectT if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, mirrored); } void PolyModelRenderer::EndDrawHUDModel(AActor *actor) @@ -116,6 +119,7 @@ void PolyModelRenderer::EndDrawHUDModel(AActor *actor) if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); } void PolyModelRenderer::SetInterpolation(double interpolation) diff --git a/src/polyrenderer/scene/poly_model.h b/src/polyrenderer/scene/poly_model.h index cbfafce59..6e6ec57bc 100644 --- a/src/polyrenderer/scene/poly_model.h +++ b/src/polyrenderer/scene/poly_model.h @@ -36,13 +36,13 @@ public: ModelRendererType GetType() const override { return PolyModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) 
override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/posix/cocoa/i_common.h b/src/posix/cocoa/i_common.h index be395cf76..f60d82ced 100644 --- a/src/posix/cocoa/i_common.h +++ b/src/posix/cocoa/i_common.h @@ -37,22 +37,6 @@ #import -struct RenderBufferOptions -{ - float pixelScale; - - float shiftX; - float shiftY; - - float width; - float height; - - bool dirty; -}; - -extern RenderBufferOptions rbOpts; - - // Version of AppKit framework we are interested in // The following values are needed to build with earlier SDKs diff --git a/src/posix/cocoa/i_input.mm b/src/posix/cocoa/i_input.mm index 745b0c6dc..78841d906 100644 --- a/src/posix/cocoa/i_input.mm +++ b/src/posix/cocoa/i_input.mm @@ -484,11 +484,14 @@ void NSEventToGameMousePosition(NSEvent* inEvent, event_t* outEvent) const NSPoint viewPos = [view convertPointToBacking:windowRect.origin]; const CGFloat frameHeight = I_GetContentViewSize(window).height; - const CGFloat posX = ( viewPos.x - rbOpts.shiftX) / rbOpts.pixelScale; - const CGFloat posY = (frameHeight - viewPos.y - rbOpts.shiftY) / rbOpts.pixelScale; + outEvent->data1 = static_cast( viewPos.x); + outEvent->data2 = static_cast(frameHeight - viewPos.y); - outEvent->data1 = static_cast(posX); - outEvent->data2 = static_cast(posY); + // Compensate letterbox adjustment done by cross-platform code + // More elegant solution is a bit problematic due to HiDPI/Retina support + outEvent->data2 += (screen->GetTrueHeight() - screen->VideoHeight) / 2; + + screen->ScaleCoordsFromWindow(outEvent->data1, outEvent->data2); } void 
ProcessMouseMoveInMenu(NSEvent* theEvent) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 5104a043c..8593f86e7 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -115,9 +115,6 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_ EXTERN_CVAR(Bool, gl_smooth_rendered) -RenderBufferOptions rbOpts; - - // --------------------------------------------------------------------------- @@ -544,20 +541,6 @@ void CocoaVideo::SetFullscreenMode(const int width, const int height) ? [screen convertRectToBacking:screenFrame] : screenFrame; - const float displayWidth = displayRect.size.width; - const float displayHeight = displayRect.size.height; - - const float pixelScaleFactorX = displayWidth / static_cast(width ); - const float pixelScaleFactorY = displayHeight / static_cast(height); - - rbOpts.pixelScale = MIN(pixelScaleFactorX, pixelScaleFactorY); - - rbOpts.width = width * rbOpts.pixelScale; - rbOpts.height = height * rbOpts.pixelScale; - - rbOpts.shiftX = (displayWidth - rbOpts.width ) / 2.0f; - rbOpts.shiftY = (displayHeight - rbOpts.height) / 2.0f; - if (!m_fullscreen) { [m_window setLevel:LEVEL_FULLSCREEN]; @@ -571,14 +554,6 @@ void CocoaVideo::SetFullscreenMode(const int width, const int height) void CocoaVideo::SetWindowedMode(const int width, const int height) { - rbOpts.pixelScale = 1.0f; - - rbOpts.width = static_cast(width ); - rbOpts.height = static_cast(height); - - rbOpts.shiftX = 0.0f; - rbOpts.shiftY = 0.0f; - const NSSize windowPixelSize = NSMakeSize(width, height); const NSSize windowSize = vid_hidpi ? 
[[m_window contentView] convertSizeFromBacking:windowPixelSize] @@ -621,8 +596,6 @@ void CocoaVideo::SetMode(const int width, const int height, const bool fullscree SetWindowedMode(width, height); } - rbOpts.dirty = true; - const NSSize viewSize = I_GetContentViewSize(m_window); glViewport(0, 0, static_cast(viewSize.width), static_cast(viewSize.height)); diff --git a/src/r_data/models/models.cpp b/src/r_data/models/models.cpp index b8b324524..316281f65 100644 --- a/src/r_data/models/models.cpp +++ b/src/r_data/models/models.cpp @@ -94,7 +94,7 @@ void FModelRenderer::RenderModel(float x, float y, float z, FSpriteModelFrame *s if (smf->flags & MDL_ROTATING) { - if (smf->rotationSpeed > 0.0000000001) + if (smf->rotationSpeed > 0.0000000001 || smf->rotationSpeed < -0.0000000001) { double turns = (I_GetTime() + I_GetTimeFrac()) / (200.0 / smf->rotationSpeed); turns -= floor(turns); @@ -171,7 +171,9 @@ void FModelRenderer::RenderModel(float x, float y, float z, FSpriteModelFrame *s float stretch = (smf->modelIDs[0] != -1 ? 
Models[smf->modelIDs[0]]->getAspectFactor() : 1.f) / level.info->pixelstretch; objectToWorldMatrix.scale(1, stretch, 1); - BeginDrawModel(actor, smf, objectToWorldMatrix); + float orientation = scaleFactorX * scaleFactorY * scaleFactorZ; + + BeginDrawModel(actor, smf, objectToWorldMatrix, orientation < 0); RenderFrameModels(smf, actor->state, actor->tics, actor->GetClass(), translation); EndDrawModel(actor, smf); } @@ -207,7 +209,9 @@ void FModelRenderer::RenderHUDModel(DPSprite *psp, float ofsX, float ofsY) objectToWorldMatrix.rotate(smf->pitchoffset, 0, 0, 1); objectToWorldMatrix.rotate(-smf->rolloffset, 1, 0, 0); - BeginDrawHUDModel(playermo, objectToWorldMatrix); + float orientation = smf->xscale * smf->yscale * smf->zscale; + + BeginDrawHUDModel(playermo, objectToWorldMatrix, orientation < 0); RenderFrameModels(smf, psp->GetState(), psp->GetTics(), playermo->player->ReadyWeapon->GetClass(), 0); EndDrawHUDModel(playermo); } diff --git a/src/r_data/models/models.h b/src/r_data/models/models.h index b0d0f0230..bb5fb3b84 100644 --- a/src/r_data/models/models.h +++ b/src/r_data/models/models.h @@ -64,7 +64,7 @@ public: virtual ModelRendererType GetType() const = 0; - virtual void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) = 0; + virtual void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) = 0; virtual void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) = 0; virtual IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) = 0; @@ -74,7 +74,7 @@ public: virtual VSMatrix GetViewToWorldMatrix() = 0; - virtual void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) = 0; + virtual void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) = 0; virtual void EndDrawHUDModel(AActor *actor) = 0; virtual void SetInterpolation(double interpolation) = 0; diff --git a/src/resourcefiles/file_zip.cpp 
b/src/resourcefiles/file_zip.cpp index e0cf4df91..b3bfa7a2d 100644 --- a/src/resourcefiles/file_zip.cpp +++ b/src/resourcefiles/file_zip.cpp @@ -40,6 +40,7 @@ #include "v_text.h" #include "w_wad.h" #include "w_zip.h" +#include "i_system.h" #include "ancientzip.h" #define BUFREADCOMMENT (0x400) diff --git a/src/scripting/thingdef_data.cpp b/src/scripting/thingdef_data.cpp index 002c0a81a..1416cbc7a 100644 --- a/src/scripting/thingdef_data.cpp +++ b/src/scripting/thingdef_data.cpp @@ -316,6 +316,7 @@ static FFlagDef ActorFlagDefs[]= DEFINE_FLAG(MF7, FORCEINFIGHTING, AActor, flags7), DEFINE_FLAG(MF8, FRIGHTENING, AActor, flags8), + DEFINE_FLAG(MF8, BLOCKASPLAYER, AActor, flags8), // Effect flags DEFINE_FLAG(FX, VISIBILITYPULSE, AActor, effects), diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 9049c949a..8e260daa7 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -272,7 +272,7 @@ namespace swrenderer void Execute(DrawerThread *thread) override { auto zbuffer = PolyZBuffer::Instance(); - int pitch = PolyStencilBuffer::Instance()->BlockWidth() * 8; + int pitch = PolyStencilBuffer::Instance()->Width(); float *values = zbuffer->Values() + y * pitch + x; int cnt = count; @@ -316,7 +316,7 @@ namespace swrenderer return; auto zbuffer = PolyZBuffer::Instance(); - int pitch = PolyStencilBuffer::Instance()->BlockWidth() * 8; + int pitch = PolyStencilBuffer::Instance()->Width(); float *values = zbuffer->Values() + y * pitch; int end = x2; diff --git a/src/swrenderer/scene/r_scene.cpp b/src/swrenderer/scene/r_scene.cpp index a52d271a3..d51eef63a 100644 --- a/src/swrenderer/scene/r_scene.cpp +++ b/src/swrenderer/scene/r_scene.cpp @@ -259,7 +259,7 @@ namespace swrenderer thread->OpaquePass->ResetFakingUnderwater(); // [RH] Hack to make windows into underwater areas possible thread->Portal->SetMainPortal(); - PolyTriangleDrawer::SetViewport(thread->DrawQueue, viewwindowx, viewwindowy, viewwidth, 
viewheight, thread->Viewport->RenderTarget, true); + PolyTriangleDrawer::SetViewport(thread->DrawQueue, viewwindowx, viewwindowy, viewwidth, viewheight, thread->Viewport->RenderTarget); // Cull things outside the range seen by this thread VisibleSegmentRenderer visitor; diff --git a/src/swrenderer/things/r_model.cpp b/src/swrenderer/things/r_model.cpp index 025263e34..d3eac67e0 100644 --- a/src/swrenderer/things/r_model.cpp +++ b/src/swrenderer/things/r_model.cpp @@ -82,7 +82,7 @@ namespace swrenderer { } - void SWModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) + void SWModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -123,12 +123,14 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, !mirrored); } void SWModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); ModelActor = nullptr; } @@ -181,7 +183,7 @@ namespace swrenderer return objectToWorld; } - void SWModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) + void SWModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) { ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); @@ -192,6 +194,7 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, mirrored); 
} void SWModelRenderer::EndDrawHUDModel(AActor *actor) @@ -201,6 +204,7 @@ namespace swrenderer if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + PolyTriangleDrawer::SetCullCCW(Thread->DrawQueue, true); } void SWModelRenderer::SetInterpolation(double interpolation) diff --git a/src/swrenderer/things/r_model.h b/src/swrenderer/things/r_model.h index 004636e69..ee6608358 100644 --- a/src/swrenderer/things/r_model.h +++ b/src/swrenderer/things/r_model.h @@ -56,13 +56,13 @@ namespace swrenderer ModelRendererType GetType() const override { return SWModelRendererType; } - void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawModel(AActor *actor, FSpriteModelFrame *smf) override; IModelVertexBuffer *CreateVertexBuffer(bool needindex, bool singleframe) override; void SetVertexBuffer(IModelVertexBuffer *buffer) override; void ResetVertexBuffer() override; VSMatrix GetViewToWorldMatrix() override; - void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix) override; + void BeginDrawHUDModel(AActor *actor, const VSMatrix &objectToWorldMatrix, bool mirrored) override; void EndDrawHUDModel(AActor *actor) override; void SetInterpolation(double interpolation) override; void SetMaterial(FTexture *skin, bool clampNoFilter, int translation) override; diff --git a/src/v_2ddrawer.cpp b/src/v_2ddrawer.cpp index ada1b5878..bbf9105fe 100644 --- a/src/v_2ddrawer.cpp +++ b/src/v_2ddrawer.cpp @@ -323,12 +323,16 @@ void F2DDrawer::AddPoly(FTexture *texture, FVector2 *points, int npoints, // Convert a light level into an unbounded colormap index (shade). // Why the +12? I wish I knew, but experimentation indicates it // is necessary in order to best reproduce Doom's original lighting. - double map = (NUMCOLORMAPS * 2.) 
- ((lightlevel + 12) * (NUMCOLORMAPS / 128.)); - double fadelevel = clamp((map - 12) / NUMCOLORMAPS, 0.0, 1.0); - // handle the brighter light modes of the hardware renderer. - if (vid_rendermode == 4 && (level.lightmode < 2 || level.lightmode == 4)) + double fadelevel; + + if (vid_rendermode != 4 || (level.lightmode >= 2 && level.lightmode != 4)) { - fadelevel = pow(fadelevel, 1.3); + double map = (NUMCOLORMAPS * 2.) - ((lightlevel + 12) * (NUMCOLORMAPS / 128.)); + fadelevel = clamp((map - 12) / NUMCOLORMAPS, 0.0, 1.0); + } + else + { + fadelevel = 1. - clamp(lightlevel, 0, 255) / 255.f; } RenderCommand poly; diff --git a/src/v_video.cpp b/src/v_video.cpp index 1441a1966..0f754d4c3 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -1005,7 +1005,7 @@ void DFrameBuffer::WriteSavePic(player_t *player, FileWriter *file, int width, i // //========================================================================== -void DFrameBuffer::SetOutputViewport(IntRect *bounds) +void DFrameBuffer::SetViewportRects(IntRect *bounds) { if (bounds) { diff --git a/src/v_video.h b/src/v_video.h index f88a65e65..66e38fca5 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -501,7 +501,7 @@ public: // Calculate gamma table void CalcGamma(float gamma, uint8_t gammalookup[256]); - virtual void SetOutputViewport(IntRect *bounds); + virtual void SetViewportRects(IntRect *bounds); int ScreenToWindowX(int x); int ScreenToWindowY(int y); diff --git a/wadsrc/static/zscript/doom/weaponfist.txt b/wadsrc/static/zscript/doom/weaponfist.txt index 6f106e14a..ae0eedddc 100644 --- a/wadsrc/static/zscript/doom/weaponfist.txt +++ b/wadsrc/static/zscript/doom/weaponfist.txt @@ -65,7 +65,7 @@ extend class Actor damage *= 10; double ang = angle + Random2[Punch]() * (5.625 / 256); - double pitch = AimLineAttack (ang, DEFMELEERANGE); + double pitch = AimLineAttack (ang, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (ang, DEFMELEERANGE, pitch, damage, 'Melee', "BulletPuff", LAF_ISMELEEATTACK, t); diff 
--git a/wadsrc/static/zscript/hexen/clericmace.txt b/wadsrc/static/zscript/hexen/clericmace.txt index 2198e3cc4..adce6431a 100644 --- a/wadsrc/static/zscript/hexen/clericmace.txt +++ b/wadsrc/static/zscript/hexen/clericmace.txt @@ -66,7 +66,7 @@ class CWeapMace : ClericWeapon for (int j = 1; j >= -1; j -= 2) { double ang = angle + j*i*(45. / 16); - double slope = AimLineAttack(ang, 2 * DEFMELEERANGE, t); + double slope = AimLineAttack(ang, 2 * DEFMELEERANGE, t, 0., ALF_CHECK3D); if (t.linetarget) { LineAttack(ang, 2 * DEFMELEERANGE, slope, damage, 'Melee', "HammerPuff", true, t); @@ -81,7 +81,7 @@ class CWeapMace : ClericWeapon // didn't find any creatures, so try to strike any walls weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE); + double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', "HammerPuff"); } } diff --git a/wadsrc/static/zscript/hexen/fighteraxe.txt b/wadsrc/static/zscript/hexen/fighteraxe.txt index 61e509744..a89bd7e5b 100644 --- a/wadsrc/static/zscript/hexen/fighteraxe.txt +++ b/wadsrc/static/zscript/hexen/fighteraxe.txt @@ -245,7 +245,7 @@ class FWeapAxe : FighterWeapon for (int j = 1; j >= -1; j -= 2) { double ang = angle + j*i*(45. 
/ 16); - double slope = AimLineAttack(ang, AXERANGE, t); + double slope = AimLineAttack(ang, AXERANGE, t, 0., ALF_CHECK3D); if (t.linetarget) { LineAttack(ang, AXERANGE, slope, damage, 'Melee', pufftype, true, t); @@ -273,7 +273,7 @@ class FWeapAxe : FighterWeapon // didn't find any creatures, so try to strike any walls self.weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE); + double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', pufftype, true); } } diff --git a/wadsrc/static/zscript/hexen/fighterfist.txt b/wadsrc/static/zscript/hexen/fighterfist.txt index 4975bbd62..dd5f02095 100644 --- a/wadsrc/static/zscript/hexen/fighterfist.txt +++ b/wadsrc/static/zscript/hexen/fighterfist.txt @@ -56,7 +56,7 @@ class FWeapFist : FighterWeapon Class pufftype; FTranslatedLineTarget t; - double slope = AimLineAttack (angle, 2*DEFMELEERANGE, t); + double slope = AimLineAttack (angle, 2*DEFMELEERANGE, t, 0., ALF_CHECK3D); if (t.linetarget != null) { if (++weaponspecial >= 3) @@ -117,7 +117,7 @@ class FWeapFist : FighterWeapon // didn't find any creatures, so try to strike any walls weaponspecial = 0; - double slope = AimLineAttack (angle, DEFMELEERANGE); + double slope = AimLineAttack (angle, DEFMELEERANGE, null, 0., ALF_CHECK3D); LineAttack (angle, DEFMELEERANGE, slope, damage, 'Melee', "PunchPuff", true); }