diff --git a/src/rendering/swrenderer/drawers/r_draw.cpp b/src/rendering/swrenderer/drawers/r_draw.cpp
index 797f65626..2d191b328 100644
--- a/src/rendering/swrenderer/drawers/r_draw.cpp
+++ b/src/rendering/swrenderer/drawers/r_draw.cpp
@@ -221,7 +221,238 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////
 
-	void SWPixelFormatDrawers::DrawDepthColumn(const WallColumnDrawerArgs& args, float idepth)
+	DrawWallCommand::DrawWallCommand(const WallDrawerArgs& args)
+		: wallargs(args) // kept so Execute() and the column helpers can read the wall setup
+	{ }
+
+	void DrawWallCommand::Execute(DrawerThread* thread)
+	{
+		// Draws the wall's columns for x in [x1, x2); when r_modelscene is set, also writes their depth values.
+		if (!thread->columndrawer) // the thread's column arguments are created on first use
+			thread->columndrawer = std::make_shared<WallColumnDrawerArgs>();
+		WallColumnDrawerArgs& drawerargs = *thread->columndrawer;
+		drawerargs.wallargs = &wallargs;
+
+		const bool haslights = r_dynlights && wallargs.lightlist;
+		if (haslights)
+		{
+			// dc_normal is the tleft -> tright direction rotated a quarter turn and scaled to unit length.
+			const float dx = wallargs.WallC.tright.X - wallargs.WallC.tleft.X;
+			const float dy = wallargs.WallC.tright.Y - wallargs.WallC.tleft.Y;
+			const float length = std::sqrt(dx * dx + dy * dy);
+			drawerargs.dc_normal.X = dy / length;
+			drawerargs.dc_normal.Y = -dx / length;
+			drawerargs.dc_normal.Z = 0.0f;
+		}
+
+		drawerargs.SetTextureFracBits(wallargs.fracbits);
+
+		float curlight = wallargs.lightpos;
+		float lightstep = wallargs.lightstep;
+		const int shade = wallargs.Shade();
+
+		if (wallargs.fixedlight) // a fixed light level replaces the per-column light gradient
+		{
+			curlight = wallargs.FixedLight();
+			lightstep = 0;
+		}
+
+		float upos = wallargs.texcoords.upos, ustepX = wallargs.texcoords.ustepX, ustepY = wallargs.texcoords.ustepY;
+		float vpos = wallargs.texcoords.vpos, vstepX = wallargs.texcoords.vstepX, vstepY = wallargs.texcoords.vstepY;
+		float wpos = wallargs.texcoords.wpos, wstepX = wallargs.texcoords.wstepX, wstepY = wallargs.texcoords.wstepY;
+		float startX = wallargs.texcoords.startX;
+
+		const int x1 = wallargs.x1;
+		const int x2 = wallargs.x2;
+		// Advance the interpolants from startX to the center of column x1.
+		upos += ustepX * (x1 + 0.5f - startX);
+		vpos += vstepX * (x1 + 0.5f - startX);
+		wpos += wstepX * (x1 + 0.5f - startX);
+
+		float centerY = wallargs.CenterY;
+		centerY -= 0.5f;
+
+		auto uwal = wallargs.uwal;
+		auto dwal = wallargs.dwal;
+		for (int x = x1; x < x2; x++)
+		{
+			const int y1 = uwal[x];
+			const int y2 = dwal[x];
+			if (y2 > y1)
+			{
+				drawerargs.SetLight(curlight, shade);
+				if (haslights)
+					SetLights(drawerargs, x, y1);
+				else
+					drawerargs.dc_num_lights = 0;
+				// Evaluate u, v and w at row y1, then divide by w.
+				float dy = (y1 - centerY);
+				float u = upos + ustepY * dy;
+				float v = vpos + vstepY * dy;
+				float w = wpos + wstepY * dy;
+				float scaleU = ustepX;
+				float scaleV = vstepY;
+				w = 1.0f / w;
+				u *= w;
+				v *= w;
+				scaleU *= w;
+				scaleV *= w;
+				// Keep only the fractional part of u and v, and express everything as 32-bit fixed point.
+				uint32_t texelX = (uint32_t)(int64_t)((u - std::floor(u)) * 0x1'0000'0000LL);
+				uint32_t texelY = (uint32_t)(int64_t)((v - std::floor(v)) * 0x1'0000'0000LL);
+				uint32_t texelStepX = (uint32_t)(int64_t)(scaleU * 0x1'0000'0000LL);
+				uint32_t texelStepY = (uint32_t)(int64_t)(scaleV * 0x1'0000'0000LL);
+
+				if (wallargs.fracbits != 32)
+					DrawWallColumn8(thread, drawerargs, x, y1, y2, texelX, texelY, texelStepY);
+				else
+					DrawWallColumn32(thread, drawerargs, x, y1, y2, texelX, texelY, texelStepX, texelStepY);
+			}
+
+			upos += ustepX;
+			vpos += vstepX;
+			wpos += wstepX;
+			curlight += lightstep;
+		}
+
+		if (r_modelscene)
+		{
+			// 1/sz at the two wall ends does not depend on the column, so it is computed once up front.
+			const float w1 = 1.0f / wallargs.WallC.sz1;
+			const float w2 = 1.0f / wallargs.WallC.sz2;
+			for (int x = x1; x < x2; x++)
+			{
+				const int y1 = uwal[x];
+				const int y2 = dwal[x];
+				if (y2 > y1)
+				{
+					float t = (x - wallargs.WallC.sx1 + 0.5f) / (wallargs.WallC.sx2 - wallargs.WallC.sx1);
+					float wcol = w1 * (1.0f - t) + w2 * t;
+					float zcol = 1.0f / wcol;
+					float zbufferdepth = 1.0f / (zcol / wallargs.FocalTangent);
+
+					drawerargs.SetDest(x, y1);
+					drawerargs.SetCount(y2 - y1);
+					DrawDepthColumn(thread, drawerargs, zbufferdepth);
+				}
+			}
+		}
+	}
+
+	void DrawWallCommand::DrawWallColumn32(DrawerThread* thread, WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepX, uint32_t texelStepY)
+	{
+		// Level of detail: log2 of the larger texel step plus r_lod_bias, clamped from below.
+		// The steps are reinterpreted as signed 32-bit values and scaled by 2^-32 first.
+		const double fixedToDouble = 1.0 / 0x1'0000'0000LL;
+		const double xmagnitude = fabs(static_cast<int32_t>(texelStepX) * fixedToDouble);
+		const double ymagnitude = fabs(static_cast<int32_t>(texelStepY) * fixedToDouble);
+		const double magnitude = MAX(ymagnitude, xmagnitude);
+		const double min_lod = -1000.0;
+		const double lod = MAX(log2(magnitude) + r_lod_bias, min_lod);
+		const bool magnifying = lod < 0.0f;
+
+		// Start at the full-size image; when mipmapped, step past one image per level while both sides exceed 1.
+		int mipmap_offset = 0;
+		int mip_width = wallargs.texwidth;
+		int mip_height = wallargs.texheight;
+		if (wallargs.mipmapped && mip_width > 1 && mip_height > 1)
+		{
+			for (int level = (int)lod; level > 0 && mip_width > 1 && mip_height > 1; level--)
+			{
+				mipmap_offset += mip_width * mip_height;
+				mip_width = MAX(mip_width >> 1, 1);
+				mip_height = MAX(mip_height >> 1, 1);
+			}
+		}
+
+		const uint32_t* pixels = static_cast<const uint32_t*>(wallargs.texpixels) + mipmap_offset;
+		fixed_t xxoffset = (texelX >> 16) * mip_width;
+
+		// source2 and texturefracx keep these defaults unless the linear-filter branch below sets them.
+		const uint8_t* source;
+		const uint8_t* source2 = nullptr;
+		uint32_t texturefracx = 0;
+		const bool filter_nearest = magnifying ? !r_magfilter : !r_minfilter;
+		if (filter_nearest)
+		{
+			// Nearest: a single texture column.
+			const int tx = (xxoffset >> FRACBITS) % mip_width;
+			source = reinterpret_cast<const uint8_t*>(pixels + tx * mip_height);
+		}
+		else
+		{
+			// Linear: offset by FRACUNIT / 2, then pick the two neighbouring columns tx0 and tx1 (with wrap).
+			xxoffset -= FRACUNIT / 2;
+			int tx0 = (xxoffset >> FRACBITS) % mip_width;
+			if (tx0 < 0)
+				tx0 += mip_width;
+			const int tx1 = (tx0 + 1) % mip_width;
+			source = reinterpret_cast<const uint8_t*>(pixels + tx0 * mip_height);
+			source2 = reinterpret_cast<const uint8_t*>(pixels + tx1 * mip_height);
+			texturefracx = (xxoffset >> (FRACBITS - 4)) & 15;
+		}
+
+		// Hand the selected column(s) to the drawer for rows [y1, y2).
+		drawerargs.SetDest(x, y1);
+		drawerargs.SetCount(y2 - y1);
+		drawerargs.SetTexture(source, source2, mip_height);
+		drawerargs.SetTextureUPos(texturefracx);
+		drawerargs.SetTextureVPos(texelY);
+		drawerargs.SetTextureVStep(texelStepY);
+		DrawColumn(thread, drawerargs);
+	}
+
+	void DrawWallCommand::DrawWallColumn8(DrawerThread* thread, WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepY)
+	{
+		const int texwidth = wallargs.texwidth;
+		const int texheight = wallargs.texheight;
+		const int fracbits = wallargs.fracbits;
+		const uint32_t uv_max = texheight << fracbits;
+
+		// Texture column: the top 16 bits of texelX scaled by texwidth; each column holds texheight bytes.
+		const uint32_t column = ((texelX >> 16) * texwidth) >> 16;
+		const uint8_t* pixels = static_cast<const uint8_t*>(wallargs.texpixels) + column * texheight;
+
+		// Rescale position and step from a 32-bit fraction of the texture to texels with fracbits fractional bits.
+		texelY = (static_cast<uint64_t>(texelY) * texheight) >> (32 - fracbits);
+		texelStepY = (static_cast<uint64_t>(texelStepY) * texheight) >> (32 - fracbits);
+
+		drawerargs.SetTexture(pixels, nullptr, texheight);
+		drawerargs.SetTextureVStep(texelStepY);
+
+		if (uv_max == 0 || texelStepY == 0) // power of two
+		{
+			// A single draw call covers the whole column.
+			drawerargs.SetDest(x, y1);
+			drawerargs.SetCount(y2 - y1);
+			drawerargs.SetTextureVPos(texelY);
+			DrawColumn(thread, drawerargs);
+			return;
+		}
+
+		// Otherwise draw in runs that each stop where the coordinate would reach uv_max, wrapping between runs.
+		int y = y1;
+		uint32_t left = y2 - y1;
+		while (left > 0)
+		{
+			const uint32_t available = uv_max - texelY;
+			const uint32_t next_uv_wrap = available / texelStepY + (available % texelStepY != 0 ? 1 : 0); // rounded up
+			const uint32_t count = MIN(left, next_uv_wrap);
+
+			drawerargs.SetDest(x, y);
+			drawerargs.SetCount(count);
+			drawerargs.SetTextureVPos(texelY);
+			DrawColumn(thread, drawerargs);
+
+			y += count;
+			left -= count;
+			texelY += texelStepY * count;
+			if (texelY >= uv_max)
+				texelY -= uv_max;
+		}
+	}
+
+	void DrawWallCommand::DrawDepthColumn(DrawerThread* thread, const WallColumnDrawerArgs& args, float idepth)
 	{
 		int x, y, count;
 
@@ -246,11 +477,15 @@ namespace swrenderer
 		}
 		count = args.Count();
 
-		auto zbuffer = thread->Poly->depthstencil;
+		auto zbuffer = PolyTriangleThreadData::Get(thread)->depthstencil;
 		int pitch = zbuffer->Width();
 		float* values = zbuffer->DepthValues() + y * pitch + x;
 		int cnt = count;
 
+		values = thread->dest_for_thread(y, pitch, values);
+		cnt = thread->count_for_thread(y, cnt);
+		pitch *= thread->num_cores;
+
 		float depth = idepth;
 		for (int i = 0; i < cnt; i++)
 		{
@@ -259,7 +494,7 @@ namespace swrenderer
 		}
 	}
 
-	void SWPixelFormatDrawers::SetLights(WallColumnDrawerArgs& drawerargs, int x, int y1, const WallDrawerArgs& wallargs)
+	void DrawWallCommand::SetLights(WallColumnDrawerArgs& drawerargs, int x, int y1)
 	{
 		bool mirror = !!(wallargs.PortalMirrorFlags & RF_XFLIP);
 		int tx = x;
@@ -325,74 +560,130 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////
 
+	class DepthSkyColumnCommand : public DrawerCommand
+	{
+	public:
+		// Recovers the column's x/y from args.Dest() relative to the view window origin of the render
+		// target, and stores them with the row count and the depth value for Execute().
+		DepthSkyColumnCommand(const SkyDrawerArgs &args, float idepth) : idepth(idepth)
+		{
+			auto rendertarget = args.Viewport()->RenderTarget;
+			const auto targetpitch = rendertarget->GetPitch();
+			const auto origin = viewwindowx + viewwindowy * targetpitch;
+
+			// The pointer difference is taken in the target's pixel size: 32-bit for BGRA, 8-bit otherwise.
+			int offset;
+			if (rendertarget->IsBgra())
+			{
+				uint32_t *destorg = (uint32_t*)rendertarget->GetPixels() + origin;
+				offset = (int)(ptrdiff_t)((uint32_t*)args.Dest() - destorg);
+			}
+			else
+			{
+				uint8_t *destorg = rendertarget->GetPixels() + origin;
+				offset = (int)(ptrdiff_t)((uint8_t*)args.Dest() - destorg);
+			}
+
+			x = offset % targetpitch;
+			y = offset / targetpitch;
+			count = args.Count();
+		}
+
+		// Writes idepth down this column of the depth buffer. The start row, row count and stride come from
+		// dest_for_thread/count_for_thread/num_cores (declared elsewhere; presumably a per-thread row split).
+		void Execute(DrawerThread *thread) override
+		{
+			auto zbuffer = PolyTriangleThreadData::Get(thread)->depthstencil;
+			const int rowpitch = zbuffer->Width();
+			float *values = zbuffer->DepthValues() + y * rowpitch + x;
+			values = thread->dest_for_thread(y, rowpitch, values);
+			const int stride = rowpitch * thread->num_cores;
+
+			for (int remaining = thread->count_for_thread(y, count); remaining > 0; remaining--)
+			{
+				*values = idepth;
+				values += stride;
+			}
+		}
+
+	private:
+		int x, y, count;
+		float idepth;
+	};
+
+	// #define DEPTH_DEBUG
+
+	class DepthSpanCommand : public DrawerCommand
+	{
+	public:
+		// Stores the span's row, its inclusive x range and the depth values given for its two ends.
+		DepthSpanCommand(const SpanDrawerArgs &args, float idepth1, float idepth2)
+			: y(args.DestY()), x1(args.DestX1()), x2(args.DestX2()), idepth1(idepth1), idepth2(idepth2)
+		{
+			#ifdef DEPTH_DEBUG
+			dest = (uint32_t*)args.Viewport()->GetDest(0, args.DestY());
+			#endif
+		}
+
+		// Fills depth entries x1..x2 of row y, starting at idepth1; returns early if skipped_by_thread(y) is non-zero.
+		void Execute(DrawerThread *thread) override
+		{
+			if (thread->skipped_by_thread(y))
+				return;
+
+			auto zbuffer = PolyTriangleThreadData::Get(thread)->depthstencil;
+			const int pitch = zbuffer->Width();
+			float *row = zbuffer->DepthValues() + y * pitch;
+			float depth = idepth1;
+
+			if (idepth1 != idepth2)
+			{
+				// Gradient: advance by a constant step per pixel.
+				const float step = (idepth2 - idepth1) / (x2 - x1 + 1);
+				for (int x = x1; x <= x2; x++)
+				{
+					#ifdef DEPTH_DEBUG
+					uint32_t gray = clamp<int32_t>((int32_t)(1.0f / depth / 4.0f), 0, 255);
+					uint32_t color = MAKEARGB(255, gray, gray, gray);
+					dest[x] = color;
+					#endif
+
+					row[x] = depth;
+					depth += step;
+				}
+			}
+			else
+			{
+				// Both ends are equal: write the same value for every pixel.
+				#ifdef DEPTH_DEBUG
+				uint32_t gray = clamp<int32_t>((int32_t)(1.0f / depth / 4.0f), 0, 255);
+				uint32_t color = MAKEARGB(255, gray, gray, gray);
+				#endif
+				for (int x = x1; x <= x2; x++)
+				{
+					row[x] = depth;
+					#ifdef DEPTH_DEBUG
+					dest[x] = color;
+					#endif
+				}
+			}
+		}
+
+	private:
+		int y, x1, x2;
+		float idepth1, idepth2;
+		#ifdef DEPTH_DEBUG
+		uint32_t *dest;
+		#endif
+	};
+
 	void SWPixelFormatDrawers::DrawDepthSkyColumn(const SkyDrawerArgs &args, float idepth)
 	{
-		int x, y, count;
-		auto rendertarget = args.Viewport()->RenderTarget;
-		if (rendertarget->IsBgra())
-		{
-			uint32_t* destorg = (uint32_t*)rendertarget->GetPixels();
-			destorg += viewwindowx + viewwindowy * rendertarget->GetPitch();
-			uint32_t* dest = (uint32_t*)args.Dest();
-			int offset = (int)(ptrdiff_t)(dest - destorg);
-			x = offset % rendertarget->GetPitch();
-			y = offset / rendertarget->GetPitch();
-		}
-		else
-		{
-			uint8_t* destorg = rendertarget->GetPixels();
-			destorg += viewwindowx + viewwindowy * rendertarget->GetPitch();
-			uint8_t* dest = (uint8_t*)args.Dest();
-			int offset = (int)(ptrdiff_t)(dest - destorg);
-			x = offset % rendertarget->GetPitch();
-			y = offset / rendertarget->GetPitch();
-		}
-		count = args.Count();
-
-		auto zbuffer = thread->Poly->depthstencil;
-		int pitch = zbuffer->Width();
-		float* values = zbuffer->DepthValues() + y * pitch + x;
-		int cnt = count;
-
-		float depth = idepth;
-		for (int i = 0; i < cnt; i++)
-		{
-			*values = depth;
-			values += pitch;
-		}
+		Queue->Push<DepthSkyColumnCommand>(args, idepth); // the depth writes themselves are done by DepthSkyColumnCommand::Execute
 	}
 
 	void SWPixelFormatDrawers::DrawDepthSpan(const SpanDrawerArgs &args, float idepth1, float idepth2)
 	{
-		int y = args.DestY();
-		int x1 = args.DestX1();
-		int x2 = args.DestX2();
-
-		auto zbuffer = thread->Poly->depthstencil;
-		int pitch = zbuffer->Width();
-		float *values = zbuffer->DepthValues() + x1 + y * pitch;
-
-		int count = x2 - x1 + 1;
-
-		if (idepth1 == idepth2)
-		{
-			float depth = idepth1;
-			for (int i = 0; i < count; i++)
-			{
-				*values = depth;
-				values++;
-			}
-		}
-		else
-		{
-			float depth = idepth1;
-			float step = (idepth2 - idepth1) / (x2 - x1 + 1);
-			for (int i = 0; i < count; i++)
-			{
-				*values = depth;
-				values++;
-				depth += step;
-			}
-		}
+		Queue->Push<DepthSpanCommand>(args, idepth1, idepth2); // the depth writes themselves are done by DepthSpanCommand::Execute
 	}
 }
diff --git a/src/rendering/swrenderer/drawers/r_draw.h b/src/rendering/swrenderer/drawers/r_draw.h
index be2619595..32eba2db0 100644
--- a/src/rendering/swrenderer/drawers/r_draw.h
+++ b/src/rendering/swrenderer/drawers/r_draw.h
@@ -19,12 +19,14 @@ EXTERN_CVAR(Float, transsouls);
 EXTERN_CVAR(Bool, r_dynlights);
 EXTERN_CVAR(Bool, r_fuzzscale);
 
+class DrawerCommandQueue; // only used through the shared pointer alias in this header
+using DrawerCommandQueuePtr = std::shared_ptr<DrawerCommandQueue>;
+
 namespace swrenderer
 {
 	class DrawerArgs;
 	class SkyDrawerArgs;
 	class WallDrawerArgs;
-	class WallColumnDrawerArgs;
 	class SpanDrawerArgs;
 	class SpriteDrawerArgs;
 	class VoxelBlock;
@@ -53,8 +55,8 @@ namespace swrenderer
 	class SWPixelFormatDrawers
 	{
 	public:
-		SWPixelFormatDrawers(RenderThread* thread) : thread(thread) { }
-		virtual ~SWPixelFormatDrawers() = default;
+		explicit SWPixelFormatDrawers(DrawerCommandQueuePtr queue) : Queue(queue) { } // explicit: a queue pointer must never convert to a drawer set implicitly
+		virtual ~SWPixelFormatDrawers() = default; // virtual so derived drawer sets can be destroyed through this base
 		virtual void DrawWall(const WallDrawerArgs &args) = 0;
 		virtual void DrawWallMasked(const WallDrawerArgs &args) = 0;
 		virtual void DrawWallAdd(const WallDrawerArgs &args) = 0;
@@ -92,15 +94,11 @@ namespace swrenderer
 		virtual void DrawTiltedSpan(const SpanDrawerArgs &args, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) = 0;
 		virtual void DrawColoredSpan(const SpanDrawerArgs &args) = 0;
 		virtual void DrawFogBoundaryLine(const SpanDrawerArgs &args) = 0;
-		virtual void DrawParticleColumn(int x, int yl, int ycount, uint32_t fg, uint32_t alpha, uint32_t fracposx) = 0;
 
-		void DrawDepthColumn(const WallColumnDrawerArgs& args, float idepth);
 		void DrawDepthSkyColumn(const SkyDrawerArgs &args, float idepth);
 		void DrawDepthSpan(const SpanDrawerArgs &args, float idepth1, float idepth2);
-
-		void SetLights(WallColumnDrawerArgs& drawerargs, int x, int y1, const WallDrawerArgs& wallargs);
-
-		RenderThread* thread = nullptr;
+		
+		DrawerCommandQueuePtr Queue;
 	};
 
 	void R_InitShadeMaps();
diff --git a/src/rendering/swrenderer/drawers/r_draw_pal.cpp b/src/rendering/swrenderer/drawers/r_draw_pal.cpp
index 8cc7a0fc8..af42a7f35 100644
--- a/src/rendering/swrenderer/drawers/r_draw_pal.cpp
+++ b/src/rendering/swrenderer/drawers/r_draw_pal.cpp
@@ -93,7 +93,7 @@ EXTERN_CVAR(Int, gl_particles_style)
 
 namespace swrenderer
 {
-	uint8_t SWPalDrawers::AddLightsColumn(const DrawerLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material)
+	uint8_t PalWall1Command::AddLights(const DrawerLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material)
 	{
 		uint32_t lit_r = 0;
 		uint32_t lit_g = 0;
@@ -146,16 +146,12 @@ namespace swrenderer
 		return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)];
 	}
 
-	template<>
-	void SWPalDrawers::DrawWallColumn<DrawWallModeNormal>(const WallColumnDrawerArgs& args)
+	void DrawWall1PalCommand::DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args)
 	{
-		int count = args.Count();
-		if (count <= 0)
-			return;
-
 		uint32_t fracstep = args.TextureVStep();
 		uint32_t frac = args.TextureVPos();
 		uint8_t *colormap = args.Colormap(args.Viewport());
+		int count = args.Count();
 		const uint8_t *source = args.TexturePixels();
 		uint8_t *dest = args.Dest();
 		int bits = args.TextureFracBits();
@@ -165,6 +161,15 @@ namespace swrenderer
 		float viewpos_z = args.dc_viewpos.Z;
 		float step_viewpos_z = args.dc_viewpos_step.Z;
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
 		if (num_dynlights == 0)
 		{
 			do
@@ -179,9 +184,12 @@ namespace swrenderer
 			float viewpos_z = args.dc_viewpos.Z;
 			float step_viewpos_z = args.dc_viewpos_step.Z;
 
+			viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY());
+			step_viewpos_z *= thread->num_cores;
+
 			do
 			{
-				*dest = AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[source[frac >> bits]], source[frac >> bits]);
+				*dest = AddLights(dynlights, num_dynlights, viewpos_z, colormap[source[frac >> bits]], source[frac >> bits]);
 				viewpos_z += step_viewpos_z;
 				frac += fracstep;
 				dest += pitch;
@@ -189,16 +197,12 @@ namespace swrenderer
 		}
 	}
 
-	template<>
-	void SWPalDrawers::DrawWallColumn<DrawWallModeMasked>(const WallColumnDrawerArgs& args)
+	void DrawWallMasked1PalCommand::DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args)
 	{
-		int count = args.Count();
-		if (count <= 0)
-			return;
-
 		uint32_t fracstep = args.TextureVStep();
 		uint32_t frac = args.TextureVPos();
 		uint8_t *colormap = args.Colormap(args.Viewport());
+		int count = args.Count();
 		const uint8_t *source = args.TexturePixels();
 		uint8_t *dest = args.Dest();
 		int bits = args.TextureFracBits();
@@ -208,6 +212,15 @@ namespace swrenderer
 		float viewpos_z = args.dc_viewpos.Z;
 		float step_viewpos_z = args.dc_viewpos_step.Z;
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
 		if (num_dynlights == 0)
 		{
 			do
@@ -226,12 +239,15 @@ namespace swrenderer
 			float viewpos_z = args.dc_viewpos.Z;
 			float step_viewpos_z = args.dc_viewpos_step.Z;
 
+			viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY());
+			step_viewpos_z *= thread->num_cores;
+
 			do
 			{
 				uint8_t pix = source[frac >> bits];
 				if (pix != 0)
 				{
-					*dest = AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[pix], pix);
+					*dest = AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix);
 				}
 				viewpos_z += step_viewpos_z;
 				frac += fracstep;
@@ -240,16 +256,12 @@ namespace swrenderer
 		}
 	}
 
-	template<>
-	void SWPalDrawers::DrawWallColumn<DrawWallModeAdd>(const WallColumnDrawerArgs& args)
+	void DrawWallAdd1PalCommand::DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args)
 	{
-		int count = args.Count();
-		if (count <= 0)
-			return;
-
 		uint32_t fracstep = args.TextureVStep();
 		uint32_t frac = args.TextureVPos();
 		uint8_t *colormap = args.Colormap(args.Viewport());
+		int count = args.Count();
 		const uint8_t *source = args.TexturePixels();
 		uint8_t *dest = args.Dest();
 		int bits = args.TextureFracBits();
@@ -258,6 +270,15 @@ namespace swrenderer
 		uint32_t *fg2rgb = args.SrcBlend();
 		uint32_t *bg2rgb = args.DestBlend();
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+
 		if (!r_blendmethod)
 		{
 			do
@@ -297,16 +318,12 @@ namespace swrenderer
 		}
 	}
 
-	template<>
-	void SWPalDrawers::DrawWallColumn<DrawWallModeAddClamp>(const WallColumnDrawerArgs& args)
+	void DrawWallAddClamp1PalCommand::DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args)
 	{
-		int count = args.Count();
-		if (count <= 0)
-			return;
-
 		uint32_t fracstep = args.TextureVStep();
 		uint32_t frac = args.TextureVPos();
 		uint8_t *colormap = args.Colormap(args.Viewport());
+		int count = args.Count();
 		const uint8_t *source = args.TexturePixels();
 		uint8_t *dest = args.Dest();
 		int bits = args.TextureFracBits();
@@ -319,6 +336,17 @@ namespace swrenderer
 		uint32_t *fg2rgb = args.SrcBlend();
 		uint32_t *bg2rgb = args.DestBlend();
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+		viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY());
+		step_viewpos_z *= thread->num_cores;
+
 		if (!r_blendmethod)
 		{
 			do
@@ -326,7 +354,7 @@ namespace swrenderer
 				uint8_t pix = source[frac >> bits];
 				if (pix != 0)
 				{
-					uint8_t lit = num_dynlights != 0 ? AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
+					uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
 
 					uint32_t a = fg2rgb[lit] + bg2rgb[*dest];
 					uint32_t b = a;
@@ -350,7 +378,7 @@ namespace swrenderer
 				uint8_t pix = source[frac >> bits];
 				if (pix != 0)
 				{
-					uint8_t lit = num_dynlights != 0 ? AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
+					uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
 
 					uint32_t r = MIN(GPalette.BaseColors[lit].r + GPalette.BaseColors[*dest].r, 255);
 					uint32_t g = MIN(GPalette.BaseColors[lit].g + GPalette.BaseColors[*dest].g, 255);
@@ -364,16 +392,12 @@ namespace swrenderer
 		}
 	}
 
-	template<>
-	void SWPalDrawers::DrawWallColumn<DrawWallModeSubClamp>(const WallColumnDrawerArgs& args)
+	void DrawWallSubClamp1PalCommand::DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args)
 	{
-		int count = args.Count();
-		if (count <= 0)
-			return;
-
 		uint32_t fracstep = args.TextureVStep();
 		uint32_t frac = args.TextureVPos();
 		uint8_t *colormap = args.Colormap(args.Viewport());
+		int count = args.Count();
 		const uint8_t *source = args.TexturePixels();
 		uint8_t *dest = args.Dest();
 		int bits = args.TextureFracBits();
@@ -386,6 +410,17 @@ namespace swrenderer
 		uint32_t *fg2rgb = args.SrcBlend();
 		uint32_t *bg2rgb = args.DestBlend();
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+		viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY());
+		step_viewpos_z *= thread->num_cores;
+
 		if (!r_blendmethod)
 		{
 			do
@@ -393,7 +428,7 @@ namespace swrenderer
 				uint8_t pix = source[frac >> bits];
 				if (pix != 0)
 				{
-					uint8_t lit = num_dynlights != 0 ? AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
+					uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
 
 					uint32_t a = (fg2rgb[lit] | 0x40100400) - bg2rgb[*dest];
 					uint32_t b = a;
@@ -416,7 +451,7 @@ namespace swrenderer
 				uint8_t pix = source[frac >> bits];
 				if (pix != 0)
 				{
-					uint8_t lit = num_dynlights != 0 ? AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
+					uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
 
 					int r = clamp(-GPalette.BaseColors[lit].r + GPalette.BaseColors[*dest].r, 0, 255);
 					int g = clamp(-GPalette.BaseColors[lit].g + GPalette.BaseColors[*dest].g, 0, 255);
@@ -430,16 +465,12 @@ namespace swrenderer
 		}
 	}
 
-	template<>
-	void SWPalDrawers::DrawWallColumn<DrawWallModeRevSubClamp>(const WallColumnDrawerArgs& args)
+	void DrawWallRevSubClamp1PalCommand::DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args)
 	{
-		int count = args.Count();
-		if (count <= 0)
-			return;
-
 		uint32_t fracstep = args.TextureVStep();
 		uint32_t frac = args.TextureVPos();
 		uint8_t *colormap = args.Colormap(args.Viewport());
+		int count = args.Count();
 		const uint8_t *source = args.TexturePixels();
 		uint8_t *dest = args.Dest();
 		int bits = args.TextureFracBits();
@@ -452,6 +483,17 @@ namespace swrenderer
 		uint32_t *fg2rgb = args.SrcBlend();
 		uint32_t *bg2rgb = args.DestBlend();
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
+		viewpos_z += step_viewpos_z * thread->skipped_by_thread(args.DestY());
+		step_viewpos_z *= thread->num_cores;
+
 		if (!r_blendmethod)
 		{
 			do
@@ -459,7 +501,7 @@ namespace swrenderer
 				uint8_t pix = source[frac >> bits];
 				if (pix != 0)
 				{
-					uint8_t lit = num_dynlights != 0 ? AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
+					uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
 
 					uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[lit];
 					uint32_t b = a;
@@ -482,7 +524,7 @@ namespace swrenderer
 				uint8_t pix = source[frac >> bits];
 				if (pix != 0)
 				{
-					uint8_t lit = num_dynlights != 0 ? AddLightsColumn(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
+					uint8_t lit = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_z, colormap[pix], pix) : colormap[pix];
 
 					int r = clamp(GPalette.BaseColors[lit].r - GPalette.BaseColors[*dest].r, 0, 255);
 					int g = clamp(GPalette.BaseColors[lit].g - GPalette.BaseColors[*dest].g, 0, 255);
@@ -498,7 +540,11 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawSingleSkyColumn(const SkyDrawerArgs& args)
+	PalSkyCommand::PalSkyCommand(const SkyDrawerArgs &args)
+		: args(args) // kept for the Execute() implementations of the sky commands
+	{ }
+
+	void DrawSingleSky1PalCommand::Execute(DrawerThread *thread)
 	{
 		uint8_t *dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
@@ -508,20 +554,9 @@ namespace swrenderer
 		int32_t frac = args.TextureVPos();
 		int32_t fracstep = args.TextureVStep();
 
-		int count = args.Count();
-
-		if (!args.FadeSky())
-		{
-			for (int index = 0; index < count; index++)
-			{
-				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
-				*dest = source0[sample_index];
-				dest += pitch;
-				frac += fracstep;
-			}
-
-			return;
-		}
+		int num_cores = thread->num_cores;
+		int skipped = thread->skipped_by_thread(args.DestY());
+		int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
 
 		// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
 		int start_fade = 2; // How fast it should fade out
@@ -535,6 +570,26 @@ namespace swrenderer
 		start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
 		end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
 
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * skipped;
+		fracstep *= num_cores;
+		pitch *= num_cores;
+
+		if (!args.FadeSky())
+		{
+			count = thread->count_for_thread(args.DestY(), args.Count());
+
+			for (int index = 0; index < count; index++)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				*dest = source0[sample_index];
+				dest += pitch;
+				frac += fracstep;
+			}
+
+			return;
+		}
+
 		uint32_t solid_top = args.SolidTopColor();
 		uint32_t solid_bottom = args.SolidBottomColor();
 
@@ -549,7 +604,7 @@ namespace swrenderer
 
 		const uint32_t *palette = (const uint32_t *)GPalette.BaseColors;
 
-		int index = 0;
+		int index = skipped;
 
 		// Top solid color:
 		while (index < start_fadetop_y)
@@ -557,7 +612,7 @@ namespace swrenderer
 			*dest = solid_top_fill;
 			dest += pitch;
 			frac += fracstep;
-			index++;
+			index += num_cores;
 		}
 
 		// Top fade:
@@ -579,7 +634,7 @@ namespace swrenderer
 
 			frac += fracstep;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 
 		// Textured center:
@@ -590,7 +645,7 @@ namespace swrenderer
 
 			frac += fracstep;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 
 		// Fade bottom:
@@ -612,7 +667,7 @@ namespace swrenderer
 
 			frac += fracstep;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 
 		// Bottom solid color:
@@ -620,11 +675,11 @@ namespace swrenderer
 		{
 			*dest = solid_bottom_fill;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 	}
 
-	void SWPalDrawers::DrawDoubleSkyColumn(const SkyDrawerArgs& args)
+	void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread)
 	{
 		uint8_t *dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
@@ -636,9 +691,31 @@ namespace swrenderer
 		int32_t frac = args.TextureVPos();
 		int32_t fracstep = args.TextureVStep();
 
-		int count = args.Count();
+		int num_cores = thread->num_cores;
+		int skipped = thread->skipped_by_thread(args.DestY());
+		int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
+
+		// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+		int start_fade = 2; // How fast it should fade out
+		int fade_length = (1 << (24 - start_fade));
+		int start_fadetop_y = (-frac) / fracstep;
+		int end_fadetop_y = (fade_length - frac) / fracstep;
+		int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+		int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+		start_fadetop_y = clamp(start_fadetop_y, 0, count);
+		end_fadetop_y = clamp(end_fadetop_y, 0, count);
+		start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+		end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * skipped;
+		fracstep *= num_cores;
+		pitch *= num_cores;
+
 		if (!args.FadeSky())
 		{
+			count = thread->count_for_thread(args.DestY(), count);
+
 			for (int index = 0; index < count; index++)
 			{
 				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
@@ -657,18 +734,6 @@ namespace swrenderer
 			return;
 		}
 
-		// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
-		int start_fade = 2; // How fast it should fade out
-		int fade_length = (1 << (24 - start_fade));
-		int start_fadetop_y = (-frac) / fracstep;
-		int end_fadetop_y = (fade_length - frac) / fracstep;
-		int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
-		int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
-		start_fadetop_y = clamp(start_fadetop_y, 0, count);
-		end_fadetop_y = clamp(end_fadetop_y, 0, count);
-		start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
-		end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
-
 		uint32_t solid_top = args.SolidTopColor();
 		uint32_t solid_bottom = args.SolidBottomColor();
 
@@ -683,7 +748,7 @@ namespace swrenderer
 
 		const uint32_t *palette = (const uint32_t *)GPalette.BaseColors;
 
-		int index = 0;
+		int index = skipped;
 
 		// Top solid color:
 		while (index < start_fadetop_y)
@@ -691,7 +756,7 @@ namespace swrenderer
 			*dest = solid_top_fill;
 			dest += pitch;
 			frac += fracstep;
-			index++;
+			index += num_cores;
 		}
 
 		// Top fade:
@@ -718,7 +783,7 @@ namespace swrenderer
 
 			frac += fracstep;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 
 		// Textured center:
@@ -735,7 +800,7 @@ namespace swrenderer
 
 			frac += fracstep;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 
 		// Fade bottom:
@@ -762,7 +827,7 @@ namespace swrenderer
 
 			frac += fracstep;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 
 		// Bottom solid color:
@@ -770,13 +835,17 @@ namespace swrenderer
 		{
 			*dest = solid_bottom_fill;
 			dest += pitch;
-			index++;
+			index += num_cores;
 		}
 	}
 
 	/////////////////////////////////////////////////////////////////////////
 
-	uint8_t SWPalDrawers::AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b)
+	PalColumnCommand::PalColumnCommand(const SpriteDrawerArgs &args) : args(args) // Initialises the args member from the parameter of the same name; the body does no other work.
+	{
+	}
+
+	uint8_t PalColumnCommand::AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b)
 	{
 		if (lit_r == 0 && lit_g == 0 && lit_b == 0)
 			return fg;
@@ -792,7 +861,7 @@ namespace swrenderer
 		return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)];
 	}
 
-	void SWPalDrawers::DrawColumn(const SpriteDrawerArgs& args)
+	void DrawColumnPalCommand::Execute(DrawerThread *thread)
 	{
 		int count;
 		uint8_t *dest;
@@ -800,8 +869,6 @@ namespace swrenderer
 		fixed_t fracstep;
 
 		count = args.Count();
-		if (count <= 0)
-			return;
 
 		// Framebuffer destination address.
 		dest = args.Dest();
@@ -811,7 +878,15 @@ namespace swrenderer
 		fracstep = args.TextureVStep();
 		frac = args.TextureVPos();
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		// [RH] Get local copies of these variables so that the compiler
 		//		has a better chance of optimizing this well.
@@ -856,15 +931,21 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::FillColumn(const SpriteDrawerArgs& args)
+	void FillColumnPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
-
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		pitch *= thread->num_cores;
 
 		uint8_t color = args.SolidColor();
 		do
@@ -874,17 +955,26 @@ namespace swrenderer
 		} while (--count);
 	}
 
-	void SWPalDrawers::FillAddColumn(const SpriteDrawerArgs& args)
+	void FillColumnAddPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+
+		count = args.Count();
+		dest = args.Dest();
+		uint32_t *bg2rgb;
+		uint32_t fg;
+
+		bg2rgb = args.DestBlend();
+		fg = args.SrcColorIndex();
+		int pitch = args.Viewport()->RenderTarget->GetPitch();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
-		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		uint32_t *bg2rgb = args.DestBlend();
-		uint32_t fg = args.SrcColorIndex();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		pitch *= thread->num_cores;
 
 		const PalEntry* pal = GPalette.BaseColors;
 
@@ -917,14 +1007,12 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::FillAddClampColumn(const SpriteDrawerArgs& args)
+	void FillColumnAddClampPalCommand::Execute(DrawerThread *thread)
 	{
 		int count;
 		uint8_t *dest;
 
 		count = args.Count();
-		if (count <= 0)
-			return;
 
 		dest = args.Dest();
 		uint32_t *bg2rgb;
@@ -934,6 +1022,13 @@ namespace swrenderer
 		fg = args.SrcColorIndex();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
 
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		pitch *= thread->num_cores;
+
 		const PalEntry* pal = GPalette.BaseColors;
 
 		if (!r_blendmethod)
@@ -971,17 +1066,25 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::FillSubClampColumn(const SpriteDrawerArgs& args)
+	void FillColumnSubClampPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+
+		count = args.Count();
+
+		dest = args.Dest();
+		uint32_t *bg2rgb = args.DestBlend();
+		uint32_t fg = args.SrcColorIndex();
+
+		int pitch = args.Viewport()->RenderTarget->GetPitch();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
-		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		uint32_t *bg2rgb = args.DestBlend();
-		uint32_t fg = args.SrcColorIndex();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		pitch *= thread->num_cores;
 
 		const PalEntry* palette = GPalette.BaseColors;
 
@@ -1021,18 +1124,28 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::FillRevSubClampColumn(const SpriteDrawerArgs& args)
+	void FillColumnRevSubClampPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+
+		count = args.Count();
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
-		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
+		dest = args.Dest();
 		uint32_t *bg2rgb = args.DestBlend();
 		uint32_t fg = args.SrcColorIndex();
 
+		int pitch = args.Viewport()->RenderTarget->GetPitch();
+
+		count = thread->count_for_thread(args.DestY(), count);
+		if (count <= 0)
+			return;
+
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		pitch *= thread->num_cores;
+
 		const PalEntry *palette = GPalette.BaseColors;
 
 		if (!r_blendmethod)
@@ -1071,17 +1184,28 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawAddColumn(const SpriteDrawerArgs& args)
+	void DrawColumnAddPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		uint32_t *fg2rgb = args.SrcBlend();
 		uint32_t *bg2rgb = args.DestBlend();
@@ -1123,17 +1247,29 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawTranslatedColumn(const SpriteDrawerArgs& args)
+	void DrawColumnTranslatedPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int 				count;
+		uint8_t*				dest;
+		fixed_t 			frac;
+		fixed_t 			fracstep;
+
+		count = args.Count();
+
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		// [RH] Local copies of global vars to improve compiler optimizations
 		const uint8_t *colormap = args.Colormap(args.Viewport());
@@ -1149,17 +1285,28 @@ namespace swrenderer
 		} while (--count);
 	}
 
-	void SWPalDrawers::DrawTranslatedAddColumn(const SpriteDrawerArgs& args)
+	void DrawColumnTlatedAddPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		uint32_t *fg2rgb = args.SrcBlend();
 		uint32_t *bg2rgb = args.DestBlend();
@@ -1202,17 +1349,27 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawShadedColumn(const SpriteDrawerArgs& args)
+	void DrawColumnShadedPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int  count;
+		uint8_t *dest;
+		fixed_t frac, fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *source = args.TexturePixels();
 		const uint8_t *colormap = args.Colormap(args.Viewport());
@@ -1256,17 +1413,27 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawAddClampShadedColumn(const SpriteDrawerArgs& args)
+	void DrawColumnAddClampShadedPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int  count;
+		uint8_t *dest;
+		fixed_t frac, fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *source = args.TexturePixels();
 		const uint8_t *colormap = args.Colormap(args.Viewport());
@@ -1288,17 +1455,28 @@ namespace swrenderer
 		} while (--count);
 	}
 
-	void SWPalDrawers::DrawAddClampColumn(const SpriteDrawerArgs& args)
+	void DrawColumnAddClampPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *colormap = args.Colormap(args.Viewport());
 		const uint8_t *source = args.TexturePixels();
@@ -1341,17 +1519,28 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawAddClampTranslatedColumn(const SpriteDrawerArgs& args)
+	void DrawColumnAddClampTranslatedPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *translation = args.TranslationMap();
 		const uint8_t *colormap = args.Colormap(args.Viewport());
@@ -1395,17 +1584,28 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawSubClampColumn(const SpriteDrawerArgs& args)
+	void DrawColumnSubClampPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *colormap = args.Colormap(args.Viewport());
 		const uint8_t *source = args.TexturePixels();
@@ -1447,17 +1647,28 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawSubClampTranslatedColumn(const SpriteDrawerArgs& args)
+	void DrawColumnSubClampTranslatedPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *translation = args.TranslationMap();
 		const uint8_t *colormap = args.Colormap(args.Viewport());
@@ -1500,17 +1711,28 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawRevSubClampColumn(const SpriteDrawerArgs& args)
+	void DrawColumnRevSubClampPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *colormap = args.Colormap(args.Viewport());
 		const uint8_t *source = args.TexturePixels();
@@ -1552,17 +1774,28 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs& args)
+	void DrawColumnRevSubClampTranslatedPalCommand::Execute(DrawerThread *thread)
 	{
-		int count = args.Count();
+		int count;
+		uint8_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = args.Count();
+		dest = args.Dest();
+
+		fracstep = args.TextureVStep();
+		frac = args.TextureVPos();
+
+		count = thread->count_for_thread(args.DestY(), count);
 		if (count <= 0)
 			return;
 
-		uint8_t* dest = args.Dest();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
-
-		fixed_t fracstep = args.TextureVStep();
-		fixed_t frac = args.TextureVPos();
+		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+		frac += fracstep * thread->skipped_by_thread(args.DestY());
+		fracstep *= thread->num_cores;
+		pitch *= thread->num_cores;
 
 		const uint8_t *translation = args.TranslationMap();
 		const uint8_t *colormap = args.Colormap(args.Viewport());
@@ -1607,21 +1840,24 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawScaledFuzzColumn(const SpriteDrawerArgs& args)
+	DrawScaledFuzzColumnPalCommand::DrawScaledFuzzColumnPalCommand(const SpriteDrawerArgs &drawerargs) // Copies the fuzz column parameters into members so Execute() can read them later.
 	{
-		int _yl = args.FuzzY1();
-		int _yh = args.FuzzY2();
-		int _x = args.FuzzX();
-		uint8_t* _destorg = args.Viewport()->GetDest(0, 0);
-		int _pitch = args.Viewport()->RenderTarget->GetPitch();
-		int _fuzzpos = fuzzpos;
-		int _fuzzviewheight = fuzzviewheight;
+		_x = drawerargs.FuzzX();
+		_yl = drawerargs.FuzzY1(); // Execute() raises this to at least 1 before use.
+		_yh = drawerargs.FuzzY2(); // Execute() limits this to at most _fuzzviewheight before use.
+		_destorg = drawerargs.Viewport()->GetDest(0, 0); // Destination address for (0, 0) as returned by the viewport.
+		_pitch = drawerargs.Viewport()->RenderTarget->GetPitch();
+		_fuzzpos = fuzzpos; // Value of fuzzpos at construction time.
+		_fuzzviewheight = fuzzviewheight; // Value at construction time; Execute() divides by it to compute fuzzstep.
+	}
 
+	void DrawScaledFuzzColumnPalCommand::Execute(DrawerThread *thread)
+	{
 		int x = _x;
 		int yl = MAX(_yl, 1);
 		int yh = MIN(_yh, _fuzzviewheight);
 
-		int count = yh - yl + 1;
+		int count = thread->count_for_thread(yl, yh - yl + 1);
 		if (count <= 0) return;
 
 		int pitch = _pitch;
@@ -1632,7 +1868,14 @@ namespace swrenderer
 
 		fixed_t fuzzstep = (200 << FRACBITS) / _fuzzviewheight;
 		fixed_t fuzzcount = FUZZTABLE << FRACBITS;
-		fixed_t fuzz = ((fuzz_x << FRACBITS) + yl * fuzzstep) % fuzzcount;
+		fixed_t fuzz = (fuzz_x << FRACBITS) + yl * fuzzstep;
+
+		dest = thread->dest_for_thread(yl, pitch, dest);
+		pitch *= thread->num_cores;
+
+		fuzz += fuzzstep * thread->skipped_by_thread(yl);
+		fuzz %= fuzzcount;
+		fuzzstep *= thread->num_cores;
 
 		uint8_t *map = NormalLight.Maps;
 
@@ -1651,30 +1894,34 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawUnscaledFuzzColumn(const SpriteDrawerArgs& args)
+	DrawFuzzColumnPalCommand::DrawFuzzColumnPalCommand(const SpriteDrawerArgs &args) // Copies the fuzz column parameters into members so Execute() can read them later.
 	{
-		int _yl = args.FuzzY1();
-		int _yh = args.FuzzY2();
-		int _x = args.FuzzX();
-		uint8_t* _destorg = args.Viewport()->GetDest(0, 0);
-		int _pitch = args.Viewport()->RenderTarget->GetPitch();
-		int _fuzzpos = fuzzpos;
-		int _fuzzviewheight = fuzzviewheight;
+		_yl = args.FuzzY1(); // Execute() raises this to at least 1 before use.
+		_yh = args.FuzzY2(); // Execute() limits this to at most _fuzzviewheight before use.
+		_x = args.FuzzX(); // Added to yl * pitch and _destorg in Execute() to form the start address.
+		_destorg = args.Viewport()->GetDest(0, 0); // Base address that Execute() offsets by row and column.
+		_pitch = args.Viewport()->RenderTarget->GetPitch();
+		_fuzzpos = fuzzpos; // Value of fuzzpos at construction time; Execute() offsets it by the thread's skipped rows, modulo FUZZTABLE.
+		_fuzzviewheight = fuzzviewheight; // Value at construction time; upper clamp for yh in Execute().
+	}
 
+	void DrawFuzzColumnPalCommand::Execute(DrawerThread *thread)
+	{
 		int yl = MAX(_yl, 1);
 		int yh = MIN(_yh, _fuzzviewheight);
 
-		int count = yh - yl + 1;
+		int count = thread->count_for_thread(yl, yh - yl + 1);
 
 		// Zero length.
 		if (count <= 0)
 			return;
 
 		int pitch = _pitch;
-		uint8_t *dest = yl * pitch + _x + _destorg;
+		uint8_t *dest = thread->dest_for_thread(yl, pitch, yl * pitch + _x + _destorg);
 
-		int fuzzstep = 1;
-		int fuzz = _fuzzpos % FUZZTABLE;
+		pitch = pitch * thread->num_cores;
+		int fuzzstep = thread->num_cores;
+		int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE;
 
 #ifndef ORIGINAL_FUZZ
 
@@ -1705,6 +1952,8 @@ namespace swrenderer
 
 		uint8_t *map = &NormalLight.Maps[6 * 256];
 
+		yl += thread->skipped_by_thread(yl);
+
 		// Handle the case where we would go out of bounds at the top:
 		if (yl < fuzzstep)
 		{
@@ -1761,7 +2010,32 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////
 
-	uint8_t SWPalDrawers::AddLightsSpan(const DrawerLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material)
+	PalSpanCommand::PalSpanCommand(const SpanDrawerArgs &args) // Copies the span parameters out of args into members.
+	{
+		_source = args.TexturePixels();
+		_colormap = args.Colormap(args.Viewport());
+		_xfrac = args.TextureUPos();
+		_yfrac = args.TextureVPos();
+		_y = args.DestY(); // Span row; NOTE(review): presumably the _y that the span Execute() methods pass to line_skipped_by_thread() — confirm.
+		_x1 = args.DestX1();
+		_x2 = args.DestX2();
+		_dest = args.Viewport()->GetDest(_x1, _y); // Uses _x1 and _y, so it must stay after their assignments above.
+		_xstep = args.TextureUStep();
+		_ystep = args.TextureVStep();
+		_srcwidth = args.TextureWidth();
+		_srcheight = args.TextureHeight();
+		_srcblend = args.SrcBlend();
+		_destblend = args.DestBlend();
+		_color = args.SolidColor();
+		_srcalpha = args.SrcAlpha();
+		_destalpha = args.DestAlpha();
+		_dynlights = args.dc_lights; // Copied as-is from args; NOTE(review): if this is a pointer, the lights presumably must outlive the command — confirm.
+		_num_dynlights = args.dc_num_lights;
+		_viewpos_x = args.dc_viewpos.X;
+		_step_viewpos_x = args.dc_viewpos_step.X;
+	}
+
+	uint8_t PalSpanCommand::AddLights(const DrawerLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material)
 	{
 		uint32_t lit_r = 0;
 		uint32_t lit_g = 0;
@@ -1814,29 +2088,10 @@ namespace swrenderer
 		return RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)];
 	}
 
-	void SWPalDrawers::DrawSpan(const SpanDrawerArgs& args)
+	void DrawSpanPalCommand::Execute(DrawerThread *thread)
 	{
-		const uint8_t* _source = args.TexturePixels();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint32_t _xfrac = args.TextureUPos();
-		uint32_t _yfrac = args.TextureVPos();
-		int _y = args.DestY();
-		int _x1 = args.DestX1();
-		int _x2 = args.DestX2();
-		uint8_t* _dest = args.Viewport()->GetDest(_x1, _y);
-		uint32_t _xstep = args.TextureUStep();
-		uint32_t _ystep = args.TextureVStep();
-		int _srcwidth = args.TextureWidth();
-		int _srcheight = args.TextureHeight();
-		uint32_t* _srcblend = args.SrcBlend();
-		uint32_t* _destblend = args.DestBlend();
-		int _color = args.SolidColor();
-		fixed_t _srcalpha = args.SrcAlpha();
-		fixed_t _destalpha = args.DestAlpha();
-		DrawerLight* _dynlights = args.dc_lights;
-		int _num_dynlights = args.dc_num_lights;
-		float _viewpos_x = args.dc_viewpos.X;
-		float _step_viewpos_x = args.dc_viewpos_step.X;
+		if (thread->line_skipped_by_thread(_y))
+			return;
 
 		uint32_t xfrac;
 		uint32_t yfrac;
@@ -1890,7 +2145,7 @@ namespace swrenderer
 
 				// Lookup pixel from flat texture tile,
 				//  re-index using light/colormap.
-				*dest++ = AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]);
+				*dest++ = AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]);
 
 				// Next step in u,v.
 				xfrac += xstep;
@@ -1910,7 +2165,7 @@ namespace swrenderer
 
 				// Lookup pixel from flat texture tile,
 				//  re-index using light/colormap.
-				*dest++ = AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]);
+				*dest++ = AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]);
 
 				// Next step in u,v.
 				xfrac += xstep;
@@ -1920,29 +2175,10 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawSpanMasked(const SpanDrawerArgs& args)
+	void DrawSpanMaskedPalCommand::Execute(DrawerThread *thread)
 	{
-		const uint8_t* _source = args.TexturePixels();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint32_t _xfrac = args.TextureUPos();
-		uint32_t _yfrac = args.TextureVPos();
-		int _y = args.DestY();
-		int _x1 = args.DestX1();
-		int _x2 = args.DestX2();
-		uint8_t* _dest = args.Viewport()->GetDest(_x1, _y);
-		uint32_t _xstep = args.TextureUStep();
-		uint32_t _ystep = args.TextureVStep();
-		int _srcwidth = args.TextureWidth();
-		int _srcheight = args.TextureHeight();
-		uint32_t* _srcblend = args.SrcBlend();
-		uint32_t* _destblend = args.DestBlend();
-		int _color = args.SolidColor();
-		fixed_t _srcalpha = args.SrcAlpha();
-		fixed_t _destalpha = args.DestAlpha();
-		DrawerLight* _dynlights = args.dc_lights;
-		int _num_dynlights = args.dc_num_lights;
-		float _viewpos_x = args.dc_viewpos.X;
-		float _step_viewpos_x = args.dc_viewpos_step.X;
+		if (thread->line_skipped_by_thread(_y))
+			return;
 
 		uint32_t xfrac;
 		uint32_t yfrac;
@@ -1980,7 +2216,7 @@ namespace swrenderer
 				texdata = source[spot];
 				if (texdata != 0)
 				{
-					*dest = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+					*dest = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 				}
 				dest++;
 				xfrac += xstep;
@@ -2001,7 +2237,7 @@ namespace swrenderer
 				texdata = source[spot];
 				if (texdata != 0)
 				{
-					*dest = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+					*dest = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 				}
 				dest++;
 				xfrac += xstep;
@@ -2011,29 +2247,10 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawSpanTranslucent(const SpanDrawerArgs& args)
+	void DrawSpanTranslucentPalCommand::Execute(DrawerThread *thread)
 	{
-		const uint8_t* _source = args.TexturePixels();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint32_t _xfrac = args.TextureUPos();
-		uint32_t _yfrac = args.TextureVPos();
-		int _y = args.DestY();
-		int _x1 = args.DestX1();
-		int _x2 = args.DestX2();
-		uint8_t* _dest = args.Viewport()->GetDest(_x1, _y);
-		uint32_t _xstep = args.TextureUStep();
-		uint32_t _ystep = args.TextureVStep();
-		int _srcwidth = args.TextureWidth();
-		int _srcheight = args.TextureHeight();
-		uint32_t* _srcblend = args.SrcBlend();
-		uint32_t* _destblend = args.DestBlend();
-		int _color = args.SolidColor();
-		fixed_t _srcalpha = args.SrcAlpha();
-		fixed_t _destalpha = args.DestAlpha();
-		DrawerLight* _dynlights = args.dc_lights;
-		int _num_dynlights = args.dc_num_lights;
-		float _viewpos_x = args.dc_viewpos.X;
-		float _step_viewpos_x = args.dc_viewpos_step.X;
+		if (thread->line_skipped_by_thread(_y))
+			return;
 
 		uint32_t xfrac;
 		uint32_t yfrac;
@@ -2072,7 +2289,7 @@ namespace swrenderer
 				do
 				{
 					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t bg = *dest;
 					fg = fg2rgb[fg];
 					bg = bg2rgb[bg];
@@ -2091,7 +2308,7 @@ namespace swrenderer
 				do
 				{
 					spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t bg = *dest;
 					fg = fg2rgb[fg];
 					bg = bg2rgb[bg];
@@ -2111,7 +2328,7 @@ namespace swrenderer
 				do
 				{
 					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t bg = *dest;
 					int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 					int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2131,7 +2348,7 @@ namespace swrenderer
 				do
 				{
 					spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t bg = *dest;
 					int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 					int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2146,29 +2363,10 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawSpanMaskedTranslucent(const SpanDrawerArgs& args)
+	void DrawSpanMaskedTranslucentPalCommand::Execute(DrawerThread *thread)
 	{
-		const uint8_t* _source = args.TexturePixels();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint32_t _xfrac = args.TextureUPos();
-		uint32_t _yfrac = args.TextureVPos();
-		int _y = args.DestY();
-		int _x1 = args.DestX1();
-		int _x2 = args.DestX2();
-		uint8_t* _dest = args.Viewport()->GetDest(_x1, _y);
-		uint32_t _xstep = args.TextureUStep();
-		uint32_t _ystep = args.TextureVStep();
-		int _srcwidth = args.TextureWidth();
-		int _srcheight = args.TextureHeight();
-		uint32_t* _srcblend = args.SrcBlend();
-		uint32_t* _destblend = args.DestBlend();
-		int _color = args.SolidColor();
-		fixed_t _srcalpha = args.SrcAlpha();
-		fixed_t _destalpha = args.DestAlpha();
-		DrawerLight* _dynlights = args.dc_lights;
-		int _num_dynlights = args.dc_num_lights;
-		float _viewpos_x = args.dc_viewpos.X;
-		float _step_viewpos_x = args.dc_viewpos_step.X;
+		if (thread->line_skipped_by_thread(_y))
+			return;
 
 		uint32_t xfrac;
 		uint32_t yfrac;
@@ -2212,7 +2410,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t bg = *dest;
 						fg = fg2rgb[fg];
 						bg = bg2rgb[bg];
@@ -2238,7 +2436,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t bg = *dest;
 						fg = fg2rgb[fg];
 						bg = bg2rgb[bg];
@@ -2265,7 +2463,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t bg = *dest;
 						int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 						int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2291,7 +2489,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t bg = *dest;
 						int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 						int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2307,29 +2505,10 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawSpanAddClamp(const SpanDrawerArgs& args)
+	void DrawSpanAddClampPalCommand::Execute(DrawerThread *thread)
 	{
-		const uint8_t* _source = args.TexturePixels();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint32_t _xfrac = args.TextureUPos();
-		uint32_t _yfrac = args.TextureVPos();
-		int _y = args.DestY();
-		int _x1 = args.DestX1();
-		int _x2 = args.DestX2();
-		uint8_t* _dest = args.Viewport()->GetDest(_x1, _y);
-		uint32_t _xstep = args.TextureUStep();
-		uint32_t _ystep = args.TextureVStep();
-		int _srcwidth = args.TextureWidth();
-		int _srcheight = args.TextureHeight();
-		uint32_t* _srcblend = args.SrcBlend();
-		uint32_t* _destblend = args.DestBlend();
-		int _color = args.SolidColor();
-		fixed_t _srcalpha = args.SrcAlpha();
-		fixed_t _destalpha = args.DestAlpha();
-		DrawerLight* _dynlights = args.dc_lights;
-		int _num_dynlights = args.dc_num_lights;
-		float _viewpos_x = args.dc_viewpos.X;
-		float _step_viewpos_x = args.dc_viewpos_step.X;
+		if (thread->line_skipped_by_thread(_y))
+			return;
 
 		uint32_t xfrac;
 		uint32_t yfrac;
@@ -2367,7 +2546,7 @@ namespace swrenderer
 				do
 				{
 					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t a = fg2rgb[fg] + bg2rgb[*dest];
 					uint32_t b = a;
 
@@ -2390,7 +2569,7 @@ namespace swrenderer
 				do
 				{
 					spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t a = fg2rgb[fg] + bg2rgb[*dest];
 					uint32_t b = a;
 
@@ -2414,7 +2593,7 @@ namespace swrenderer
 				do
 				{
 					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t bg = *dest;
 					int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 					int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2434,7 +2613,7 @@ namespace swrenderer
 				do
 				{
 					spot = (((xfrac >> 16) * srcwidth) >> 16) * srcheight + (((yfrac >> 16) * srcheight) >> 16);
-					uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
+					uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[source[spot]], source[spot]) : colormap[source[spot]];
 					uint32_t bg = *dest;
 					int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 					int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2449,29 +2628,10 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::DrawSpanMaskedAddClamp(const SpanDrawerArgs& args)
+	void DrawSpanMaskedAddClampPalCommand::Execute(DrawerThread *thread)
 	{
-		const uint8_t* _source = args.TexturePixels();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint32_t _xfrac = args.TextureUPos();
-		uint32_t _yfrac = args.TextureVPos();
-		int _y = args.DestY();
-		int _x1 = args.DestX1();
-		int _x2 = args.DestX2();
-		uint8_t* _dest = args.Viewport()->GetDest(_x1, _y);
-		uint32_t _xstep = args.TextureUStep();
-		uint32_t _ystep = args.TextureVStep();
-		int _srcwidth = args.TextureWidth();
-		int _srcheight = args.TextureHeight();
-		uint32_t* _srcblend = args.SrcBlend();
-		uint32_t* _destblend = args.DestBlend();
-		int _color = args.SolidColor();
-		fixed_t _srcalpha = args.SrcAlpha();
-		fixed_t _destalpha = args.DestAlpha();
-		DrawerLight* _dynlights = args.dc_lights;
-		int _num_dynlights = args.dc_num_lights;
-		float _viewpos_x = args.dc_viewpos.X;
-		float _step_viewpos_x = args.dc_viewpos_step.X;
+		if (thread->line_skipped_by_thread(_y))
+			return;
 
 		uint32_t xfrac;
 		uint32_t yfrac;
@@ -2514,7 +2674,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t a = fg2rgb[fg] + bg2rgb[*dest];
 						uint32_t b = a;
 	
@@ -2544,7 +2704,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t a = fg2rgb[fg] + bg2rgb[*dest];
 						uint32_t b = a;
 	
@@ -2575,7 +2735,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t bg = *dest;
 						int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 						int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2601,7 +2761,7 @@ namespace swrenderer
 					texdata = source[spot];
 					if (texdata != 0)
 					{
-						uint32_t fg = num_dynlights != 0 ? AddLightsSpan(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
+						uint32_t fg = num_dynlights != 0 ? AddLights(dynlights, num_dynlights, viewpos_x, colormap[texdata], texdata) : colormap[texdata];
 						uint32_t bg = *dest;
 						int r = MAX((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0);
 						int g = MAX((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0);
@@ -2617,49 +2777,37 @@ namespace swrenderer
 		}
 	}
 
-	void SWPalDrawers::FillSpan(const SpanDrawerArgs& args)
+	void FillSpanPalCommand::Execute(DrawerThread *thread) // Fills _x2 - _x1 + 1 bytes starting at _dest with _color.
 	{
-		const uint8_t* _source = args.TexturePixels();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint32_t _xfrac = args.TextureUPos();
-		uint32_t _yfrac = args.TextureVPos();
-		int _y = args.DestY();
-		int _x1 = args.DestX1();
-		int _x2 = args.DestX2();
-		uint8_t* _dest = args.Viewport()->GetDest(_x1, _y);
-		uint32_t _xstep = args.TextureUStep();
-		uint32_t _ystep = args.TextureVStep();
-		int _srcwidth = args.TextureWidth();
-		int _srcheight = args.TextureHeight();
-		uint32_t* _srcblend = args.SrcBlend();
-		uint32_t* _destblend = args.DestBlend();
-		int _color = args.SolidColor();
-		fixed_t _srcalpha = args.SrcAlpha();
-		fixed_t _destalpha = args.DestAlpha();
-		DrawerLight* _dynlights = args.dc_lights;
-		int _num_dynlights = args.dc_num_lights;
-		float _viewpos_x = args.dc_viewpos.X;
-		float _step_viewpos_x = args.dc_viewpos_step.X;
+		if (thread->line_skipped_by_thread(_y)) // draw nothing when the thread reports row _y as skipped
+			return;
 
 		memset(_dest, _color, _x2 - _x1 + 1);
 	}
 
 	/////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawTiltedSpan(const SpanDrawerArgs& args, const FVector3& plane_sz, const FVector3& plane_su, const FVector3& plane_sv, bool is_planeshaded, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap* basecolormap)
+	DrawTiltedSpanPalCommand::DrawTiltedSpanPalCommand(const SpanDrawerArgs &args, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) // Copies the span and plane parameters into members; Execute reads those members instead of args.
+		: plane_sz(plane_sz), plane_su(plane_su), plane_sv(plane_sv), plane_shade(plane_shade), planeshade(planeshade), planelightfloat(planelightfloat), pviewx(pviewx), pviewy(pviewy)
 	{
-		int y = args.DestY();
-		int x1 = args.DestX1();
-		int x2 = args.DestX2();
-		RenderViewport* viewport = args.Viewport();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint8_t* _dest = args.Viewport()->GetDest(x1, y);
-		int _ybits = args.TextureHeightBits();
-		int _xbits = args.TextureWidthBits();
-		const uint8_t* _source = args.TexturePixels();
-		uint8_t* basecolormapdata = basecolormap->Maps;
+		y = args.DestY();
+		x1 = args.DestX1();
+		x2 = args.DestX2();
+		viewport = args.Viewport();
+		_colormap = args.Colormap(args.Viewport());
+		_dest = args.Viewport()->GetDest(x1, y); // destination is requested for (x1, y)
+		_ybits = args.TextureHeightBits();
+		_xbits = args.TextureWidthBits();
+		_source = args.TexturePixels();
+		basecolormapdata = basecolormap->Maps; // only the Maps pointer is kept; basecolormap is dereferenced unconditionally here
+	}
 
-		const uint8_t **tiltlighting = this->tiltlighting;
+	void DrawTiltedSpanPalCommand::Execute(DrawerThread *thread)
+	{
+		if (thread->line_skipped_by_thread(y))
+			return;
+
+		const uint8_t **tiltlighting = thread->tiltlighting;
 
 		int width = x2 - x1;
 		double iz, uz, vz;
@@ -2670,11 +2818,11 @@ namespace swrenderer
 		iz = plane_sz[2] + plane_sz[1] * (viewport->viewwindow.centery - y) + plane_sz[0] * (x1 - viewport->viewwindow.centerx);
 
 		// Lighting is simple. It's just linear interpolation from start to end
-		if (is_planeshaded)
+		if (plane_shade)
 		{
 			uz = (iz + plane_sz[0] * width) * planelightfloat;
 			vz = iz * planelightfloat;
-			CalcTiltedLighting(vz, uz, width, planeshade, basecolormapdata);
+			CalcTiltedLighting(vz, uz, width, thread);
 		}
 		else
 		{
@@ -2791,9 +2939,9 @@ namespace swrenderer
 
 	// Calculates the lighting for one row of a tilted plane. If the definition
 	// of GETPALOOKUP changes, this needs to change, too.
-	void SWPalDrawers::CalcTiltedLighting(double lstart, double lend, int width, int planeshade, uint8_t* basecolormapdata)
+	void DrawTiltedSpanPalCommand::CalcTiltedLighting(double lstart, double lend, int width, DrawerThread *thread)
 	{
-		const uint8_t **tiltlighting = this->tiltlighting;
+		const uint8_t **tiltlighting = thread->tiltlighting;
 
 		uint8_t *lightstart = basecolormapdata + (GETPALOOKUP(lstart, planeshade) << COLORMAPSHIFT);
 		uint8_t *lightend = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT);
@@ -2819,25 +2967,38 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawColoredSpan(const SpanDrawerArgs& args)
+	DrawColoredSpanPalCommand::DrawColoredSpanPalCommand(const SpanDrawerArgs &args) : PalSpanCommand(args) // Stores the row, x range and solid color taken from args.
 	{
-		int y = args.DestY();
-		int x1 = args.DestX1();
-		int x2 = args.DestX2();
-		int color = args.SolidColor();
-		uint8_t* _dest = args.Viewport()->GetDest(0, y);
+		y = args.DestY();
+		x1 = args.DestX1();
+		x2 = args.DestX2();
+		color = args.SolidColor();
+		dest = args.Viewport()->GetDest(x1, y); // NOTE(review): Execute writes through _dest (inherited from PalSpanCommand), not this member - confirm which is intended
+	}
+
+	void DrawColoredSpanPalCommand::Execute(DrawerThread *thread) // Fills x2 - x1 + 1 bytes with color.
+	{
+		if (thread->line_skipped_by_thread(y)) // draw nothing when the thread reports row y as skipped
+			return;
+
 		memset(_dest, color, x2 - x1 + 1);
 	}
 
 	/////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawFogBoundaryLine(const SpanDrawerArgs& args)
+	DrawFogBoundaryLinePalCommand::DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args) : PalSpanCommand(args) // Stores the row, x range, colormap and destination pointer taken from args.
 	{
-		int y = args.DestY();
-		int x1 = args.DestX1();
-		int x2 = args.DestX2();
-		const uint8_t* _colormap = args.Colormap(args.Viewport());
-		uint8_t* _dest = args.Viewport()->GetDest(0, y);
+		y = args.DestY();
+		x1 = args.DestX1();
+		x2 = args.DestX2();
+		_colormap = args.Colormap(args.Viewport());
+		_dest = args.Viewport()->GetDest(0, y); // requested at column 0 rather than x1; presumably Execute indexes it by absolute x - confirm
+	}
+
+	void DrawFogBoundaryLinePalCommand::Execute(DrawerThread *thread)
+	{
+		if (thread->line_skipped_by_thread(y))
+			return;
 
 		const uint8_t *colormap = _colormap;
 		uint8_t *dest = _dest;
@@ -2850,21 +3011,34 @@ namespace swrenderer
 	
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawParticleColumn(int x, int _dest_y, int _count, uint32_t _fg, uint32_t _alpha, uint32_t _fracposx)
+	DrawParticleColumnPalCommand::DrawParticleColumnPalCommand(uint8_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) // Stores each argument in the underscore-prefixed member of the same name.
 	{
-		uint8_t* dest = thread->Viewport->GetDest(x, _dest_y);
-		int pitch = thread->Viewport->RenderTarget->GetPitch();
+		_dest = dest;
+		_pitch = pitch;
+		_count = count;
+		_fg = fg;
+		_alpha = alpha;
+		_fracposx = fracposx;
+		_dest_y = dest_y;
+	}
 
-		int count = _count;
+	void DrawParticleColumnPalCommand::Execute(DrawerThread *thread)
+	{
+		int count = thread->count_for_thread(_dest_y, _count);
 		if (count <= 0)
 			return;
 
+		int pitch = _pitch;
+		uint8_t *dest = thread->dest_for_thread(_dest_y, pitch, _dest);
+		pitch = pitch * thread->num_cores;
+
 		int particle_texture_index = MIN<int>(gl_particles_style, NUM_PARTICLE_TEXTURES - 1);
 		const uint32_t *source = &particle_texture[particle_texture_index][(_fracposx >> FRACBITS) * PARTICLE_TEXTURE_SIZE];
 		uint32_t particle_alpha = _alpha;
 
 		uint32_t fracstep = PARTICLE_TEXTURE_SIZE * FRACUNIT / _count;
-		uint32_t fracpos = fracstep / 2;
+		uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2;
+		fracstep *= thread->num_cores;
 
 		uint32_t fg_red = (_fg >> 16) & 0xff;
 		uint32_t fg_green = (_fg >> 8) & 0xff;
@@ -2892,7 +3066,11 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWPalDrawers::DrawVoxelBlocks(const SpriteDrawerArgs& args, const VoxelBlock* blocks, int blockcount)
+	DrawVoxelBlocksPalCommand::DrawVoxelBlocksPalCommand(const SpriteDrawerArgs &args, const VoxelBlock *blocks, int blockcount) : args(args), blocks(blocks), blockcount(blockcount) // args is copied by value; blocks is stored as a pointer only (presumably must outlive Execute - confirm)
+	{
+	}
+
+	void DrawVoxelBlocksPalCommand::Execute(DrawerThread *thread)
 	{
 		int destpitch = args.Viewport()->RenderTarget->GetPitch();
 		uint8_t *destorig = args.Viewport()->RenderTarget->GetPixels();
@@ -2911,6 +3089,12 @@ namespace swrenderer
 			int pitch = destpitch;
 			uint8_t *dest = destorig + (block.x + block.y * pitch);
 
+			count = thread->count_for_thread(block.y, count);
+			dest = thread->dest_for_thread(block.y, pitch, dest);
+			fracpos += iscale * thread->skipped_by_thread(block.y);
+			iscale *= thread->num_cores;
+			pitch *= thread->num_cores;
+
 			if (width == 1)
 			{
 				while (count > 0)
@@ -2977,164 +3161,4 @@ namespace swrenderer
 			}
 		}
 	}
-
-	template<typename DrawerT>
-	void SWPalDrawers::DrawWallColumns(const WallDrawerArgs& wallargs)
-	{
-		wallcolargs.wallargs = &wallargs;
-
-		bool haslights = r_dynlights && wallargs.lightlist;
-		if (haslights)
-		{
-			float dx = wallargs.WallC.tright.X - wallargs.WallC.tleft.X;
-			float dy = wallargs.WallC.tright.Y - wallargs.WallC.tleft.Y;
-			float length = sqrt(dx * dx + dy * dy);
-			wallcolargs.dc_normal.X = dy / length;
-			wallcolargs.dc_normal.Y = -dx / length;
-			wallcolargs.dc_normal.Z = 0.0f;
-		}
-
-		wallcolargs.SetTextureFracBits(wallargs.fracbits);
-
-		float curlight = wallargs.lightpos;
-		float lightstep = wallargs.lightstep;
-		int shade = wallargs.Shade();
-
-		if (wallargs.fixedlight)
-		{
-			curlight = wallargs.FixedLight();
-			lightstep = 0;
-		}
-
-		float upos = wallargs.texcoords.upos, ustepX = wallargs.texcoords.ustepX, ustepY = wallargs.texcoords.ustepY;
-		float vpos = wallargs.texcoords.vpos, vstepX = wallargs.texcoords.vstepX, vstepY = wallargs.texcoords.vstepY;
-		float wpos = wallargs.texcoords.wpos, wstepX = wallargs.texcoords.wstepX, wstepY = wallargs.texcoords.wstepY;
-		float startX = wallargs.texcoords.startX;
-
-		int x1 = wallargs.x1;
-		int x2 = wallargs.x2;
-
-		upos += ustepX * (x1 + 0.5f - startX);
-		vpos += vstepX * (x1 + 0.5f - startX);
-		wpos += wstepX * (x1 + 0.5f - startX);
-
-		float centerY = wallargs.CenterY;
-		centerY -= 0.5f;
-
-		auto uwal = wallargs.uwal;
-		auto dwal = wallargs.dwal;
-		for (int x = x1; x < x2; x++)
-		{
-			int y1 = uwal[x];
-			int y2 = dwal[x];
-			if (y2 > y1)
-			{
-				wallcolargs.SetLight(curlight, shade);
-				if (haslights)
-					SetLights(wallcolargs, x, y1, wallargs);
-				else
-					wallcolargs.dc_num_lights = 0;
-
-				float dy = (y1 - centerY);
-				float u = upos + ustepY * dy;
-				float v = vpos + vstepY * dy;
-				float w = wpos + wstepY * dy;
-				float scaleU = ustepX;
-				float scaleV = vstepY;
-				w = 1.0f / w;
-				u *= w;
-				v *= w;
-				scaleU *= w;
-				scaleV *= w;
-
-				uint32_t texelX = (uint32_t)(int64_t)((u - std::floor(u)) * 0x1'0000'0000LL);
-				uint32_t texelY = (uint32_t)(int64_t)((v - std::floor(v)) * 0x1'0000'0000LL);
-				uint32_t texelStepX = (uint32_t)(int64_t)(scaleU * 0x1'0000'0000LL);
-				uint32_t texelStepY = (uint32_t)(int64_t)(scaleV * 0x1'0000'0000LL);
-
-				DrawWallColumn8<DrawerT>(wallcolargs, x, y1, y2, texelX, texelY, texelStepY);
-			}
-
-			upos += ustepX;
-			vpos += vstepX;
-			wpos += wstepX;
-			curlight += lightstep;
-		}
-
-		if (r_modelscene)
-		{
-			for (int x = x1; x < x2; x++)
-			{
-				int y1 = uwal[x];
-				int y2 = dwal[x];
-				if (y2 > y1)
-				{
-					int count = y2 - y1;
-
-					float w1 = 1.0f / wallargs.WallC.sz1;
-					float w2 = 1.0f / wallargs.WallC.sz2;
-					float t = (x - wallargs.WallC.sx1 + 0.5f) / (wallargs.WallC.sx2 - wallargs.WallC.sx1);
-					float wcol = w1 * (1.0f - t) + w2 * t;
-					float zcol = 1.0f / wcol;
-					float zbufferdepth = 1.0f / (zcol / wallargs.FocalTangent);
-
-					wallcolargs.SetDest(x, y1);
-					wallcolargs.SetCount(count);
-					DrawDepthColumn(wallcolargs, zbufferdepth);
-				}
-			}
-		}
-	}
-
-	template<typename DrawerT>
-	void SWPalDrawers::DrawWallColumn8(WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepY)
-	{
-		auto& wallargs = *drawerargs.wallargs;
-		int texwidth = wallargs.texwidth;
-		int texheight = wallargs.texheight;
-		int fracbits = wallargs.fracbits;
-		uint32_t uv_max = texheight << fracbits;
-
-		const uint8_t* pixels = static_cast<const uint8_t*>(wallargs.texpixels) + (((texelX >> 16) * texwidth) >> 16) * texheight;
-
-		texelY = (static_cast<uint64_t>(texelY) * texheight) >> (32 - fracbits);
-		texelStepY = (static_cast<uint64_t>(texelStepY) * texheight) >> (32 - fracbits);
-
-		drawerargs.SetTexture(pixels, nullptr, texheight);
-		drawerargs.SetTextureVStep(texelStepY);
-
-		if (uv_max == 0 || texelStepY == 0) // power of two
-		{
-			int count = y2 - y1;
-
-			drawerargs.SetDest(x, y1);
-			drawerargs.SetCount(count);
-			drawerargs.SetTextureVPos(texelY);
-			DrawWallColumn<DrawerT>(drawerargs);
-		}
-		else
-		{
-			uint32_t left = y2 - y1;
-			int y = y1;
-			while (left > 0)
-			{
-				uint32_t available = uv_max - texelY;
-				uint32_t next_uv_wrap = available / texelStepY;
-				if (available % texelStepY != 0)
-					next_uv_wrap++;
-				uint32_t count = MIN(left, next_uv_wrap);
-
-				drawerargs.SetDest(x, y);
-				drawerargs.SetCount(count);
-				drawerargs.SetTextureVPos(texelY);
-				DrawWallColumn<DrawerT>(drawerargs);
-
-				y += count;
-				left -= count;
-				texelY += texelStepY * count;
-				if (texelY >= uv_max)
-					texelY -= uv_max;
-			}
-		}
-	}
 }
diff --git a/src/rendering/swrenderer/drawers/r_draw_pal.h b/src/rendering/swrenderer/drawers/r_draw_pal.h
index 9777560e6..14b1e3bc2 100644
--- a/src/rendering/swrenderer/drawers/r_draw_pal.h
+++ b/src/rendering/swrenderer/drawers/r_draw_pal.h
@@ -107,81 +107,289 @@ namespace swrenderer
 		int mShade = 0;
 	};
 
-	struct DrawWallModeNormal;
-	struct DrawWallModeMasked;
-	struct DrawWallModeAdd;
-	struct DrawWallModeAddClamp;
-	struct DrawWallModeSubClamp;
-	struct DrawWallModeRevSubClamp;
+	class DrawWallCommand : public DrawerCommand // Wall drawing command; derived classes supply DrawColumn.
+	{
+	public:
+		DrawWallCommand(const WallDrawerArgs& args); // initializes wallargs from args
+		void Execute(DrawerThread* thread) override;
+
+	protected:
+		virtual void DrawColumn(DrawerThread* thread, const WallColumnDrawerArgs& args) = 0; // pure virtual; overridden by the DrawWall*PalCommand classes
+
+	private:
+		void DrawWallColumn32(DrawerThread* thread, WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepX, uint32_t texelStepY);
+		void DrawWallColumn8(DrawerThread* thread, WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepY);
+		void DrawDepthColumn(DrawerThread* thread, const WallColumnDrawerArgs& args, float idepth);
+		void SetLights(WallColumnDrawerArgs& drawerargs, int x, int y1);
+
+		WallDrawerArgs wallargs; // held by value, not by reference
+	};
+
+	class PalWall1Command : public DrawWallCommand // Intermediate base that adds a static AddLights helper on top of DrawWallCommand.
+	{
+	public:
+		PalWall1Command(const WallDrawerArgs &args) : DrawWallCommand(args) { } // forwards args to the base constructor
+
+	protected:
+		inline static uint8_t AddLights(const DrawerLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material);
+	};
+
+	class DrawWall1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override; }; // this and the five classes below inherit PalWall1Command's constructors and override only DrawColumn
+	class DrawWallMasked1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override; };
+	class DrawWallAdd1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override; };
+	class DrawWallAddClamp1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override; };
+	class DrawWallSubClamp1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override; };
+	class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: using PalWall1Command::PalWall1Command; void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override; };
+
+	class PalSkyCommand : public DrawerCommand // Base for the sky commands; does not declare Execute itself.
+	{
+	public:
+		PalSkyCommand(const SkyDrawerArgs &args);
+
+	protected:
+		SkyDrawerArgs args; // held by value and visible to derived classes
+	};
+
+	class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; // inherits PalSkyCommand's constructor; overrides only Execute
+	class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; // same shape as the single-sky command above
+
+	class PalColumnCommand : public DrawerCommand // Base for the sprite column commands; does not declare Execute itself.
+	{
+	public:
+		PalColumnCommand(const SpriteDrawerArgs &args);
+
+		SpriteDrawerArgs args; // held by value; note this member is public, unlike PalSkyCommand::args
+
+	protected:
+		uint8_t AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b); // non-static, unlike the AddLights helpers on PalWall1Command and PalSpanCommand
+	};
+
+	class DrawColumnPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; }; // this and the sixteen classes below inherit PalColumnCommand's constructor and override only Execute
+	class FillColumnPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class FillColumnAddPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class FillColumnAddClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class FillColumnSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnAddPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnShadedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnAddClampShadedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnAddClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+	class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: using PalColumnCommand::PalColumnCommand; void Execute(DrawerThread *thread) override; };
+
+	class DrawFuzzColumnPalCommand : public DrawerCommand // Fuzz column command built from SpriteDrawerArgs; keeps its own scalar state instead of the args object.
+	{
+	public:
+		DrawFuzzColumnPalCommand(const SpriteDrawerArgs &args);
+		void Execute(DrawerThread *thread) override;
+
+	private:
+		int _yl; // presumably the first row of the column - confirm against the constructor
+		int _yh; // presumably the last row of the column - confirm against the constructor
+		int _x;
+		uint8_t *_destorg;
+		int _pitch;
+		int _fuzzpos;
+		int _fuzzviewheight;
+	};
+
+	class DrawScaledFuzzColumnPalCommand : public DrawerCommand // Declares the same seven private fields as DrawFuzzColumnPalCommand.
+	{
+	public:
+		DrawScaledFuzzColumnPalCommand(const SpriteDrawerArgs &drawerargs);
+		void Execute(DrawerThread *thread) override;
+
+	private:
+		int _x;
+		int _yl; // presumably the first row of the column - confirm against the constructor
+		int _yh; // presumably the last row of the column - confirm against the constructor
+		uint8_t *_destorg;
+		int _pitch;
+		int _fuzzpos;
+		int _fuzzviewheight;
+	};
+
+	class PalSpanCommand : public DrawerCommand // Shared state for the palette span commands; derived Execute methods read these protected members.
+	{
+	public:
+		PalSpanCommand(const SpanDrawerArgs &args); // presumably fills every member below from args - constructor body is not shown here
+
+	protected:
+		inline static uint8_t AddLights(const DrawerLight *lights, int num_lights, float viewpos_x, uint8_t fg, uint8_t material);
+
+		const uint8_t *_source;
+		const uint8_t *_colormap;
+		uint32_t _xfrac;
+		uint32_t _yfrac;
+		int _y; // row passed to thread->line_skipped_by_thread() by the span Execute methods
+		int _x1;
+		int _x2; // inclusive: FillSpanPalCommand writes _x2 - _x1 + 1 bytes
+		uint8_t *_dest;
+		uint32_t _xstep;
+		uint32_t _ystep;
+		int _srcwidth;
+		int _srcheight;
+		uint32_t *_srcblend;
+		uint32_t *_destblend;
+		int _color;
+		fixed_t _srcalpha;
+		fixed_t _destalpha;
+		DrawerLight *_dynlights;
+		int _num_dynlights;
+		float _viewpos_x;
+		float _step_viewpos_x;
+	};
+
+	class DrawSpanPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; }; // this and the six classes below inherit PalSpanCommand's constructor and override only Execute
+	class DrawSpanMaskedPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; };
+	class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; };
+	class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; };
+	class DrawSpanAddClampPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; };
+	class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; };
+	class FillSpanPalCommand : public PalSpanCommand { public: using PalSpanCommand::PalSpanCommand; void Execute(DrawerThread *thread) override; };
+
+	class DrawTiltedSpanPalCommand : public DrawerCommand // Tilted-plane span command; derives from DrawerCommand directly, not from PalSpanCommand.
+	{
+	public:
+		DrawTiltedSpanPalCommand(const SpanDrawerArgs &args, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap);
+		void Execute(DrawerThread *thread) override;
+
+	private:
+		void CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread); // first parameter is named lstart in the out-of-class definition
+
+		int y;
+		int x1;
+		int x2;
+		FVector3 plane_sz; // the three plane vectors are stored by value
+		FVector3 plane_su;
+		FVector3 plane_sv;
+		bool plane_shade; // selects the CalcTiltedLighting branch in Execute
+		int planeshade;
+		float planelightfloat;
+		fixed_t pviewx;
+		fixed_t pviewy;
+
+		const uint8_t *_colormap;
+		uint8_t *_dest;
+		int _ybits;
+		int _xbits;
+		const uint8_t *_source;
+		uint8_t *basecolormapdata; // set from basecolormap->Maps in the constructor
+		RenderViewport *viewport;
+	};
+
+	class DrawColoredSpanPalCommand : public PalSpanCommand // Solid-color span fill; keeps its own y/x1/x2/color next to the inherited PalSpanCommand state.
+	{
+	public:
+		DrawColoredSpanPalCommand(const SpanDrawerArgs &args);
+		void Execute(DrawerThread *thread) override;
+
+	private:
+		int y;
+		int x1;
+		int x2;
+		int color;
+		uint8_t *dest; // NOTE(review): assigned in the constructor, but Execute writes through the inherited _dest - confirm whether this member is needed
+	};
+
+	class DrawFogBoundaryLinePalCommand : public PalSpanCommand // Fog boundary line command with its own row, x range, colormap and destination.
+	{
+	public:
+		DrawFogBoundaryLinePalCommand(const SpanDrawerArgs &args);
+		void Execute(DrawerThread *thread) override;
+
+	private:
+		int y, x1, x2;
+		const uint8_t *_colormap; // hides PalSpanCommand::_colormap inside this class
+		uint8_t *_dest; // hides PalSpanCommand::_dest; the constructor sets it from GetDest(0, y)
+	};
+	
+	class DrawParticleColumnPalCommand : public DrawerCommand // Particle column command; takes a ready destination pointer and pitch instead of a drawer-args object.
+	{
+	public:
+		DrawParticleColumnPalCommand(uint8_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx);
+		void Execute(DrawerThread *thread) override;
+
+	private:
+		uint8_t *_dest;
+		int _dest_y; // passed to the thread's count_for_thread / dest_for_thread / skipped_by_thread helpers in Execute
+		int _pitch; // Execute multiplies a local copy by thread->num_cores
+		int _count; // divisor of the texture step in Execute
+		uint32_t _fg; // Execute extracts 8-bit channels at bit offsets 16 and 8
+		uint32_t _alpha;
+		uint32_t _fracposx;
+	};
+
+	class DrawVoxelBlocksPalCommand : public DrawerCommand // Voxel block drawing command; keeps its own SpriteDrawerArgs plus a pointer/count pair for the blocks.
+	{
+	public:
+		DrawVoxelBlocksPalCommand(const SpriteDrawerArgs &args, const VoxelBlock *blocks, int blockcount);
+		void Execute(DrawerThread *thread) override;
+
+	private:
+		SpriteDrawerArgs args; // copied by value in the constructor
+		const VoxelBlock *blocks; // non-owning pointer as far as this class shows; presumably must stay valid until Execute runs - confirm
+		int blockcount;
+	};
 
 	class SWPalDrawers : public SWPixelFormatDrawers
 	{
 	public:
 		using SWPixelFormatDrawers::SWPixelFormatDrawers;
 		
-		void DrawWall(const WallDrawerArgs &args) override { DrawWallColumns<DrawWallModeNormal>(args); }
-		void DrawWallMasked(const WallDrawerArgs &args) override { DrawWallColumns<DrawWallModeMasked>(args); }
-		void DrawWallAdd(const WallDrawerArgs &args) override { DrawWallColumns<DrawWallModeAdd>(args); }
-		void DrawWallAddClamp(const WallDrawerArgs &args) override { DrawWallColumns<DrawWallModeAddClamp>(args); }
-		void DrawWallSubClamp(const WallDrawerArgs &args) override { DrawWallColumns<DrawWallModeSubClamp>(args); }
-		void DrawWallRevSubClamp(const WallDrawerArgs &args) override { DrawWallColumns<DrawWallModeRevSubClamp>(args); }
-		void DrawSingleSkyColumn(const SkyDrawerArgs& args) override;
-		void DrawDoubleSkyColumn(const SkyDrawerArgs& args) override;
-		void DrawColumn(const SpriteDrawerArgs& args) override;
-		void FillColumn(const SpriteDrawerArgs& args) override;
-		void FillAddColumn(const SpriteDrawerArgs& args) override;
-		void FillAddClampColumn(const SpriteDrawerArgs& args) override;
-		void FillSubClampColumn(const SpriteDrawerArgs& args) override;
-		void FillRevSubClampColumn(const SpriteDrawerArgs& args) override;
+		void DrawWall(const WallDrawerArgs &args) override { Queue->Push<DrawWall1PalCommand>(args); } // from here on every override only pushes the matching *PalCommand onto Queue; the drawing itself lives in that command's Execute
+		void DrawWallMasked(const WallDrawerArgs &args) override { Queue->Push<DrawWallMasked1PalCommand>(args); }
+		void DrawWallAdd(const WallDrawerArgs &args) override { Queue->Push<DrawWallAdd1PalCommand>(args); }
+		void DrawWallAddClamp(const WallDrawerArgs &args) override { Queue->Push<DrawWallAddClamp1PalCommand>(args); }
+		void DrawWallSubClamp(const WallDrawerArgs &args) override { Queue->Push<DrawWallSubClamp1PalCommand>(args); }
+		void DrawWallRevSubClamp(const WallDrawerArgs &args) override { Queue->Push<DrawWallRevSubClamp1PalCommand>(args); }
+		void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push<DrawSingleSky1PalCommand>(args); } // was declared here and defined out of line; now an inline push
+		void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push<DrawDoubleSky1PalCommand>(args); }
+		void DrawColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnPalCommand>(args); }
+		void FillColumn(const SpriteDrawerArgs &args) override { Queue->Push<FillColumnPalCommand>(args); }
+		void FillAddColumn(const SpriteDrawerArgs &args) override { Queue->Push<FillColumnAddPalCommand>(args); }
+		void FillAddClampColumn(const SpriteDrawerArgs &args) override { Queue->Push<FillColumnAddClampPalCommand>(args); }
+		void FillSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push<FillColumnSubClampPalCommand>(args); }
+		void FillRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push<FillColumnRevSubClampPalCommand>(args); }
 		void DrawFuzzColumn(const SpriteDrawerArgs &args) override
 		{
 			if (r_fuzzscale)
-				DrawScaledFuzzColumn(args);
+				Queue->Push<DrawScaledFuzzColumnPalCommand>(args); // r_fuzzscale set: queue the scaled fuzz variant
 			else
-				DrawUnscaledFuzzColumn(args);
+				Queue->Push<DrawFuzzColumnPalCommand>(args); // r_fuzzscale clear: queue the unscaled fuzz variant; R_UpdateFuzzPos below runs right after either push
 			R_UpdateFuzzPos(args);
 		}
-		void DrawAddColumn(const SpriteDrawerArgs& args) override;
-		void DrawTranslatedColumn(const SpriteDrawerArgs& args) override;
-		void DrawTranslatedAddColumn(const SpriteDrawerArgs& args) override;
-		void DrawShadedColumn(const SpriteDrawerArgs& args) override;
-		void DrawAddClampShadedColumn(const SpriteDrawerArgs& args) override;
-		void DrawAddClampColumn(const SpriteDrawerArgs& args) override;
-		void DrawAddClampTranslatedColumn(const SpriteDrawerArgs& args) override;
-		void DrawSubClampColumn(const SpriteDrawerArgs& args) override;
-		void DrawSubClampTranslatedColumn(const SpriteDrawerArgs& args) override;
-		void DrawRevSubClampColumn(const SpriteDrawerArgs& args) override;
-		void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs& args) override;
-		void DrawVoxelBlocks(const SpriteDrawerArgs& args, const VoxelBlock* blocks, int blockcount) override;
-		void DrawSpan(const SpanDrawerArgs& args) override;
-		void DrawSpanMasked(const SpanDrawerArgs& args) override;
-		void DrawSpanTranslucent(const SpanDrawerArgs& args) override;
-		void DrawSpanMaskedTranslucent(const SpanDrawerArgs& args) override;
-		void DrawSpanAddClamp(const SpanDrawerArgs& args) override;
-		void DrawSpanMaskedAddClamp(const SpanDrawerArgs& args) override;
-		void FillSpan(const SpanDrawerArgs& args) override;
-		void DrawTiltedSpan(const SpanDrawerArgs& args, const FVector3& plane_sz, const FVector3& plane_su, const FVector3& plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap* basecolormap) override;
-		void DrawColoredSpan(const SpanDrawerArgs& args) override;
-		void DrawFogBoundaryLine(const SpanDrawerArgs& args) override;
+		void DrawAddColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnAddPalCommand>(args); }
+		void DrawTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnTranslatedPalCommand>(args); }
+		void DrawTranslatedAddColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnTlatedAddPalCommand>(args); } // note the abbreviated "Tlated" in this command's name
+		void DrawShadedColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnShadedPalCommand>(args); }
+		void DrawAddClampShadedColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnAddClampShadedPalCommand>(args); }
+		void DrawAddClampColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnAddClampPalCommand>(args); }
+		void DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnAddClampTranslatedPalCommand>(args); }
+		void DrawSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnSubClampPalCommand>(args); }
+		void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnSubClampTranslatedPalCommand>(args); }
+		void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnRevSubClampPalCommand>(args); }
+		void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override { Queue->Push<DrawColumnRevSubClampTranslatedPalCommand>(args); }
+		void DrawVoxelBlocks(const SpriteDrawerArgs &args, const VoxelBlock *blocks, int blockcount) override { Queue->Push<DrawVoxelBlocksPalCommand>(args, blocks, blockcount); } // the blocks pointer is forwarded as-is to the command's constructor
+		void DrawSpan(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanPalCommand>(args); }
+		void DrawSpanMasked(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanMaskedPalCommand>(args); }
+		void DrawSpanTranslucent(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanTranslucentPalCommand>(args); }
+		void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanMaskedTranslucentPalCommand>(args); }
+		void DrawSpanAddClamp(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanAddClampPalCommand>(args); }
+		void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanMaskedAddClampPalCommand>(args); }
+		void FillSpan(const SpanDrawerArgs &args) override { Queue->Push<FillSpanPalCommand>(args); }
 
-		void DrawParticleColumn(int x, int yl, int ycount, uint32_t fg, uint32_t alpha, uint32_t fracposx) override;
+		void DrawTiltedSpan(const SpanDrawerArgs &args, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override
+		{
+			Queue->Push<DrawTiltedSpanPalCommand>(args, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy, basecolormap); // all ten parameters are forwarded unchanged, in declaration order
+		}
 
-		void DrawScaledFuzzColumn(const SpriteDrawerArgs& args);
-		void DrawUnscaledFuzzColumn(const SpriteDrawerArgs& args);
-
-		inline static uint8_t AddLightsColumn(const DrawerLight* lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material);
-		inline static uint8_t AddLightsSpan(const DrawerLight* lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material);
-		inline static uint8_t AddLights(uint8_t fg, uint8_t material, uint32_t lit_r, uint32_t lit_g, uint32_t lit_b);
-
-		void CalcTiltedLighting(double lstart, double lend, int width, int planeshade, uint8_t* basecolormapdata);
-
-		template<typename DrawerT> void DrawWallColumns(const WallDrawerArgs& args);
-		template<typename DrawerT> void DrawWallColumn8(WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepY);
-		template<typename DrawerT> void DrawWallColumn(const WallColumnDrawerArgs& args);
-
-		// Working buffer used by the tilted (sloped) span drawer
-		const uint8_t* tiltlighting[MAXWIDTH];
-
-		WallColumnDrawerArgs wallcolargs;
+		void DrawColoredSpan(const SpanDrawerArgs &args) override { Queue->Push<DrawColoredSpanPalCommand>(args); }
+		void DrawFogBoundaryLine(const SpanDrawerArgs &args) override { Queue->Push<DrawFogBoundaryLinePalCommand>(args); } // no DrawParticleColumn override is added back in this class; the removed one is not replaced here
 	};
 }
diff --git a/src/rendering/swrenderer/drawers/r_draw_rgba.cpp b/src/rendering/swrenderer/drawers/r_draw_rgba.cpp
index b6360981c..10bb6be63 100644
--- a/src/rendering/swrenderer/drawers/r_draw_rgba.cpp
+++ b/src/rendering/swrenderer/drawers/r_draw_rgba.cpp
@@ -82,185 +82,193 @@ namespace swrenderer
 {
 	void SWTruecolorDrawers::DrawWall(const WallDrawerArgs &args)
 	{
-		DrawWallColumns<DrawWall32Command>(args);
+		Queue->Push<DrawWall32Command>(args); // push a DrawWall32Command built from args onto Queue instead of calling DrawWallColumns directly
 	}
 	
 	void SWTruecolorDrawers::DrawWallMasked(const WallDrawerArgs &args)
 	{
-		DrawWallColumns<DrawWallMasked32Command>(args);
+		Queue->Push<DrawWallMasked32Command>(args); // push the masked wall command onto Queue instead of calling DrawWallColumns directly
 	}
 	
 	void SWTruecolorDrawers::DrawWallAdd(const WallDrawerArgs &args)
 	{
-		DrawWallColumns<DrawWallAddClamp32Command>(args);
+		Queue->Push<DrawWallAddClamp32Command>(args); // NOTE(review): same command type as DrawWallAddClamp; the removed line used it too, so the sharing predates this change - confirm intended
 	}
 	
 	void SWTruecolorDrawers::DrawWallAddClamp(const WallDrawerArgs &args)
 	{
-		DrawWallColumns<DrawWallAddClamp32Command>(args);
+		Queue->Push<DrawWallAddClamp32Command>(args); // push the add-clamp wall command onto Queue instead of calling DrawWallColumns directly
 	}
 	
 	void SWTruecolorDrawers::DrawWallSubClamp(const WallDrawerArgs &args)
 	{
-		DrawWallColumns<DrawWallSubClamp32Command>(args);
+		Queue->Push<DrawWallSubClamp32Command>(args); // push the sub-clamp wall command onto Queue instead of calling DrawWallColumns directly
 	}
 	
 	void SWTruecolorDrawers::DrawWallRevSubClamp(const WallDrawerArgs &args)
 	{
-		DrawWallColumns<DrawWallRevSubClamp32Command>(args);
+		Queue->Push<DrawWallRevSubClamp32Command>(args); // push the reverse-sub-clamp wall command onto Queue instead of calling DrawWallColumns directly
 	}
 	
 	void SWTruecolorDrawers::DrawColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSprite32Command::DrawColumn(args);
+		Queue->Push<DrawSprite32Command>(args); // push a DrawSprite32Command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::FillColumn(const SpriteDrawerArgs &args)
 	{
-		FillSprite32Command::DrawColumn(args);
+		Queue->Push<FillSprite32Command>(args); // push a FillSprite32Command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::FillAddColumn(const SpriteDrawerArgs &args)
 	{
-		FillSpriteAddClamp32Command::DrawColumn(args);
+		Queue->Push<FillSpriteAddClamp32Command>(args); // NOTE(review): same command type as FillAddClampColumn; mapping unchanged from the removed line - confirm intended
 	}
 
 	void SWTruecolorDrawers::FillAddClampColumn(const SpriteDrawerArgs &args)
 	{
-		FillSpriteAddClamp32Command::DrawColumn(args);
+		Queue->Push<FillSpriteAddClamp32Command>(args); // push the add-clamp fill command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::FillSubClampColumn(const SpriteDrawerArgs &args)
 	{
-		FillSpriteSubClamp32Command::DrawColumn(args);
+		Queue->Push<FillSpriteSubClamp32Command>(args); // push the sub-clamp fill command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::FillRevSubClampColumn(const SpriteDrawerArgs &args)
 	{
-		FillSpriteRevSubClamp32Command::DrawColumn(args);
+		Queue->Push<FillSpriteRevSubClamp32Command>(args); // push the reverse-sub-clamp fill command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawFuzzColumn(const SpriteDrawerArgs &args)
 	{
 		if (r_fuzzscale)
-			DrawScaledFuzzColumn(args);
+			Queue->Push<DrawScaledFuzzColumnRGBACommand>(args); // r_fuzzscale set: queue the scaled fuzz command (its constructor snapshots fuzzpos)
 		else
-			DrawUnscaledFuzzColumn(args);
+			Queue->Push<DrawFuzzColumnRGBACommand>(args); // r_fuzzscale clear: queue the unscaled fuzz command; R_UpdateFuzzPos below runs right after either push
 		R_UpdateFuzzPos(args);
 	}
 
 	void SWTruecolorDrawers::DrawAddColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteAddClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteAddClamp32Command>(args); // NOTE(review): same command type as DrawAddClampColumn; mapping unchanged from the removed line - confirm intended
 	}
 
 	void SWTruecolorDrawers::DrawTranslatedColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteTranslated32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteTranslated32Command>(args); // push the translated sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawTranslatedAddColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteTranslatedAddClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteTranslatedAddClamp32Command>(args); // NOTE(review): same command type as DrawAddClampTranslatedColumn; mapping unchanged from the removed line - confirm intended
 	}
 
 	void SWTruecolorDrawers::DrawShadedColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteShaded32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteShaded32Command>(args); // push the shaded sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawAddClampShadedColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteAddClampShaded32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteAddClampShaded32Command>(args); // push the add-clamp shaded sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawAddClampColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteAddClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteAddClamp32Command>(args); // push the add-clamp sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawAddClampTranslatedColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteTranslatedAddClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteTranslatedAddClamp32Command>(args); // push the translated add-clamp sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawSubClampColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteSubClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteSubClamp32Command>(args); // push the sub-clamp sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteTranslatedSubClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteTranslatedSubClamp32Command>(args); // push the translated sub-clamp sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawRevSubClampColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteRevSubClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteRevSubClamp32Command>(args); // push the reverse-sub-clamp sprite command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	void SWTruecolorDrawers::DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args)
 	{
-		DrawSpriteTranslatedRevSubClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpriteTranslatedRevSubClamp32Command>(args); // push the translated reverse-sub-clamp sprite command onto Queue; replaces the direct static DrawColumn call
+	}
+
+	void SWTruecolorDrawers::DrawVoxelBlocks(const SpriteDrawerArgs &args, const VoxelBlock *blocks, int blockcount) // thin wrapper: the per-block loop lives in DrawVoxelBlocksRGBACommand::Execute
+	{
+		Queue->Push<DrawVoxelBlocksRGBACommand>(args, blocks, blockcount); // the command stores the blocks pointer itself (see its constructor's init list), not a copy of the array
 	}
 
 	void SWTruecolorDrawers::DrawSpan(const SpanDrawerArgs &args)
 	{
-		DrawSpan32Command::DrawColumn(args);
+		Queue->Push<DrawSpan32Command>(args); // push a DrawSpan32Command onto Queue; replaces the direct static DrawColumn call
 	}
 	
 	void SWTruecolorDrawers::DrawSpanMasked(const SpanDrawerArgs &args)
 	{
-		DrawSpanMasked32Command::DrawColumn(args);
+		Queue->Push<DrawSpanMasked32Command>(args); // push the masked span command onto Queue; replaces the direct static DrawColumn call
 	}
 	
 	void SWTruecolorDrawers::DrawSpanTranslucent(const SpanDrawerArgs &args)
 	{
-		DrawSpanTranslucent32Command::DrawColumn(args);
+		Queue->Push<DrawSpanTranslucent32Command>(args); // push the translucent span command onto Queue; replaces the direct static DrawColumn call
 	}
 	
 	void SWTruecolorDrawers::DrawSpanMaskedTranslucent(const SpanDrawerArgs &args)
 	{
-		DrawSpanAddClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpanAddClamp32Command>(args); // NOTE(review): MaskedTranslucent queues the AddClamp command; the removed line had the same mapping, so it predates this change - confirm intended
 	}
 	
 	void SWTruecolorDrawers::DrawSpanAddClamp(const SpanDrawerArgs &args)
 	{
-		DrawSpanTranslucent32Command::DrawColumn(args);
+		Queue->Push<DrawSpanTranslucent32Command>(args); // NOTE(review): AddClamp queues the Translucent command; the removed line had the same mapping, so it predates this change - confirm intended
 	}
 	
 	void SWTruecolorDrawers::DrawSpanMaskedAddClamp(const SpanDrawerArgs &args)
 	{
-		DrawSpanAddClamp32Command::DrawColumn(args);
+		Queue->Push<DrawSpanAddClamp32Command>(args); // NOTE(review): same command type as DrawSpanMaskedTranslucent; mapping unchanged from the removed line - confirm intended
 	}
 	
 	void SWTruecolorDrawers::DrawSingleSkyColumn(const SkyDrawerArgs &args)
 	{
-		DrawSkySingle32Command::DrawColumn(args);
+		Queue->Push<DrawSkySingle32Command>(args); // push the single-layer sky command onto Queue; replaces the direct static DrawColumn call
 	}
 	
 	void SWTruecolorDrawers::DrawDoubleSkyColumn(const SkyDrawerArgs &args)
 	{
-		DrawSkyDouble32Command::DrawColumn(args);
+		Queue->Push<DrawSkyDouble32Command>(args); // push the double-layer sky command onto Queue; replaces the direct static DrawColumn call
 	}
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::DrawScaledFuzzColumn(const SpriteDrawerArgs& drawerargs)
+	DrawScaledFuzzColumnRGBACommand::DrawScaledFuzzColumnRGBACommand(const SpriteDrawerArgs &drawerargs) // copies the values Execute reads into members; these were locals of the old one-piece function
 	{
-		int _x = drawerargs.FuzzX();
-		int _yl = drawerargs.FuzzY1();
-		int _yh = drawerargs.FuzzY2();
-		uint8_t* RESTRICT _destorg = drawerargs.Viewport()->GetDest(0, 0);
-		int _pitch = drawerargs.Viewport()->RenderTarget->GetPitch();
-		int _fuzzpos = fuzzpos;
-		int _fuzzviewheight = fuzzviewheight;
+		_x = drawerargs.FuzzX(); // column to draw
+		_yl = drawerargs.FuzzY1(); // first row requested; Execute clamps it to at least 1
+		_yh = drawerargs.FuzzY2(); // last row requested; Execute clamps it to _fuzzviewheight
+		_destorg = drawerargs.Viewport()->GetDest(0, 0); // destination address for position (0, 0)
+		_pitch = drawerargs.Viewport()->RenderTarget->GetPitch(); // render target pitch; NOTE(review): presumably in pixels, not bytes - confirm
+		_fuzzpos = fuzzpos; // snapshot of the outer-scope fuzzpos taken at construction; DrawFuzzColumn calls R_UpdateFuzzPos right after the Push
+		_fuzzviewheight = fuzzviewheight; // snapshot of the outer-scope fuzzviewheight taken at construction
+	}
 
+	void DrawScaledFuzzColumnRGBACommand::Execute(DrawerThread *thread) // draws this thread's share of the fuzz column; only part of the body is visible in this diff
+	{
 		int x = _x;
 		int yl = MAX(_yl, 1);
 		int yh = MIN(_yh, _fuzzviewheight);
 
-		int count = yh - yl + 1;
+		int count = thread->count_for_thread(yl, yh - yl + 1); // NOTE(review): presumably how many of the rows starting at yl belong to this thread - helper not visible here, confirm
 		if (count <= 0) return;
 
 		int pitch = _pitch;
@@ -271,7 +279,14 @@ namespace swrenderer
 
 		fixed_t fuzzstep = (200 << FRACBITS) / _fuzzviewheight;
 		fixed_t fuzzcount = FUZZTABLE << FRACBITS;
-		fixed_t fuzz = ((fuzz_x << FRACBITS) + yl * fuzzstep) % fuzzcount;
+		fixed_t fuzz = (fuzz_x << FRACBITS) + yl * fuzzstep; // the modulo is no longer applied here; it happens after the per-thread offset is added below
+
+		dest = thread->dest_for_thread(yl, pitch, dest); // NOTE(review): presumably moves dest to this thread's first row at or after yl - confirm
+		pitch *= thread->num_cores; // every step of dest now advances num_cores rows
+
+		fuzz += fuzzstep * thread->skipped_by_thread(yl); // add one fuzzstep for each row skipped_by_thread reports for yl
+		fuzz %= fuzzcount; // wrap to the table range; this replaces the modulo removed from the initializer
+		fuzzstep *= thread->num_cores; // scale the fuzz step by the same num_cores factor as pitch
 
 		while (count > 0)
 		{
@@ -294,30 +309,33 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::DrawUnscaledFuzzColumn(const SpriteDrawerArgs& drawerargs)
+	DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand(const SpriteDrawerArgs &drawerargs) // copies the values Execute reads into members; these were locals of the old one-piece function
 	{
-		int _x = drawerargs.FuzzX();
-		int _yl = drawerargs.FuzzY1();
-		int _yh = drawerargs.FuzzY2();
-		uint8_t* RESTRICT _destorg = drawerargs.Viewport()->GetDest(0, 0);
-		int _pitch = drawerargs.Viewport()->RenderTarget->GetPitch();
-		int _fuzzpos = fuzzpos;
-		int _fuzzviewheight = fuzzviewheight;
+		_x = drawerargs.FuzzX(); // column to draw
+		_yl = drawerargs.FuzzY1(); // first row requested; Execute clamps it to at least 1
+		_yh = drawerargs.FuzzY2(); // last row requested; Execute clamps it to _fuzzviewheight
+		_destorg = drawerargs.Viewport()->GetDest(0, 0); // destination address for position (0, 0); Execute indexes it as uint32_t
+		_pitch = drawerargs.Viewport()->RenderTarget->GetPitch(); // used by Execute as a stride in uint32_t elements
+		_fuzzpos = fuzzpos; // snapshot of the outer-scope fuzzpos taken at construction; DrawFuzzColumn calls R_UpdateFuzzPos right after the Push
+		_fuzzviewheight = fuzzviewheight; // snapshot of the outer-scope fuzzviewheight taken at construction
+	}
 
+	void DrawFuzzColumnRGBACommand::Execute(DrawerThread *thread) // draws this thread's share of the unscaled fuzz column; only part of the body is visible in this diff
+	{
 		int yl = MAX(_yl, 1);
 		int yh = MIN(_yh, _fuzzviewheight);
 
-		int count = yh - yl + 1;
+		int count = thread->count_for_thread(yl, yh - yl + 1); // NOTE(review): presumably how many of the rows starting at yl belong to this thread - helper not visible here, confirm
 
 		// Zero length.
 		if (count <= 0)
 			return;
 
-		uint32_t *dest = _pitch * yl + _x + (uint32_t*)_destorg;
-		int pitch = _pitch;
+		uint32_t *dest = thread->dest_for_thread(yl, _pitch, _pitch * yl + _x + (uint32_t*)_destorg); // third argument is the old address of pixel (_x, yl); dest_for_thread presumably shifts it to this thread's first row - confirm
+		int pitch = _pitch * thread->num_cores; // every step of dest advances num_cores rows
 
-		int fuzzstep = 1;
-		int fuzz = _fuzzpos % FUZZTABLE;
+		int fuzzstep = thread->num_cores; // was 1; scaled by the same num_cores factor as pitch
+		int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; // start index offset by the rows skipped_by_thread reports for yl, wrapped to the table size
 
 #ifndef ORIGINAL_FUZZ
 
@@ -349,6 +367,8 @@ namespace swrenderer
 
 #else
 
+		yl += thread->skipped_by_thread(yl); // ORIGINAL_FUZZ path only: move yl forward by the skipped rows before the bounds check below
+
 		// Handle the case where we would go out of bounds at the top:
 		if (yl < fuzzstep)
 		{
@@ -423,14 +443,20 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::FillSpan(const SpanDrawerArgs& drawerargs)
+	FillSpanRGBACommand::FillSpanRGBACommand(const SpanDrawerArgs &drawerargs) // copies the span parameters into members; these were locals of the old one-piece function
 	{
-		int _x1 = drawerargs.DestX1();
-		int _x2 = drawerargs.DestX2();
-		int _y = drawerargs.DestY();
-		uint8_t* RESTRICT _dest = drawerargs.Viewport()->GetDest(_x1, _y);
-		fixed_t _light = drawerargs.Light();
-		int _color = drawerargs.SolidColor();
+		_x1 = drawerargs.DestX1(); // first column of the span
+		_x2 = drawerargs.DestX2(); // last column; Execute computes the width as _x2 - _x1 + 1
+		_y = drawerargs.DestY(); // row of the span; Execute uses it for the per-thread skip test
+		_dest = drawerargs.Viewport()->GetDest(_x1, _y); // destination address of pixel (_x1, _y), resolved at construction
+		_light = drawerargs.Light(); // light value from the drawer args
+		_color = drawerargs.SolidColor(); // solid fill colour from the drawer args
+	}
+
+	void FillSpanRGBACommand::Execute(DrawerThread *thread) // fills one row span; only part of the body is visible in this diff
+	{
+		if (thread->line_skipped_by_thread(_y)) // the span lies on the single row _y, so the whole command is a no-op for a thread that reports that row as skipped
+			return;
 
 		uint32_t *dest = (uint32_t*)_dest;
 		int count = (_x2 - _x1 + 1);
@@ -442,14 +468,20 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::DrawFogBoundaryLine(const SpanDrawerArgs& drawerargs)
+	DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(const SpanDrawerArgs &drawerargs) // copies the line parameters into members; these were locals of the old one-piece function
 	{
-		int _y = drawerargs.DestY();
-		int _x = drawerargs.DestX1();
-		int _x2 = drawerargs.DestX2();
-		uint8_t* RESTRICT _line = drawerargs.Viewport()->GetDest(0, _y);
-		fixed_t _light = drawerargs.Light();
-		ShadeConstants constants = drawerargs.ColormapConstants();
+		_y = drawerargs.DestY(); // row of the line; Execute uses it for the per-thread skip test
+		_x = drawerargs.DestX1(); // first column
+		_x2 = drawerargs.DestX2(); // last column
+		_line = drawerargs.Viewport()->GetDest(0, _y); // destination address of column 0 on row _y (note: not offset by _x)
+		_light = drawerargs.Light(); // light value; Execute feeds it to LightBgra::calc_light_multiplier
+		_shade_constants = drawerargs.ColormapConstants(); // was the local "constants"; Execute copies it back into a local of that name
+	}
+
+	void DrawFogBoundaryLineRGBACommand::Execute(DrawerThread *thread) // processes one row; only part of the body is visible in this diff
+	{
+		if (thread->line_skipped_by_thread(_y)) // the line lies on the single row _y, so the whole command is a no-op for a thread that reports that row as skipped
+			return;
 
 		int y = _y;
 		int x = _x;
@@ -458,6 +490,7 @@ namespace swrenderer
 		uint32_t *dest = (uint32_t*)_line;
 
 		uint32_t light = LightBgra::calc_light_multiplier(_light);
+		ShadeConstants constants = _shade_constants; // local copy under the old name so the unchanged loop body below keeps compiling against "constants"
 
 		do
 		{
@@ -497,18 +530,32 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::DrawTiltedSpan(const SpanDrawerArgs& drawerargs, const FVector3& _plane_sz, const FVector3& _plane_su, const FVector3& _plane_sv, bool _plane_shade, int _planeshade, float _planelightfloat, fixed_t _pviewx, fixed_t _pviewy, FDynamicColormap* _basecolormap)
+	DrawTiltedSpanRGBACommand::DrawTiltedSpanRGBACommand(const SpanDrawerArgs &drawerargs, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) // unlike the removed function, this signature has no FDynamicColormap parameter
 	{
-		int _x1 = drawerargs.DestX1();
-		int _x2 = drawerargs.DestX2();
-		int _y = drawerargs.DestY();
-		uint8_t* _dest = drawerargs.Viewport()->GetDest(_x1, _y);
-		fixed_t _light = drawerargs.Light();
-		ShadeConstants _shade_constants = drawerargs.ColormapConstants();
-		const uint32_t* _source = (const uint32_t*)drawerargs.TexturePixels();
-		int _xbits = drawerargs.TextureWidthBits();
-		int _ybits = drawerargs.TextureHeightBits();
-		RenderViewport* viewport = drawerargs.Viewport();
+		_x1 = drawerargs.DestX1(); // first column of the span
+		_x2 = drawerargs.DestX2(); // last column of the span
+		_y = drawerargs.DestY(); // row of the span; Execute uses it for the per-thread skip test
+		_dest = drawerargs.Viewport()->GetDest(_x1, _y); // destination address of pixel (_x1, _y), resolved at construction
+		_light = drawerargs.Light(); // light value from the drawer args
+		_shade_constants = drawerargs.ColormapConstants(); // colormap shade constants from the drawer args
+		_plane_sz = plane_sz; // the three plane vectors are copied by value; the parameters are references
+		_plane_su = plane_su;
+		_plane_sv = plane_sv;
+		_plane_shade = plane_shade; // the remaining plane/view parameters are stored unchanged under their old underscore names
+		_planeshade = planeshade;
+		_planelightfloat = planelightfloat;
+		_pviewx = pviewx;
+		_pviewy = pviewy;
+		_source = (const uint32_t*)drawerargs.TexturePixels(); // texture pixels reinterpreted as 32-bit values
+		_xbits = drawerargs.TextureWidthBits(); // NOTE(review): presumably log2 of the texture width - confirm
+		_ybits = drawerargs.TextureHeightBits(); // NOTE(review): presumably log2 of the texture height - confirm
+		viewport = drawerargs.Viewport(); // raw viewport pointer kept for Execute
+	}
+
+	void DrawTiltedSpanRGBACommand::Execute(DrawerThread *thread) // draws one tilted span row; only the start of the body is visible in this diff
+	{
+		if (thread->line_skipped_by_thread(_y)) // the span lies on the single row _y, so the whole command is a no-op for a thread that reports that row as skipped
+			return;
 
 		//#define SPANSIZE 32
 		//#define INVSPAN 0.03125f
@@ -613,14 +660,20 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::DrawColoredSpan(const SpanDrawerArgs& drawerargs)
+	DrawColoredSpanRGBACommand::DrawColoredSpanRGBACommand(const SpanDrawerArgs &drawerargs) // copies the span parameters into members; these were locals of the old one-piece function
 	{
-		int _y = drawerargs.DestY();
-		int _x1 = drawerargs.DestX1();
-		int _x2 = drawerargs.DestX2();
-		uint8_t* RESTRICT _dest = drawerargs.Viewport()->GetDest(_x1, _y);
-		fixed_t _light = drawerargs.Light();
-		int _color = drawerargs.SolidColor();
+		_y = drawerargs.DestY(); // row of the span; Execute uses it for the per-thread skip test
+		_x1 = drawerargs.DestX1(); // first column of the span
+		_x2 = drawerargs.DestX2(); // last column of the span
+		_dest = drawerargs.Viewport()->GetDest(_x1, _y); // destination address of pixel (_x1, _y), resolved at construction
+		_light = drawerargs.Light(); // light value from the drawer args
+		_color = drawerargs.SolidColor(); // solid colour from the drawer args
+	}
+
+	void DrawColoredSpanRGBACommand::Execute(DrawerThread *thread) // draws one solid-colour span row; only the start of the body is visible in this diff
+	{
+		if (thread->line_skipped_by_thread(_y)) // the span lies on the single row _y, so the whole command is a no-op for a thread that reports that row as skipped
+			return;
 
 		int y = _y;
 		int x1 = _x1;
@@ -637,13 +690,12 @@ namespace swrenderer
 	/////////////////////////////////////////////////////////////////////////////
 
 #if 0
-#ifdef NO_SSE
-	void SWTruecolorDrawers::ApplySpecialColormap(FSpecialColormap* colormap, DFrameBuffer* screen)
+	ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) // NOTE: this code sits inside the "#if 0" region opened just above, so it is not compiled
 	{
-		uint8_t* buffer = screen->GetBuffer();
-		int pitch = screen->GetPitch();
-		int width = screen->GetWidth();
-		int height = screen->GetHeight();
+		buffer = screen->GetBuffer(); // frame buffer base pointer; now stored for Execute instead of being a local
+		pitch = screen->GetPitch(); // Execute addresses a row as buffer + y * pitch * 4
+		width = screen->GetWidth(); // screen width from the frame buffer
+		height = screen->GetHeight(); // screen height; Execute passes it to count_for_thread
 
 		start_red = (int)(colormap->ColorizeStart[0] * 255);
 		start_green = (int)(colormap->ColorizeStart[1] * 255);
@@ -651,9 +703,13 @@ namespace swrenderer
 		end_red = (int)(colormap->ColorizeEnd[0] * 255);
 		end_green = (int)(colormap->ColorizeEnd[1] * 255);
 		end_blue = (int)(colormap->ColorizeEnd[2] * 255);
+	} // the constructor ends here; it is shared by both Execute variants because the NO_SSE guard now opens after it
 
-		int y = 0;
-		int count = height;
+#ifdef NO_SSE
+	void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) // NO_SSE variant; part of the body is elided by the diff
+	{
+		int y = thread->skipped_by_thread(0); // first row for this thread: the rows it skips counting from row 0
+		int count = thread->count_for_thread(0, height); // NOTE(review): presumably how many of the height rows belong to this thread - helper not visible here, confirm
 		while (count > 0)
 		{
 			uint8_t *pixels = buffer + y * pitch * 4;
@@ -678,27 +734,15 @@ namespace swrenderer
 
 				pixels += 4;
 			}
-			y++;
+			y += thread->num_cores; // step by num_cores rows instead of one
 			count--;
 		}
 	}
 #else
-	void SWTruecolorDrawers::ApplySpecialColormap(FSpecialColormap* colormap, DFrameBuffer* screen)
+	void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) // SSE variant (the #else branch of NO_SSE); the setup it used to repeat now lives in the shared constructor
 	{
-		uint8_t* buffer = screen->GetBuffer();
-		int pitch = screen->GetPitch();
-		int width = screen->GetWidth();
-		int height = screen->GetHeight();
-
-		start_red = (int)(colormap->ColorizeStart[0] * 255);
-		start_green = (int)(colormap->ColorizeStart[1] * 255);
-		start_blue = (int)(colormap->ColorizeStart[2] * 255);
-		end_red = (int)(colormap->ColorizeEnd[0] * 255);
-		end_green = (int)(colormap->ColorizeEnd[1] * 255);
-		end_blue = (int)(colormap->ColorizeEnd[2] * 255);
-
-		int y = 0;
-		int count = height;
+		int y = thread->skipped_by_thread(0); // first row for this thread: the rows it skips counting from row 0
+		int count = thread->count_for_thread(0, height); // NOTE(review): presumably how many of the height rows belong to this thread - helper not visible here, confirm
 		__m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37);
 		__m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue);
 		while (count > 0)
@@ -792,7 +836,7 @@ namespace swrenderer
 				pixels += 4;
 			}
 
-			y++;
+			y += thread->num_cores; // step by num_cores rows instead of one
 			count--;
 		}
 	}
@@ -801,21 +845,33 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::DrawParticleColumn(int x, int _dest_y, int _count, uint32_t _fg, uint32_t _alpha, uint32_t _fracposx)
+	DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) // the caller now passes dest and pitch; the removed code derived them from thread->Viewport
 	{
-		uint32_t* dest = (uint32_t*)thread->Viewport->GetDest(x, _dest_y);
-		int pitch = thread->Viewport->RenderTarget->GetPitch();
+		_dest = dest; // 32-bit destination pointer; Execute passes it to dest_for_thread together with _dest_y
+		_pitch = pitch; // row stride in uint32_t elements, as Execute applies it to a uint32_t pointer
+		_count = count; // full column height; Execute also uses it as the divisor for fracstep
+		_fg = fg; // packed colour; Execute takes red from bits 16-23 and green from bits 8-15
+		_alpha = alpha; // becomes particle_alpha in Execute
+		_fracposx = fracposx; // fixed-point value; _fracposx >> FRACBITS selects the particle texture row in Execute
+		_dest_y = dest_y; // row of _dest, used by Execute for the per-thread helpers
+	}
 
-		int count = _count;
+	void DrawParticleColumnRGBACommand::Execute(DrawerThread *thread) // draws this thread's share of one particle column; only part of the body is visible in this diff
+	{
+		int count = thread->count_for_thread(_dest_y, _count); // NOTE(review): presumably how many of the _count rows starting at _dest_y belong to this thread - confirm
 		if (count <= 0)
 			return;
 
+		uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); // NOTE(review): presumably moves _dest to this thread's first row - confirm
+		int pitch = _pitch * thread->num_cores; // every step of dest advances num_cores rows
+
 		int particle_texture_index = MIN<int>(gl_particles_style, NUM_PARTICLE_TEXTURES - 1);
 		const uint32_t *source = &particle_texture[particle_texture_index][(_fracposx >> FRACBITS) * PARTICLE_TEXTURE_SIZE];
 		uint32_t particle_alpha = _alpha;
 
 		uint32_t fracstep = PARTICLE_TEXTURE_SIZE * FRACUNIT / _count;
-		uint32_t fracpos = fracstep / 2;
+		uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; // keep the old half-step start, advanced by one fracstep for each row skipped before this thread's first row
+		fracstep *= thread->num_cores; // scale the texture step by the same num_cores factor as pitch
 
 		uint32_t fg_red = (_fg >> 16) & 0xff;
 		uint32_t fg_green = (_fg >> 8) & 0xff;
@@ -842,208 +898,37 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
-	void SWTruecolorDrawers::DrawVoxelBlocks(const SpriteDrawerArgs& args, const VoxelBlock* blocks, int blockcount)
+	DrawVoxelBlocksRGBACommand::DrawVoxelBlocksRGBACommand(const SpriteDrawerArgs &args, const VoxelBlock *blocks, int blockcount) : args(args), blocks(blocks), blockcount(blockcount) // args is copied into the member; blocks is stored as a pointer only
+	{ // NOTE(review): the VoxelBlock array is not copied, so it presumably has to stay valid until Execute() has run - confirm against the caller.
+	}
+
+	void DrawVoxelBlocksRGBACommand::Execute(DrawerThread *thread) // Draws every stored voxel block, column by column, through a single DrawSprite32Command.
 	{
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
 		uint8_t *destorig = args.Viewport()->RenderTarget->GetPixels();
 
-		SpriteDrawerArgs drawerargs = args;
-		drawerargs.dc_texturefracx = 0;
-		drawerargs.dc_source2 = 0;
+		DrawSprite32Command drawer(args); // one local sprite command, re-targeted for each block and column below
+		drawer.args.dc_texturefracx = 0; // set once; not touched again inside the loops
+		drawer.args.dc_source2 = 0; // set once; not touched again inside the loops
 		for (int i = 0; i < blockcount; i++)
 		{
 			const VoxelBlock &block = blocks[i];
 
 			double v = block.vPos / (double)block.voxelsCount / FRACUNIT;
 			double vstep = block.vStep / (double)block.voxelsCount / FRACUNIT;
-			drawerargs.dc_texturefrac = (int)(v * (1 << 30));
-			drawerargs.dc_iscale = (int)(vstep * (1 << 30));
-			drawerargs.dc_source = block.voxels;
-			drawerargs.dc_textureheight = block.voxelsCount;
-			drawerargs.dc_count = block.height;
-			drawerargs.dc_dest_y = block.y;
-			drawerargs.dc_dest = destorig + (block.x + block.y * pitch) * 4;
+			drawer.args.dc_texturefrac = (int)(v * (1 << 30)); // v scaled by 2^30 and truncated to int
+			drawer.args.dc_iscale = (int)(vstep * (1 << 30)); // same 2^30 scaling for the per-row step
+			drawer.args.dc_source = block.voxels;
+			drawer.args.dc_textureheight = block.voxelsCount;
+			drawer.args.dc_count = block.height;
+			drawer.args.dc_dest_y = block.y;
+			drawer.args.dc_dest = destorig + (block.x + block.y * pitch) * 4; // byte offset of (block.x, block.y); the factor 4 is presumably bytes per 32-bit pixel
 
 			for (int j = 0; j < block.width; j++)
 			{
-				DrawSprite32Command::DrawColumn(drawerargs);
-				drawerargs.dc_dest += 4;
+				drawer.Execute(thread); // one call per column; the caller's thread is passed straight through
+				drawer.args.dc_dest += 4; // advance by the same factor of 4 to reach the next column
 			}
 		}
 	}
-
-	/////////////////////////////////////////////////////////////////////////////
-
-	template<typename DrawerT>
-	void SWTruecolorDrawers::DrawWallColumns(const WallDrawerArgs& wallargs)
-	{
-		wallcolargs.wallargs = &wallargs;
-
-		bool haslights = r_dynlights && wallargs.lightlist;
-		if (haslights)
-		{
-			float dx = wallargs.WallC.tright.X - wallargs.WallC.tleft.X;
-			float dy = wallargs.WallC.tright.Y - wallargs.WallC.tleft.Y;
-			float length = sqrt(dx * dx + dy * dy);
-			wallcolargs.dc_normal.X = dy / length;
-			wallcolargs.dc_normal.Y = -dx / length;
-			wallcolargs.dc_normal.Z = 0.0f;
-		}
-
-		wallcolargs.SetTextureFracBits(wallargs.fracbits);
-
-		float curlight = wallargs.lightpos;
-		float lightstep = wallargs.lightstep;
-		int shade = wallargs.Shade();
-
-		if (wallargs.fixedlight)
-		{
-			curlight = wallargs.FixedLight();
-			lightstep = 0;
-		}
-
-		float upos = wallargs.texcoords.upos, ustepX = wallargs.texcoords.ustepX, ustepY = wallargs.texcoords.ustepY;
-		float vpos = wallargs.texcoords.vpos, vstepX = wallargs.texcoords.vstepX, vstepY = wallargs.texcoords.vstepY;
-		float wpos = wallargs.texcoords.wpos, wstepX = wallargs.texcoords.wstepX, wstepY = wallargs.texcoords.wstepY;
-		float startX = wallargs.texcoords.startX;
-
-		int x1 = wallargs.x1;
-		int x2 = wallargs.x2;
-
-		upos += ustepX * (x1 + 0.5f - startX);
-		vpos += vstepX * (x1 + 0.5f - startX);
-		wpos += wstepX * (x1 + 0.5f - startX);
-
-		float centerY = wallargs.CenterY;
-		centerY -= 0.5f;
-
-		auto uwal = wallargs.uwal;
-		auto dwal = wallargs.dwal;
-		for (int x = x1; x < x2; x++)
-		{
-			int y1 = uwal[x];
-			int y2 = dwal[x];
-			if (y2 > y1)
-			{
-				wallcolargs.SetLight(curlight, shade);
-				if (haslights)
-					SetLights(wallcolargs, x, y1, wallargs);
-				else
-					wallcolargs.dc_num_lights = 0;
-
-				float dy = (y1 - centerY);
-				float u = upos + ustepY * dy;
-				float v = vpos + vstepY * dy;
-				float w = wpos + wstepY * dy;
-				float scaleU = ustepX;
-				float scaleV = vstepY;
-				w = 1.0f / w;
-				u *= w;
-				v *= w;
-				scaleU *= w;
-				scaleV *= w;
-
-				uint32_t texelX = (uint32_t)(int64_t)((u - std::floor(u)) * 0x1'0000'0000LL);
-				uint32_t texelY = (uint32_t)(int64_t)((v - std::floor(v)) * 0x1'0000'0000LL);
-				uint32_t texelStepX = (uint32_t)(int64_t)(scaleU * 0x1'0000'0000LL);
-				uint32_t texelStepY = (uint32_t)(int64_t)(scaleV * 0x1'0000'0000LL);
-
-				DrawWallColumn32<DrawerT>(wallcolargs, x, y1, y2, texelX, texelY, texelStepX, texelStepY);
-			}
-
-			upos += ustepX;
-			vpos += vstepX;
-			wpos += wstepX;
-			curlight += lightstep;
-		}
-
-		if (r_modelscene)
-		{
-			for (int x = x1; x < x2; x++)
-			{
-				int y1 = uwal[x];
-				int y2 = dwal[x];
-				if (y2 > y1)
-				{
-					int count = y2 - y1;
-
-					float w1 = 1.0f / wallargs.WallC.sz1;
-					float w2 = 1.0f / wallargs.WallC.sz2;
-					float t = (x - wallargs.WallC.sx1 + 0.5f) / (wallargs.WallC.sx2 - wallargs.WallC.sx1);
-					float wcol = w1 * (1.0f - t) + w2 * t;
-					float zcol = 1.0f / wcol;
-					float zbufferdepth = 1.0f / (zcol / wallargs.FocalTangent);
-
-					wallcolargs.SetDest(x, y1);
-					wallcolargs.SetCount(count);
-					DrawDepthColumn(wallcolargs, zbufferdepth);
-				}
-			}
-		}
-	}
-
-	template<typename DrawerT>
-	void SWTruecolorDrawers::DrawWallColumn32(WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepX, uint32_t texelStepY)
-	{
-		auto& wallargs = *drawerargs.wallargs;
-		int texwidth = wallargs.texwidth;
-		int texheight = wallargs.texheight;
-
-		double xmagnitude = fabs(static_cast<int32_t>(texelStepX) * (1.0 / 0x1'0000'0000LL));
-		double ymagnitude = fabs(static_cast<int32_t>(texelStepY) * (1.0 / 0x1'0000'0000LL));
-		double magnitude = MAX(ymagnitude, xmagnitude);
-		double min_lod = -1000.0;
-		double lod = MAX(log2(magnitude) + r_lod_bias, min_lod);
-		bool magnifying = lod < 0.0f;
-
-		int mipmap_offset = 0;
-		int mip_width = texwidth;
-		int mip_height = texheight;
-		if (wallargs.mipmapped && mip_width > 1 && mip_height > 1)
-		{
-			int level = (int)lod;
-			while (level > 0 && mip_width > 1 && mip_height > 1)
-			{
-				mipmap_offset += mip_width * mip_height;
-				level--;
-				mip_width = MAX(mip_width >> 1, 1);
-				mip_height = MAX(mip_height >> 1, 1);
-			}
-		}
-
-		const uint32_t* pixels = static_cast<const uint32_t*>(wallargs.texpixels) + mipmap_offset;
-		fixed_t xxoffset = (texelX >> 16) * mip_width;
-
-		const uint8_t* source;
-		const uint8_t* source2;
-		uint32_t texturefracx;
-		bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter);
-		if (filter_nearest)
-		{
-			int tx = (xxoffset >> FRACBITS) % mip_width;
-			source = (uint8_t*)(pixels + tx * mip_height);
-			source2 = nullptr;
-			texturefracx = 0;
-		}
-		else
-		{
-			xxoffset -= FRACUNIT / 2;
-			int tx0 = (xxoffset >> FRACBITS) % mip_width;
-			if (tx0 < 0)
-				tx0 += mip_width;
-			int tx1 = (tx0 + 1) % mip_width;
-			source = (uint8_t*)(pixels + tx0 * mip_height);
-			source2 = (uint8_t*)(pixels + tx1 * mip_height);
-			texturefracx = (xxoffset >> (FRACBITS - 4)) & 15;
-		}
-
-		int count = y2 - y1;
-		drawerargs.SetDest(x, y1);
-		drawerargs.SetCount(count);
-		drawerargs.SetTexture(source, source2, mip_height);
-		drawerargs.SetTextureUPos(texturefracx);
-		drawerargs.SetTextureVPos(texelY);
-		drawerargs.SetTextureVStep(texelStepY);
-		DrawerT::DrawColumn(drawerargs);
-	}
 }
diff --git a/src/rendering/swrenderer/drawers/r_draw_rgba.h b/src/rendering/swrenderer/drawers/r_draw_rgba.h
index 0babc563b..2182909ee 100644
--- a/src/rendering/swrenderer/drawers/r_draw_rgba.h
+++ b/src/rendering/swrenderer/drawers/r_draw_rgba.h
@@ -25,7 +25,6 @@
 #include "r_draw.h"
 #include "v_palette.h"
 #include "r_thread.h"
-#include "r_draw_pal.h"
 #include "swrenderer/viewport/r_skydrawer.h"
 #include "swrenderer/viewport/r_spandrawer.h"
 #include "swrenderer/viewport/r_walldrawer.h"
@@ -71,6 +70,125 @@ namespace swrenderer
 	#define VECTORCALL
 	#endif
 
+	class DrawFuzzColumnRGBACommand : public DrawerCommand // Drawer command built from a SpriteDrawerArgs; only declared here, the member function bodies are not in this header.
+	{
+		int _x; // members up to "public:" are private (class default)
+		int _yl; // presumably the first row of the column - confirm in the constructor
+		int _yh; // presumably the last row of the column - confirm in the constructor
+		uint8_t * RESTRICT _destorg;
+		int _pitch;
+		int _fuzzpos;
+		int _fuzzviewheight;
+
+	public:
+		DrawFuzzColumnRGBACommand(const SpriteDrawerArgs &drawerargs);
+		void Execute(DrawerThread *thread) override; // receives the DrawerThread the command is being run for
+	};
+
+	class DrawScaledFuzzColumnRGBACommand : public DrawerCommand // Declares the same seven fields as DrawFuzzColumnRGBACommand; how the drawing differs is not visible in this header.
+	{
+		int _x; // members up to "public:" are private (class default)
+		int _yl; // presumably the first row of the column - confirm in the constructor
+		int _yh; // presumably the last row of the column - confirm in the constructor
+		uint8_t * RESTRICT _destorg;
+		int _pitch;
+		int _fuzzpos;
+		int _fuzzviewheight;
+
+	public:
+		DrawScaledFuzzColumnRGBACommand(const SpriteDrawerArgs &drawerargs);
+		void Execute(DrawerThread *thread) override; // receives the DrawerThread the command is being run for
+	};
+
+	class FillSpanRGBACommand : public DrawerCommand // Command that SWTruecolorDrawers::FillSpan pushes onto its Queue.
+	{
+		int _x1; // members up to "public:" are private (class default)
+		int _x2;
+		int _y;
+		uint8_t * RESTRICT _dest;
+		fixed_t _light;
+		int _color;
+
+	public:
+		FillSpanRGBACommand(const SpanDrawerArgs &drawerargs); // declared only; the body is not in this header
+		void Execute(DrawerThread *thread) override; // receives the DrawerThread the command is being run for
+	};
+
+	class DrawFogBoundaryLineRGBACommand : public DrawerCommand // Command that SWTruecolorDrawers::DrawFogBoundaryLine pushes onto its Queue.
+	{
+		int _y; // members up to "public:" are private (class default)
+		int _x;
+		int _x2;
+		uint8_t * RESTRICT _line;
+		fixed_t _light;
+		ShadeConstants _shade_constants; // held by value inside the command
+
+	public:
+		DrawFogBoundaryLineRGBACommand(const SpanDrawerArgs &drawerargs); // declared only; the body is not in this header
+		void Execute(DrawerThread *thread) override; // receives the DrawerThread the command is being run for
+	};
+
+	class DrawTiltedSpanRGBACommand : public DrawerCommand // Command that SWTruecolorDrawers::DrawTiltedSpan pushes onto its Queue.
+	{
+		int _x1; // members up to "public:" are private (class default)
+		int _x2;
+		int _y;
+		uint8_t * RESTRICT _dest;
+		fixed_t _light;
+		ShadeConstants _shade_constants; // held by value inside the command
+		FVector3 _plane_sz; // the three plane vectors are held by value; presumably filled from the same-named constructor parameters
+		FVector3 _plane_su;
+		FVector3 _plane_sv;
+		bool _plane_shade; // note: distinct from the int _planeshade on the next line
+		int _planeshade;
+		float _planelightfloat;
+		fixed_t _pviewx;
+		fixed_t _pviewy;
+		int _xbits;
+		int _ybits;
+		const uint32_t * RESTRICT _source; // read-only 32-bit source data
+		RenderViewport *viewport; // raw pointer; the only member here without the leading underscore
+
+	public:
+		DrawTiltedSpanRGBACommand(const SpanDrawerArgs &drawerargs, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); // takes no colormap argument
+		void Execute(DrawerThread *thread) override; // receives the DrawerThread the command is being run for
+	};
+
+	class DrawColoredSpanRGBACommand : public DrawerCommand // Command that SWTruecolorDrawers::DrawColoredSpan pushes onto its Queue.
+	{
+		int _y; // members up to "public:" are private (class default)
+		int _x1;
+		int _x2;
+		uint8_t * RESTRICT _dest;
+		fixed_t _light;
+		int _color;
+
+	public:
+		DrawColoredSpanRGBACommand(const SpanDrawerArgs &drawerargs); // declared only; the body is not in this header
+
+		void Execute(DrawerThread *thread) override; // receives the DrawerThread the command is being run for
+	};
+
+#if 0 // Everything up to the matching #endif is compiled out.
+	class ApplySpecialColormapRGBACommand : public DrawerCommand // Disabled declaration, kept in the header but never compiled.
+	{
+		uint8_t *buffer;
+		int pitch;
+		int width;
+		int height;
+		int start_red; // start_* / end_* are one int per colour channel
+		int start_green;
+		int start_blue;
+		int end_red;
+		int end_green;
+		int end_blue;
+
+	public:
+		ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen);
+		void Execute(DrawerThread *thread) override;
+	};
+#endif
+
 	template<typename CommandType, typename BlendMode>
 	class DrawerBlendCommand : public CommandType
 	{
@@ -89,6 +207,38 @@ namespace swrenderer
 
 	/////////////////////////////////////////////////////////////////////////////
 
+	class DrawParticleColumnRGBACommand : public DrawerCommand // Draws one particle column; Execute() works out the calling thread's share of the rows.
+	{
+	public:
+		DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx); // only stores the arguments in the fields below
+		void Execute(DrawerThread *thread) override; // returns without drawing when the thread's row count is <= 0
+
+	private:
+		uint32_t *_dest; // base destination pointer; handed to thread->dest_for_thread() in Execute()
+		int _dest_y; // first destination line; drives the thread's count/skip helpers
+		int _pitch; // row stride; multiplied by thread->num_cores in Execute()
+		int _count; // full column height; also the divisor of the texture step in Execute()
+		uint32_t _fg; // packed colour, unpacked per channel in Execute()
+		uint32_t _alpha; // read as particle_alpha in Execute()
+		uint32_t _fracposx; // shifted right by FRACBITS to choose the offset into the particle texture
+	};
+
+	/////////////////////////////////////////////////////////////////////////////
+
+	class DrawVoxelBlocksRGBACommand : public DrawerCommand // Draws an array of VoxelBlock entries by running a DrawSprite32Command for each column.
+	{
+	public:
+		DrawVoxelBlocksRGBACommand(const SpriteDrawerArgs &args, const VoxelBlock *blocks, int blockcount); // copies args; stores the blocks pointer and the count
+		void Execute(DrawerThread *thread) override; // loops over blockcount blocks and block.width columns per block
+
+	private:
+		SpriteDrawerArgs args; // the command's own copy of the drawer args
+		const VoxelBlock *blocks; // pointer only; this class declares no destructor, so the array is never freed here
+		int blockcount; // number of entries Execute() reads from blocks
+	};
+
+	/////////////////////////////////////////////////////////////////////////////
+
 	class SWTruecolorDrawers : public SWPixelFormatDrawers
 	{
 	public:
@@ -127,19 +277,15 @@ namespace swrenderer
 		void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override;
 		void DrawSpanAddClamp(const SpanDrawerArgs &args) override;
 		void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override;
-		void FillSpan(const SpanDrawerArgs& args) override;
-		void DrawTiltedSpan(const SpanDrawerArgs& args, const FVector3& plane_sz, const FVector3& plane_su, const FVector3& plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap* basecolormap) override;
-		void DrawColoredSpan(const SpanDrawerArgs& args) override;
-		void DrawFogBoundaryLine(const SpanDrawerArgs& args) override;
-		void DrawParticleColumn(int x, int yl, int ycount, uint32_t fg, uint32_t alpha, uint32_t fracposx) override;
+		void FillSpan(const SpanDrawerArgs &args) override { Queue->Push<FillSpanRGBACommand>(args); } // only enqueues a FillSpanRGBACommand; nothing is drawn in this call
 
-		void DrawScaledFuzzColumn(const SpriteDrawerArgs& args);
-		void DrawUnscaledFuzzColumn(const SpriteDrawerArgs& args);
+		void DrawTiltedSpan(const SpanDrawerArgs &args, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override // only enqueues a DrawTiltedSpanRGBACommand; nothing is drawn in this call
+		{
+			Queue->Push<DrawTiltedSpanRGBACommand>(args, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); // basecolormap is not forwarded: the command's constructor has no such parameter
+		}
 
-		template<typename DrawerT> void DrawWallColumns(const WallDrawerArgs& args);
-		template<typename DrawerT> void DrawWallColumn32(WallColumnDrawerArgs& drawerargs, int x, int y1, int y2, uint32_t texelX, uint32_t texelY, uint32_t texelStepX, uint32_t texelStepY);
-
-		WallColumnDrawerArgs wallcolargs;
+		void DrawColoredSpan(const SpanDrawerArgs &args) override { Queue->Push<DrawColoredSpanRGBACommand>(args); } // only enqueues a DrawColoredSpanRGBACommand; nothing is drawn in this call
+		void DrawFogBoundaryLine(const SpanDrawerArgs &args) override { Queue->Push<DrawFogBoundaryLineRGBACommand>(args); } // only enqueues a DrawFogBoundaryLineRGBACommand; nothing is drawn in this call
 	};
 
 	/////////////////////////////////////////////////////////////////////////////
diff --git a/src/rendering/swrenderer/drawers/r_draw_sky32.h b/src/rendering/swrenderer/drawers/r_draw_sky32.h
index df208afab..bc65e41e2 100644
--- a/src/rendering/swrenderer/drawers/r_draw_sky32.h
+++ b/src/rendering/swrenderer/drawers/r_draw_sky32.h
@@ -27,10 +27,16 @@
 
 namespace swrenderer
 {
-	class DrawSkySingle32Command
+
+	class DrawSkySingle32Command : public DrawerCommand
 	{
+	protected:
+		SkyDrawerArgs args;
+		
 	public:
-		static void DrawColumn(const SkyDrawerArgs& args)
+		DrawSkySingle32Command(const SkyDrawerArgs &args) : args(args) { } // copies the sky drawer args into the command's own member
+		
+		void Execute(DrawerThread *thread) override
 		{
 			uint32_t *dest = (uint32_t *)args.Dest();
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
@@ -44,7 +50,9 @@ namespace swrenderer
 			uint32_t solid_bottom = args.SolidBottomColor();
 			bool fadeSky = args.FadeSky();
 
-			int count = args.Count();
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
 
 			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
 			int start_fade = 2; // How fast it should fade out
@@ -58,8 +66,15 @@ namespace swrenderer
 			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
 			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
 
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
 			if (!fadeSky)
 			{
+				int count = thread->count_for_thread(args.DestY(), args.Count());
+
 				for (int index = 0; index < count; index++)
 				{
 					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
@@ -74,7 +89,7 @@ namespace swrenderer
 			BgraColor solid_top_fill = solid_top;
 			BgraColor solid_bottom_fill = solid_bottom;
 
-			int index = 0;
+			int index = skipped;
 
 			// Top solid color:
 			while (index < start_fadetop_y)
@@ -82,7 +97,7 @@ namespace swrenderer
 				*dest = solid_top;
 				dest += pitch;
 				frac += fracstep;
-				index++;
+				index += num_cores;
 			}
 
 			// Top fade:
@@ -102,7 +117,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Textured center:
@@ -113,7 +128,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Fade bottom:
@@ -133,7 +148,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Bottom solid color:
@@ -141,15 +156,20 @@ namespace swrenderer
 			{
 				*dest = solid_bottom;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 		}
 	};
 	
-	class DrawSkyDouble32Command
+	class DrawSkyDouble32Command : public DrawerCommand
 	{
+	protected:
+		SkyDrawerArgs args;
+		
 	public:
-		static void DrawColumn(const SkyDrawerArgs& args)
+		DrawSkyDouble32Command(const SkyDrawerArgs &args) : args(args) { } // copies the sky drawer args into the command's own member
+		
+		void Execute(DrawerThread *thread) override
 		{
 			uint32_t *dest = (uint32_t *)args.Dest();
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
@@ -161,7 +181,9 @@ namespace swrenderer
 			int32_t frac = args.TextureVPos();
 			int32_t fracstep = args.TextureVStep();
 
-			int count = args.Count();
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
 
 			uint32_t solid_top = args.SolidTopColor();
 			uint32_t solid_bottom = args.SolidBottomColor();
@@ -179,8 +201,15 @@ namespace swrenderer
 			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
 			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
 
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
 			if (!fadeSky)
 			{
+				count = thread->count_for_thread(args.DestY(), count);
+
 				for (int index = 0; index < count; index++)
 				{
 					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
@@ -202,7 +231,7 @@ namespace swrenderer
 			BgraColor solid_top_fill = solid_top;
 			BgraColor solid_bottom_fill = solid_bottom;
 
-			int index = 0;
+			int index = skipped;
 
 			// Top solid color:
 			while (index < start_fadetop_y)
@@ -210,7 +239,7 @@ namespace swrenderer
 				*dest = solid_top;
 				dest += pitch;
 				frac += fracstep;
-				index++;
+				index += num_cores;
 			}
 
 			// Top fade:
@@ -233,7 +262,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Textured center:
@@ -250,7 +279,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Fade bottom:
@@ -273,7 +302,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Bottom solid color:
@@ -281,7 +310,7 @@ namespace swrenderer
 			{
 				*dest = solid_bottom;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 		}
 	};
diff --git a/src/rendering/swrenderer/drawers/r_draw_sky32_sse2.h b/src/rendering/swrenderer/drawers/r_draw_sky32_sse2.h
index 998833416..64afca87a 100644
--- a/src/rendering/swrenderer/drawers/r_draw_sky32_sse2.h
+++ b/src/rendering/swrenderer/drawers/r_draw_sky32_sse2.h
@@ -27,10 +27,15 @@
 
 namespace swrenderer
 {
-	class DrawSkySingle32Command
+	class DrawSkySingle32Command : public DrawerCommand
 	{
+	protected:
+		SkyDrawerArgs args;
+		
 	public:
-		static void DrawColumn(const SkyDrawerArgs& args)
+		DrawSkySingle32Command(const SkyDrawerArgs &args) : args(args) { } // copies the sky drawer args into the command's own member
+		
+		void Execute(DrawerThread *thread) override
 		{
 			uint32_t *dest = (uint32_t *)args.Dest();
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
@@ -44,7 +49,9 @@ namespace swrenderer
 			uint32_t solid_bottom = args.SolidBottomColor();
 			bool fadeSky = args.FadeSky();
 
-			int count = args.Count();
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
 
 			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
 			int start_fade = 2; // How fast it should fade out
@@ -58,8 +65,15 @@ namespace swrenderer
 			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
 			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
 
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
 			if (!fadeSky)
 			{
+				int count = thread->count_for_thread(args.DestY(), args.Count());
+
 				for (int index = 0; index < count; index++)
 				{
 					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
@@ -74,7 +88,7 @@ namespace swrenderer
 			__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
 			__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
 
-			int index = 0;
+			int index = skipped;
 
 			// Top solid color:
 			while (index < start_fadetop_y)
@@ -82,7 +96,7 @@ namespace swrenderer
 				*dest = solid_top;
 				dest += pitch;
 				frac += fracstep;
-				index++;
+				index += num_cores;
 			}
 
 			// Top fade:
@@ -100,7 +114,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Textured center:
@@ -111,7 +125,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Fade bottom:
@@ -129,7 +143,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Bottom solid color:
@@ -137,15 +151,20 @@ namespace swrenderer
 			{
 				*dest = solid_bottom;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 		}
 	};
 	
-	class DrawSkyDouble32Command
+	class DrawSkyDouble32Command : public DrawerCommand
 	{
+	protected:
+		SkyDrawerArgs args;
+		
 	public:
-		static void DrawColumn(const SkyDrawerArgs& args)
+		DrawSkyDouble32Command(const SkyDrawerArgs &args) : args(args) { } // copies the sky drawer args into the command's own member
+		
+		void Execute(DrawerThread *thread) override
 		{
 			uint32_t *dest = (uint32_t *)args.Dest();
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
@@ -157,14 +176,35 @@ namespace swrenderer
 			int32_t frac = args.TextureVPos();
 			int32_t fracstep = args.TextureVStep();
 
-			int count = args.Count();
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
 
 			uint32_t solid_top = args.SolidTopColor();
 			uint32_t solid_bottom = args.SolidBottomColor();
 			bool fadeSky = args.FadeSky();
+			
+			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+			int start_fade = 2; // How fast it should fade out
+			int fade_length = (1 << (24 - start_fade));
+			int start_fadetop_y = (-frac) / fracstep;
+			int end_fadetop_y = (fade_length - frac) / fracstep;
+			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+			start_fadetop_y = clamp(start_fadetop_y, 0, count);
+			end_fadetop_y = clamp(end_fadetop_y, 0, count);
+			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
 
 			if (!fadeSky)
 			{
+				count = thread->count_for_thread(args.DestY(), count);
+
 				for (int index = 0; index < count; index++)
 				{
 					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
@@ -183,22 +223,10 @@ namespace swrenderer
 				return;
 			}
 
-			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
-			int start_fade = 2; // How fast it should fade out
-			int fade_length = (1 << (24 - start_fade));
-			int start_fadetop_y = (-frac) / fracstep;
-			int end_fadetop_y = (fade_length - frac) / fracstep;
-			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
-			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
-			start_fadetop_y = clamp(start_fadetop_y, 0, count);
-			end_fadetop_y = clamp(end_fadetop_y, 0, count);
-			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
-			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
-
 			__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
 			__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
 
-			int index = 0;
+			int index = skipped;
 
 			// Top solid color:
 			while (index < start_fadetop_y)
@@ -206,7 +234,7 @@ namespace swrenderer
 				*dest = solid_top;
 				dest += pitch;
 				frac += fracstep;
-				index++;
+				index += num_cores;
 			}
 
 			// Top fade:
@@ -229,7 +257,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Textured center:
@@ -246,7 +274,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Fade bottom:
@@ -269,7 +297,7 @@ namespace swrenderer
 
 				frac += fracstep;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 
 			// Bottom solid color:
@@ -277,7 +305,7 @@ namespace swrenderer
 			{
 				*dest = solid_bottom;
 				dest += pitch;
-				index++;
+				index += num_cores;
 			}
 		}
 	};
diff --git a/src/rendering/swrenderer/drawers/r_draw_span32.h b/src/rendering/swrenderer/drawers/r_draw_span32.h
index 9534c496b..f0b83c796 100644
--- a/src/rendering/swrenderer/drawers/r_draw_span32.h
+++ b/src/rendering/swrenderer/drawers/r_draw_span32.h
@@ -51,9 +51,14 @@ namespace swrenderer
 	}
 
 	template<typename BlendT>
-	class DrawSpan32T
+	class DrawSpan32T : public DrawerCommand
 	{
+	protected:
+		SpanDrawerArgs args;
+
 	public:
+		DrawSpan32T(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } // copies the span drawer args into the command's own member
+
 		struct TextureData
 		{
 			uint32_t width;
@@ -67,10 +72,12 @@ namespace swrenderer
 			const uint32_t *source;
 		};
 
-		static void DrawColumn(const SpanDrawerArgs& args)
+		void Execute(DrawerThread *thread) override
 		{
 			using namespace DrawSpan32TModes;
 
+			if (thread->line_skipped_by_thread(args.DestY())) return;
+			
 			TextureData texdata;
 			texdata.width = args.TextureWidth();
 			texdata.height = args.TextureHeight();
@@ -112,16 +119,16 @@ namespace swrenderer
 				if (is_nearest_filter)
 				{
 					if (is_64x64)
-						Loop<SimpleShade, NearestFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<SimpleShade, NearestFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<SimpleShade, NearestFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<SimpleShade, NearestFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 				else
 				{
 					if (is_64x64)
-						Loop<SimpleShade, LinearFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<SimpleShade, LinearFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<SimpleShade, LinearFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<SimpleShade, LinearFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 			}
 			else
@@ -129,22 +136,22 @@ namespace swrenderer
 				if (is_nearest_filter)
 				{
 					if (is_64x64)
-						Loop<AdvancedShade, NearestFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<AdvancedShade, NearestFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<AdvancedShade, NearestFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<AdvancedShade, NearestFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 				else
 				{
 					if (is_64x64)
-						Loop<AdvancedShade, LinearFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<AdvancedShade, LinearFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<AdvancedShade, LinearFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<AdvancedShade, LinearFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 			}
 		}
 
 		template<typename ShadeModeT, typename FilterModeT, typename TextureSizeT>
-		FORCEINLINE static void Loop(const SpanDrawerArgs& args, TextureData texdata, ShadeConstants shade_constants)
+		FORCEINLINE void Loop(DrawerThread *thread, TextureData texdata, ShadeConstants shade_constants)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -222,7 +229,7 @@ namespace swrenderer
 		}
 
 		template<typename FilterModeT, typename TextureSizeT>
-		FORCEINLINE static uint32_t Sample(uint32_t width, uint32_t height, uint32_t xone, uint32_t yone, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, const uint32_t *source)
+		FORCEINLINE uint32_t Sample(uint32_t width, uint32_t height, uint32_t xone, uint32_t yone, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, const uint32_t *source)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -284,7 +291,7 @@ namespace swrenderer
 		}
 
 		template<typename ShadeModeT>
-		FORCEINLINE static BgraColor Shade(BgraColor fgcolor, uint32_t light, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, const DrawerLight *lights, int num_lights, float viewpos_x)
+		FORCEINLINE BgraColor Shade(BgraColor fgcolor, uint32_t light, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, const DrawerLight *lights, int num_lights, float viewpos_x)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -306,7 +313,7 @@ namespace swrenderer
 			return AddLights(material, fgcolor, lights, num_lights, viewpos_x);
 		}
 
-		FORCEINLINE static BgraColor AddLights(BgraColor material, BgraColor fgcolor, const DrawerLight *lights, int num_lights, float viewpos_x)
+		FORCEINLINE BgraColor AddLights(BgraColor material, BgraColor fgcolor, const DrawerLight *lights, int num_lights, float viewpos_x)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -358,7 +365,7 @@ namespace swrenderer
 			return fgcolor;
 		}
 
-		FORCEINLINE static BgraColor Blend(BgraColor fgcolor, BgraColor bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor)
+		FORCEINLINE BgraColor Blend(BgraColor fgcolor, BgraColor bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor)
 		{
 			using namespace DrawSpan32TModes;
 
diff --git a/src/rendering/swrenderer/drawers/r_draw_span32_sse2.h b/src/rendering/swrenderer/drawers/r_draw_span32_sse2.h
index 527545a9d..f5823a3eb 100644
--- a/src/rendering/swrenderer/drawers/r_draw_span32_sse2.h
+++ b/src/rendering/swrenderer/drawers/r_draw_span32_sse2.h
@@ -51,9 +51,14 @@ namespace swrenderer
 	}
 
 	template<typename BlendT>
-	class DrawSpan32T
+	class DrawSpan32T : public DrawerCommand
 	{
+	protected:
+		SpanDrawerArgs args;
+
 	public:
+		DrawSpan32T(const SpanDrawerArgs &drawerargs) : args(drawerargs) { } // copies the span drawer args into the command's own member
+
 		struct TextureData
 		{
 			uint32_t width;
@@ -67,10 +72,12 @@ namespace swrenderer
 			const uint32_t *source;
 		};
 
-		static void DrawColumn(const SpanDrawerArgs& args)
+		void Execute(DrawerThread *thread) override
 		{
 			using namespace DrawSpan32TModes;
 
+			if (thread->line_skipped_by_thread(args.DestY())) return;
+			
 			TextureData texdata;
 			texdata.width = args.TextureWidth();
 			texdata.height = args.TextureHeight();
@@ -112,16 +119,16 @@ namespace swrenderer
 				if (is_nearest_filter)
 				{
 					if (is_64x64)
-						Loop<SimpleShade, NearestFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<SimpleShade, NearestFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<SimpleShade, NearestFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<SimpleShade, NearestFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 				else
 				{
 					if (is_64x64)
-						Loop<SimpleShade, LinearFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<SimpleShade, LinearFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<SimpleShade, LinearFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<SimpleShade, LinearFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 			}
 			else
@@ -129,22 +136,22 @@ namespace swrenderer
 				if (is_nearest_filter)
 				{
 					if (is_64x64)
-						Loop<AdvancedShade, NearestFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<AdvancedShade, NearestFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<AdvancedShade, NearestFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<AdvancedShade, NearestFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 				else
 				{
 					if (is_64x64)
-						Loop<AdvancedShade, LinearFilter, TextureSize64x64>(args, texdata, shade_constants);
+						Loop<AdvancedShade, LinearFilter, TextureSize64x64>(thread, texdata, shade_constants);
 					else
-						Loop<AdvancedShade, LinearFilter, TextureSizeAny>(args, texdata, shade_constants);
+						Loop<AdvancedShade, LinearFilter, TextureSizeAny>(thread, texdata, shade_constants);
 				}
 			}
 		}
 
 		template<typename ShadeModeT, typename FilterModeT, typename TextureSizeT>
-		FORCEINLINE static void VECTORCALL Loop(const SpanDrawerArgs& args, TextureData texdata, ShadeConstants shade_constants)
+		FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, TextureData texdata, ShadeConstants shade_constants)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -256,7 +263,7 @@ namespace swrenderer
 		}
 
 		template<typename FilterModeT, typename TextureSizeT>
-		FORCEINLINE static unsigned int VECTORCALL Sample(uint32_t width, uint32_t height, uint32_t xone, uint32_t yone, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, const uint32_t *source)
+		FORCEINLINE unsigned int VECTORCALL Sample(uint32_t width, uint32_t height, uint32_t xone, uint32_t yone, uint32_t xstep, uint32_t ystep, uint32_t xfrac, uint32_t yfrac, const uint32_t *source)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -318,7 +325,7 @@ namespace swrenderer
 		}
 
 		template<typename ShadeModeT>
-		FORCEINLINE static __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_x)
+		FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_x)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -350,7 +357,7 @@ namespace swrenderer
 			return AddLights(material, fgcolor, lights, num_lights, viewpos_x);
 		}
 
-		FORCEINLINE static __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_x)
+		FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_x)
 		{
 			using namespace DrawSpan32TModes;
 
@@ -399,7 +406,7 @@ namespace swrenderer
 			return fgcolor;
 		}
 
-		FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor0, unsigned int ifgcolor1)
+		FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, uint32_t srcalpha, uint32_t destalpha, unsigned int ifgcolor0, unsigned int ifgcolor1)
 		{
 			using namespace DrawSpan32TModes;
 
diff --git a/src/rendering/swrenderer/drawers/r_draw_sprite32.h b/src/rendering/swrenderer/drawers/r_draw_sprite32.h
index 7fa300f82..e40264689 100644
--- a/src/rendering/swrenderer/drawers/r_draw_sprite32.h
+++ b/src/rendering/swrenderer/drawers/r_draw_sprite32.h
@@ -54,10 +54,14 @@ namespace swrenderer
 	}
 
 	template<typename BlendT, typename SamplerT>
-	class DrawSprite32T
+	class DrawSprite32T : public DrawerCommand
 	{
 	public:
-		static void DrawColumn(const SpriteDrawerArgs& args)
+		SpriteDrawerArgs args;
+
+		DrawSprite32T(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { }
+
+		void Execute(DrawerThread *thread) override
 		{
 			using namespace DrawSprite32TModes;
 
@@ -70,33 +74,33 @@ namespace swrenderer
 				if (shade_constants.simple_shade)
 				{
 					if (is_nearest_filter)
-						Loop<SimpleShade, NearestFilter>(args, shade_constants);
+						Loop<SimpleShade, NearestFilter>(thread, shade_constants);
 					else
-						Loop<SimpleShade, LinearFilter>(args, shade_constants);
+						Loop<SimpleShade, LinearFilter>(thread, shade_constants);
 				}
 				else
 				{
 					if (is_nearest_filter)
-						Loop<AdvancedShade, NearestFilter>(args, shade_constants);
+						Loop<AdvancedShade, NearestFilter>(thread, shade_constants);
 					else
-						Loop<AdvancedShade, LinearFilter>(args, shade_constants);
+						Loop<AdvancedShade, LinearFilter>(thread, shade_constants);
 				}
 			}
 			else // no linear filtering for translated, shaded or fill
 			{
 				if (shade_constants.simple_shade)
 				{
-					Loop<SimpleShade, NearestFilter>(args, shade_constants);
+					Loop<SimpleShade, NearestFilter>(thread, shade_constants);
 				}
 				else
 				{
-					Loop<AdvancedShade, NearestFilter>(args, shade_constants);
+					Loop<AdvancedShade, NearestFilter>(thread, shade_constants);
 				}
 			}
 		}
 
 		template<typename ShadeModeT, typename FilterModeT>
-		FORCEINLINE static void Loop(const SpriteDrawerArgs& args, ShadeConstants shade_constants)
+		FORCEINLINE void Loop(DrawerThread *thread, ShadeConstants shade_constants)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -167,8 +171,6 @@ namespace swrenderer
 			}
 
 			int count = args.Count();
-			if (count <= 0) return;
-
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			uint32_t fracstep = args.TextureVStep();
 			uint32_t frac = args.TextureVPos();
@@ -176,6 +178,13 @@ namespace swrenderer
 			uint32_t *dest = (uint32_t*)args.Dest();
 			int dest_y = args.DestY();
 
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			frac += thread->skipped_by_thread(dest_y) * fracstep;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			fracstep *= thread->num_cores;
+			pitch *= thread->num_cores;
+
 			if (FilterModeT::Mode == (int)FilterModes::Linear)
 			{
 				frac -= one / 2;
@@ -211,7 +220,7 @@ namespace swrenderer
 		}
 
 		template<typename FilterModeT>
-		FORCEINLINE static BgraColor Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor)
+		FORCEINLINE BgraColor Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -260,7 +269,7 @@ namespace swrenderer
 			}
 		}
 
-		FORCEINLINE static uint32_t SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap)
+		FORCEINLINE uint32_t SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -277,7 +286,7 @@ namespace swrenderer
 		}
 
 		template<typename ShadeModeT>
-		FORCEINLINE static BgraColor Shade(BgraColor fgcolor, BgraColor mlight, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, BgraColor lightcontrib)
+		FORCEINLINE BgraColor Shade(BgraColor fgcolor, BgraColor mlight, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, BgraColor lightcontrib)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -307,7 +316,7 @@ namespace swrenderer
 			}
 		}
 
-		FORCEINLINE static BgraColor Blend(BgraColor fgcolor, BgraColor bgcolor, uint32_t ifgcolor, uint32_t ifgshade, uint32_t srcalpha, uint32_t destalpha)
+		FORCEINLINE BgraColor Blend(BgraColor fgcolor, BgraColor bgcolor, uint32_t ifgcolor, uint32_t ifgshade, uint32_t srcalpha, uint32_t destalpha)
 		{
 			using namespace DrawSprite32TModes;
 
diff --git a/src/rendering/swrenderer/drawers/r_draw_sprite32_sse2.h b/src/rendering/swrenderer/drawers/r_draw_sprite32_sse2.h
index d59f1d204..d07dbc585 100644
--- a/src/rendering/swrenderer/drawers/r_draw_sprite32_sse2.h
+++ b/src/rendering/swrenderer/drawers/r_draw_sprite32_sse2.h
@@ -54,10 +54,14 @@ namespace swrenderer
 	}
 
 	template<typename BlendT, typename SamplerT>
-	class DrawSprite32T
+	class DrawSprite32T : public DrawerCommand
 	{
 	public:
-		static void DrawColumn(const SpriteDrawerArgs& args)
+		SpriteDrawerArgs args;
+
+		DrawSprite32T(const SpriteDrawerArgs &drawerargs) : args(drawerargs) { }
+
+		void Execute(DrawerThread *thread) override
 		{
 			using namespace DrawSprite32TModes;
 
@@ -70,33 +74,33 @@ namespace swrenderer
 				if (shade_constants.simple_shade)
 				{
 					if (is_nearest_filter)
-						Loop<SimpleShade, NearestFilter>(args, shade_constants);
+						Loop<SimpleShade, NearestFilter>(thread, shade_constants);
 					else
-						Loop<SimpleShade, LinearFilter>(args, shade_constants);
+						Loop<SimpleShade, LinearFilter>(thread, shade_constants);
 				}
 				else
 				{
 					if (is_nearest_filter)
-						Loop<AdvancedShade, NearestFilter>(args, shade_constants);
+						Loop<AdvancedShade, NearestFilter>(thread, shade_constants);
 					else
-						Loop<AdvancedShade, LinearFilter>(args, shade_constants);
+						Loop<AdvancedShade, LinearFilter>(thread, shade_constants);
 				}
 			}
 			else // no linear filtering for translated, shaded or fill
 			{
 				if (shade_constants.simple_shade)
 				{
-					Loop<SimpleShade, NearestFilter>(args, shade_constants);
+					Loop<SimpleShade, NearestFilter>(thread, shade_constants);
 				}
 				else
 				{
-					Loop<AdvancedShade, NearestFilter>(args, shade_constants);
+					Loop<AdvancedShade, NearestFilter>(thread, shade_constants);
 				}
 			}
 		}
 
 		template<typename ShadeModeT, typename FilterModeT>
-		FORCEINLINE static void VECTORCALL Loop(const SpriteDrawerArgs& args, ShadeConstants shade_constants)
+		FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, ShadeConstants shade_constants)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -158,7 +162,6 @@ namespace swrenderer
 			}
 
 			int count = args.Count();
-			if (count <= 0) return;
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			uint32_t fracstep = args.TextureVStep();
 			uint32_t frac = args.TextureVPos();
@@ -166,6 +169,13 @@ namespace swrenderer
 			uint32_t *dest = (uint32_t*)args.Dest();
 			int dest_y = args.DestY();
 
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			frac += thread->skipped_by_thread(dest_y) * fracstep;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			fracstep *= thread->num_cores;
+			pitch *= thread->num_cores;
+
 			if (FilterModeT::Mode == (int)FilterModes::Linear)
 			{
 				frac -= one / 2;
@@ -245,7 +255,7 @@ namespace swrenderer
 		}
 
 		template<typename FilterModeT>
-		FORCEINLINE static unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor)
+		FORCEINLINE unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, const uint32_t *translation, int textureheight, uint32_t one, uint32_t texturefracx, uint32_t color, uint32_t srccolor)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -294,7 +304,7 @@ namespace swrenderer
 			}
 		}
 
-		FORCEINLINE static unsigned int VECTORCALL SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap)
+		FORCEINLINE unsigned int VECTORCALL SampleShade(uint32_t frac, const uint32_t *source, const uint8_t *colormap)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -311,7 +321,7 @@ namespace swrenderer
 		}
 
 		template<typename ShadeModeT>
-		FORCEINLINE static __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i lightcontrib)
+		FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i lightcontrib)
 		{
 			using namespace DrawSprite32TModes;
 
@@ -350,7 +360,7 @@ namespace swrenderer
 			}
 		}
 
-		FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
+		FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
 		{
 			using namespace DrawSprite32TModes;
 
diff --git a/src/rendering/swrenderer/drawers/r_draw_wall32.h b/src/rendering/swrenderer/drawers/r_draw_wall32.h
index 65f7025b2..2f4012901 100644
--- a/src/rendering/swrenderer/drawers/r_draw_wall32.h
+++ b/src/rendering/swrenderer/drawers/r_draw_wall32.h
@@ -47,10 +47,12 @@ namespace swrenderer
 	}
 
 	template<typename BlendT>
-	class DrawWall32T
+	class DrawWall32T : public DrawWallCommand
 	{
 	public:
-		static void DrawColumn(const WallColumnDrawerArgs& args)
+		DrawWall32T(const WallDrawerArgs &drawerargs) : DrawWallCommand(drawerargs) { }
+
+		void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override
 		{
 			using namespace DrawWall32TModes;
 
@@ -60,27 +62,24 @@ namespace swrenderer
 			if (shade_constants.simple_shade)
 			{
 				if (is_nearest_filter)
-					Loop<SimpleShade, NearestFilter>(args, shade_constants);
+					Loop<SimpleShade, NearestFilter>(thread, args, shade_constants);
 				else
-					Loop<SimpleShade, LinearFilter>(args, shade_constants);
+					Loop<SimpleShade, LinearFilter>(thread, args, shade_constants);
 			}
 			else
 			{
 				if (is_nearest_filter)
-					Loop<AdvancedShade, NearestFilter>(args, shade_constants);
+					Loop<AdvancedShade, NearestFilter>(thread, args, shade_constants);
 				else
-					Loop<AdvancedShade, LinearFilter>(args, shade_constants);
+					Loop<AdvancedShade, LinearFilter>(thread, args, shade_constants);
 			}
 		}
 
 		template<typename ShadeModeT, typename FilterModeT>
-		FORCEINLINE static void Loop(const WallColumnDrawerArgs& args, ShadeConstants shade_constants)
+		FORCEINLINE void Loop(DrawerThread *thread, const WallColumnDrawerArgs& args, ShadeConstants shade_constants)
 		{
 			using namespace DrawWall32TModes;
 
-			int count = args.Count();
-			if (count <= 0) return;
-
 			const uint32_t *source = (const uint32_t*)args.TexturePixels();
 			const uint32_t *source2 = (const uint32_t*)args.TexturePixels2();
 			int textureheight = args.TextureHeight();
@@ -116,6 +115,7 @@ namespace swrenderer
 				desaturate = 0;
 			}
 
+			int count = args.Count();
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			uint32_t fracstep = args.TextureVStep();
 			uint32_t frac = args.TextureVPos();
@@ -125,8 +125,15 @@ namespace swrenderer
 
 			auto lights = args.dc_lights;
 			auto num_lights = args.dc_num_lights;
-			float viewpos_z = args.dc_viewpos.Z;
-			float step_viewpos_z = args.dc_viewpos_step.Z;
+			float viewpos_z = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y);
+			float step_viewpos_z = args.dc_viewpos_step.Z * thread->num_cores;
+
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			frac += thread->skipped_by_thread(dest_y) * fracstep;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			fracstep *= thread->num_cores;
+			pitch *= thread->num_cores;
 
 			if (FilterModeT::Mode == (int)FilterModes::Linear)
 			{
@@ -160,7 +167,7 @@ namespace swrenderer
 		}
 
 		template<typename FilterModeT>
-		FORCEINLINE static BgraColor Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx)
+		FORCEINLINE BgraColor Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx)
 		{
 			using namespace DrawWall32TModes;
 
@@ -196,7 +203,7 @@ namespace swrenderer
 		}
 
 		template<typename ShadeModeT>
-		FORCEINLINE static BgraColor Shade(BgraColor fgcolor, uint32_t light, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, const DrawerLight *lights, int num_lights, float viewpos_z)
+		FORCEINLINE BgraColor Shade(BgraColor fgcolor, uint32_t light, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, const DrawerLight *lights, int num_lights, float viewpos_z)
 		{
 			using namespace DrawWall32TModes;
 
@@ -218,7 +225,7 @@ namespace swrenderer
 			return AddLights(material, fgcolor, lights, num_lights, viewpos_z);
 		}
 
-		FORCEINLINE static BgraColor AddLights(BgraColor material, BgraColor fgcolor, const DrawerLight *lights, int num_lights, float viewpos_z)
+		FORCEINLINE BgraColor AddLights(BgraColor material, BgraColor fgcolor, const DrawerLight *lights, int num_lights, float viewpos_z)
 		{
 			using namespace DrawWall32TModes;
 
@@ -270,7 +277,7 @@ namespace swrenderer
 			return fgcolor;
 		}
 
-		FORCEINLINE static BgraColor Blend(BgraColor fgcolor, BgraColor bgcolor, unsigned int ifgcolor, uint32_t srcalpha, uint32_t destalpha)
+		FORCEINLINE BgraColor Blend(BgraColor fgcolor, BgraColor bgcolor, unsigned int ifgcolor, uint32_t srcalpha, uint32_t destalpha)
 		{
 			using namespace DrawWall32TModes;
 
diff --git a/src/rendering/swrenderer/drawers/r_draw_wall32_sse2.h b/src/rendering/swrenderer/drawers/r_draw_wall32_sse2.h
index 4ea8d4d75..5316a9e03 100644
--- a/src/rendering/swrenderer/drawers/r_draw_wall32_sse2.h
+++ b/src/rendering/swrenderer/drawers/r_draw_wall32_sse2.h
@@ -47,10 +47,12 @@ namespace swrenderer
 	}
 
 	template<typename BlendT>
-	class DrawWall32T
+	class DrawWall32T : public DrawWallCommand
 	{
 	public:
-		static void DrawColumn(const WallColumnDrawerArgs& args)
+		DrawWall32T(const WallDrawerArgs &drawerargs) : DrawWallCommand(drawerargs) { }
+
+		void DrawColumn(DrawerThread *thread, const WallColumnDrawerArgs& args) override
 		{
 			using namespace DrawWall32TModes;
 
@@ -60,21 +62,21 @@ namespace swrenderer
 			if (shade_constants.simple_shade)
 			{
 				if (is_nearest_filter)
-					Loop<SimpleShade, NearestFilter>(args, shade_constants);
+					Loop<SimpleShade, NearestFilter>(thread, args, shade_constants);
 				else
-					Loop<SimpleShade, LinearFilter>(args, shade_constants);
+					Loop<SimpleShade, LinearFilter>(thread, args, shade_constants);
 			}
 			else
 			{
 				if (is_nearest_filter)
-					Loop<AdvancedShade, NearestFilter>(args, shade_constants);
+					Loop<AdvancedShade, NearestFilter>(thread, args, shade_constants);
 				else
-					Loop<AdvancedShade, LinearFilter>(args, shade_constants);
+					Loop<AdvancedShade, LinearFilter>(thread, args, shade_constants);
 			}
 		}
 
 		template<typename ShadeModeT, typename FilterModeT>
-		FORCEINLINE static void VECTORCALL Loop(const WallColumnDrawerArgs& args, ShadeConstants shade_constants)
+		FORCEINLINE void VECTORCALL Loop(DrawerThread *thread, const WallColumnDrawerArgs& args, ShadeConstants shade_constants)
 		{
 			using namespace DrawWall32TModes;
 
@@ -108,8 +110,6 @@ namespace swrenderer
 			}
 
 			int count = args.Count();
-			if (count <= 0) return;
-
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			uint32_t fracstep = args.TextureVStep();
 			uint32_t frac = args.TextureVPos();
@@ -119,11 +119,18 @@ namespace swrenderer
 
 			auto lights = args.dc_lights;
 			auto num_lights = args.dc_num_lights;
-			float vpz = args.dc_viewpos.Z;
-			float stepvpz = args.dc_viewpos_step.Z;
+			float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y);
+			float stepvpz = args.dc_viewpos_step.Z * thread->num_cores;
 			__m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f);
 			__m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f);
 
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			frac += thread->skipped_by_thread(dest_y) * fracstep;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			fracstep *= thread->num_cores;
+			pitch *= thread->num_cores;
+
 			if (FilterModeT::Mode == (int)FilterModes::Linear)
 			{
 				frac -= one / 2;
@@ -196,7 +203,7 @@ namespace swrenderer
 		}
 
 		template<typename FilterModeT>
-		FORCEINLINE static unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx)
+		FORCEINLINE unsigned int VECTORCALL Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx)
 		{
 			using namespace DrawWall32TModes;
 
@@ -232,7 +239,7 @@ namespace swrenderer
 		}
 
 		template<typename ShadeModeT>
-		FORCEINLINE static __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z)
+		FORCEINLINE __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z)
 		{
 			using namespace DrawWall32TModes;
 
@@ -264,7 +271,7 @@ namespace swrenderer
 			return AddLights(material, fgcolor, lights, num_lights, viewpos_z);
 		}
 
-		FORCEINLINE static __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z)
+		FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z)
 		{
 			using namespace DrawWall32TModes;
 
@@ -313,7 +320,7 @@ namespace swrenderer
 			return fgcolor;
 		}
 
-		FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha)
+		FORCEINLINE __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha)
 		{
 			using namespace DrawWall32TModes;
 
diff --git a/src/rendering/swrenderer/line/r_walldraw.cpp b/src/rendering/swrenderer/line/r_walldraw.cpp
index 1d4a4c64c..61945fefb 100644
--- a/src/rendering/swrenderer/line/r_walldraw.cpp
+++ b/src/rendering/swrenderer/line/r_walldraw.cpp
@@ -81,6 +81,8 @@ namespace swrenderer
 		mLight.SetColormap(lightsector, curline);
 		mLight.SetLightLeft(Thread, WallC);
 
+		Thread->PrepareTexture(pic, DefaultRenderStyle()); // Get correct render style? Shaded won't get here.
+
 		CameraLight* cameraLight = CameraLight::Instance();
 		if (cameraLight->FixedColormap() || cameraLight->FixedLightLevel() >= 0 || !(lightsector->e && lightsector->e->XFloor.lightlist.Size()))
 		{
diff --git a/src/rendering/swrenderer/plane/r_skyplane.cpp b/src/rendering/swrenderer/plane/r_skyplane.cpp
index 48639768a..cb1069913 100644
--- a/src/rendering/swrenderer/plane/r_skyplane.cpp
+++ b/src/rendering/swrenderer/plane/r_skyplane.cpp
@@ -59,6 +59,8 @@ CVAR(Bool, r_linearsky, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
 EXTERN_CVAR(Int, r_skymode)
 EXTERN_CVAR(Bool, cl_oldfreelooklimit)
 
+std::pair<PalEntry, PalEntry>& R_GetSkyCapColor(FGameTexture* tex);
+
 namespace swrenderer
 {
 	static FSoftwareTexture *GetSWTex(FTextureID texid, bool allownull = true)
@@ -214,6 +216,9 @@ namespace swrenderer
 
 		drawerargs.SetStyle();
 
+		Thread->PrepareTexture(frontskytex, DefaultRenderStyle());
+		Thread->PrepareTexture(backskytex, DefaultRenderStyle());
+
 		DrawSky(pl);
 	}
 
@@ -250,8 +255,6 @@ namespace swrenderer
 		angle1 = UMulScale16(ang, frontcyl) + frontpos;
 		angle2 = UMulScale16(ang, backcyl) + backpos;
 
-		auto skycapcolors = Thread->GetSkyCapColor(frontskytex);
-
 		drawerargs.SetFrontTexture(Thread, frontskytex, angle1);
 		drawerargs.SetBackTexture(Thread, backskytex, angle2);
 		drawerargs.SetTextureVStep(uv_step);
@@ -259,8 +262,9 @@ namespace swrenderer
 		drawerargs.SetDest(viewport, start_x, y1);
 		drawerargs.SetCount(y2 - y1);
 		drawerargs.SetFadeSky(r_skymode == 2 && !(Level->flags & LEVEL_FORCETILEDSKY));
-		drawerargs.SetSolidTop(skycapcolors.first);
-		drawerargs.SetSolidBottom(skycapcolors.second);
+		auto& col = R_GetSkyCapColor(frontskytex->GetTexture());
+		drawerargs.SetSolidTop(col.first);
+		drawerargs.SetSolidBottom(col.second);
 
 		if (!backskytex)
 			drawerargs.DrawSingleSkyColumn(Thread);
diff --git a/src/rendering/swrenderer/r_renderthread.cpp b/src/rendering/swrenderer/r_renderthread.cpp
index 4c3e4d2c6..f2fb7bb99 100644
--- a/src/rendering/swrenderer/r_renderthread.cpp
+++ b/src/rendering/swrenderer/r_renderthread.cpp
@@ -53,9 +53,6 @@
 #include "swrenderer/drawers/r_draw_pal.h"
 #include "swrenderer/viewport/r_viewport.h"
 #include "r_memory.h"
-#include "common/rendering/polyrenderer/drawers/poly_thread.h"
-
-std::pair<PalEntry, PalEntry>& R_GetSkyCapColor(FGameTexture* tex);
 
 namespace swrenderer
 {
@@ -66,6 +63,7 @@ namespace swrenderer
 		FrameMemory.reset(new RenderMemory());
 		Viewport.reset(new RenderViewport());
 		Light.reset(new LightVisibility());
+		DrawQueue.reset(new DrawerCommandQueue(FrameMemory.get()));
 		OpaquePass.reset(new RenderOpaquePass(this));
 		TranslucentPass.reset(new RenderTranslucentPass(this));
 		SpriteList.reset(new VisibleSpriteList());
@@ -75,9 +73,8 @@ namespace swrenderer
 		PlaneList.reset(new VisiblePlaneList(this));
 		DrawSegments.reset(new DrawSegmentList(this));
 		ClipSegments.reset(new RenderClipSegment());
-		Poly.reset(new PolyTriangleThreadData(0, 1, 0, 1, 0, screen->GetHeight()));
-		tc_drawers.reset(new SWTruecolorDrawers(this));
-		pal_drawers.reset(new SWPalDrawers(this));
+		tc_drawers.reset(new SWTruecolorDrawers(DrawQueue));
+		pal_drawers.reset(new SWPalDrawers(DrawQueue));
 	}
 
 	RenderThread::~RenderThread()
@@ -92,13 +89,33 @@ namespace swrenderer
 			return pal_drawers.get();
 	}
 
-	std::mutex loadmutex;
+	static std::mutex loadmutex;
+	void RenderThread::PrepareTexture(FSoftwareTexture *texture, FRenderStyle style)	{
+		if (texture == nullptr)
+			return;
+
+		// Textures may not have loaded/refreshed yet. The shared code doing
+		// this is not thread safe. By calling GetPixels in a mutex lock we
+		// make sure that only one thread is loading a texture at any given
+		// time.
+		//
+		// It is critical that this function is called before any direct
+		// calls to GetPixels for this to work.
 
-	std::pair<PalEntry, PalEntry> RenderThread::GetSkyCapColor(FSoftwareTexture* tex)
-	{
 		std::unique_lock<std::mutex> lock(loadmutex);
-		std::pair<PalEntry, PalEntry> colors = R_GetSkyCapColor(tex->GetTexture());
-		return colors;
+
+		const FSoftwareTextureSpan *spans;
+		if (Viewport->RenderTarget->IsBgra())
+		{
+			texture->GetPixelsBgra();
+			texture->GetColumnBgra(0, &spans);
+		}
+		else
+		{
+			bool alpha = !!(style.Flags & STYLEF_RedIsAlpha);
+			texture->GetPixels(alpha);
+			texture->GetColumn(alpha, 0, &spans);
+	}
 	}
 
 	static std::mutex polyobjmutex;
diff --git a/src/rendering/swrenderer/r_renderthread.h b/src/rendering/swrenderer/r_renderthread.h
index ba862c775..5e7b4f818 100644
--- a/src/rendering/swrenderer/r_renderthread.h
+++ b/src/rendering/swrenderer/r_renderthread.h
@@ -25,8 +25,9 @@
 #include <memory>
 #include <thread>
 
+class DrawerCommandQueue;
+typedef std::shared_ptr<DrawerCommandQueue> DrawerCommandQueuePtr;
 class RenderMemory;
-class PolyTriangleThreadData;
 struct FDynamicLight;
 
 EXTERN_CVAR(Bool, r_models);
@@ -50,7 +51,6 @@ namespace swrenderer
 	class SWPixelFormatDrawers;
 	class SWTruecolorDrawers;
 	class SWPalDrawers;
-	class WallColumnDrawerArgs;
 
 	class RenderThread
 	{
@@ -75,7 +75,7 @@ namespace swrenderer
 		std::unique_ptr<RenderClipSegment> ClipSegments;
 		std::unique_ptr<RenderViewport> Viewport;
 		std::unique_ptr<LightVisibility> Light;
-		std::unique_ptr<PolyTriangleThreadData> Poly;
+		DrawerCommandQueuePtr DrawQueue;
 
 		TArray<FDynamicLight*> AddedLightsArray;
 
@@ -87,11 +87,11 @@ namespace swrenderer
 
 		SWPixelFormatDrawers *Drawers(RenderViewport *viewport);
 
+		// Make sure the texture can be accessed safely
+		void PrepareTexture(FSoftwareTexture *texture, FRenderStyle style);
+
 		// Setup poly object in a threadsafe manner
 		void PreparePolyObject(subsector_t *sub);
-
-		// Retrieve skycap color in a threadsafe way
-		std::pair<PalEntry, PalEntry> GetSkyCapColor(FSoftwareTexture* tex);
 		
 	private:
 		std::unique_ptr<SWTruecolorDrawers> tc_drawers;
diff --git a/src/rendering/swrenderer/scene/r_scene.cpp b/src/rendering/swrenderer/scene/r_scene.cpp
index 2558a2f2a..7c6e6f6a3 100644
--- a/src/rendering/swrenderer/scene/r_scene.cpp
+++ b/src/rendering/swrenderer/scene/r_scene.cpp
@@ -66,14 +66,14 @@ void PeekThreadedErrorPane();
 EXTERN_CVAR(Int, r_clearbuffer)
 EXTERN_CVAR(Int, r_debug_draw)
 
-CVAR(Int, r_scene_multithreaded, 1, 0);
+CVAR(Int, r_scene_multithreaded, 0, 0);
 CVAR(Bool, r_models, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
 
 bool r_modelscene = false;
 
 namespace swrenderer
 {
-	cycle_t WallCycles, PlaneCycles, MaskedCycles;
+	cycle_t WallCycles, PlaneCycles, MaskedCycles, DrawerWaitCycles;
 	
 	RenderScene::RenderScene()
 	{
@@ -131,6 +131,7 @@ namespace swrenderer
 				for (int i = 0; i < size; i++)
 					dest[i] = bgracolor.d;
 			}
+			DrawerThreads::ResetDebugDrawPos();
 		}
 
 		RenderActorView(player->mo, true, false);
@@ -140,8 +141,11 @@ namespace swrenderer
 			auto copyqueue = std::make_shared<DrawerCommandQueue>(MainThread()->FrameMemory.get());
 			copyqueue->Push<MemcpyCommand>(videobuffer, bufferpitch, target->GetPixels(), target->GetWidth(), target->GetHeight(), target->GetPitch(), target->IsBgra() ? 4 : 1);
 			DrawerThreads::Execute(copyqueue);
-			DrawerThreads::WaitForWorkers();
 		}
+
+		DrawerWaitCycles.Clock();
+		DrawerThreads::WaitForWorkers();
+		DrawerWaitCycles.Unclock();
 	}
 
 	void RenderScene::RenderActorView(AActor *actor, bool renderPlayerSprites, bool dontmaplines)
@@ -149,6 +153,7 @@ namespace swrenderer
 		WallCycles.Reset();
 		PlaneCycles.Reset();
 		MaskedCycles.Reset();
+		DrawerWaitCycles.Reset();
 		
 		R_SetupFrame(MainThread()->Viewport->viewpoint, MainThread()->Viewport->viewwindow, actor);
 
@@ -189,7 +194,14 @@ namespace swrenderer
 
 	void RenderScene::RenderPSprites()
 	{
+		// Player sprites need to be rendered after all the slices because they may be hardware accelerated.
+		// If they are not hardware accelerated, the drawers must run after all sliced drawers have finished.
+		DrawerWaitCycles.Clock();
+		DrawerThreads::WaitForWorkers();
+		DrawerWaitCycles.Unclock();
+		MainThread()->DrawQueue->Clear();
 		MainThread()->PlayerSprites->Render();
+		DrawerThreads::Execute(MainThread()->DrawQueue);
 	}
 
 	void RenderScene::RenderThreadSlices()
@@ -219,7 +231,6 @@ namespace swrenderer
 			Threads[i]->X2 = viewwidth * (i + 1) / numThreads;
 		}
 		run_id++;
-		FSoftwareTexture::CurrentUpdate = run_id;
 		start_lock.unlock();
 
 		// Notify threads to run
@@ -256,6 +267,7 @@ namespace swrenderer
 
 	void RenderScene::RenderThreadSlice(RenderThread *thread)
 	{
+		thread->DrawQueue->Clear();
 		thread->FrameMemory->Clear();
 		thread->Clip3D->Cleanup();
 		thread->Clip3D->ResetClip(); // reset clips (floor/ceiling)
@@ -294,37 +306,7 @@ namespace swrenderer
 			thread->TranslucentPass->Render();
 		}
 
-#if 0 // shows the render slice edges
-		if (thread->Viewport->RenderTarget->IsBgra())
-		{
-			uint32_t* left = (uint32_t*)thread->Viewport->GetDest(thread->X1, 0);
-			uint32_t* right = (uint32_t*)thread->Viewport->GetDest(thread->X2 - 1, 0);
-			int pitch = thread->Viewport->RenderTarget->GetPitch();
-			uint32_t c = MAKEARGB(255, 0, 0, 0);
-			for (int i = 0; i < viewheight; i++)
-			{
-				*left = c;
-				*right = c;
-				left += pitch;
-				right += pitch;
-			}
-		}
-		else
-		{
-			uint8_t* left = (uint8_t*)thread->Viewport->GetDest(thread->X1, 0);
-			uint8_t* right = (uint8_t*)thread->Viewport->GetDest(thread->X2 - 1, 0);
-			int pitch = thread->Viewport->RenderTarget->GetPitch();
-			int r = 0, g = 0, b = 0;
-			uint8_t c = RGB32k.RGB[(r >> 3)][(g >> 3)][(b >> 3)];
-			for (int i = 0; i < viewheight; i++)
-			{
-				*left = c;
-				*right = c;
-				left += pitch;
-				right += pitch;
-			}
-		}
-#endif
+		DrawerThreads::Execute(thread->DrawQueue);
 	}
 
 	void RenderScene::StartThreads(size_t numThreads)
@@ -408,6 +390,9 @@ namespace swrenderer
 
 		// Render:
 		RenderActorView(actor, false, dontmaplines);
+		DrawerWaitCycles.Clock();
+		DrawerThreads::WaitForWorkers();
+		DrawerWaitCycles.Unclock();
 
 		viewport->RenderingToCanvas = false;
 
@@ -433,12 +418,12 @@ namespace swrenderer
 	ADD_STAT(fps)
 	{
 		FString out;
-		out.Format("frame=%04.1f ms  walls=%04.1f ms  planes=%04.1f ms  masked=%04.1f ms",
-			FrameCycles.TimeMS(), WallCycles.TimeMS(), PlaneCycles.TimeMS(), MaskedCycles.TimeMS());
+		out.Format("frame=%04.1f ms  walls=%04.1f ms  planes=%04.1f ms  masked=%04.1f ms  drawers=%04.1f ms",
+			FrameCycles.TimeMS(), WallCycles.TimeMS(), PlaneCycles.TimeMS(), MaskedCycles.TimeMS(), DrawerWaitCycles.TimeMS());
 		return out;
 	}
 
-	static double f_acc, w_acc, p_acc, m_acc;
+	static double f_acc, w_acc, p_acc, m_acc, drawer_acc;
 	static int acc_c;
 
 	ADD_STAT(fps_accumulated)
@@ -447,10 +432,11 @@ namespace swrenderer
 		w_acc += WallCycles.TimeMS();
 		p_acc += PlaneCycles.TimeMS();
 		m_acc += MaskedCycles.TimeMS();
+		drawer_acc += DrawerWaitCycles.TimeMS();
 		acc_c++;
 		FString out;
-		out.Format("frame=%04.1f ms  walls=%04.1f ms  planes=%04.1f ms  masked=%04.1f ms  %d counts",
-			f_acc / acc_c, w_acc / acc_c, p_acc / acc_c, m_acc / acc_c, acc_c);
+		out.Format("frame=%04.1f ms  walls=%04.1f ms  planes=%04.1f ms  masked=%04.1f ms  drawers=%04.1f ms  %d counts",
+			f_acc / acc_c, w_acc / acc_c, p_acc / acc_c, m_acc / acc_c, drawer_acc / acc_c, acc_c);
 		Printf(PRINT_LOG, "%s\n", out.GetChars());
 		return out;
 	}
diff --git a/src/rendering/swrenderer/textures/r_swtexture.cpp b/src/rendering/swrenderer/textures/r_swtexture.cpp
index 662c12d57..c41837d36 100644
--- a/src/rendering/swrenderer/textures/r_swtexture.cpp
+++ b/src/rendering/swrenderer/textures/r_swtexture.cpp
@@ -39,7 +39,7 @@
 #include "m_alloc.h"
 #include "imagehelpers.h"
 #include "texturemanager.h"
-#include <mutex>
+
 
 inline EUpscaleFlags scaleFlagFromUseType(ETextureType useType)
 {
@@ -119,7 +119,7 @@ void FSoftwareTexture::CalcBitSize ()
 //
 //==========================================================================
 
-const uint8_t *FSoftwareTexture::GetPixelsLocked(int style)
+const uint8_t *FSoftwareTexture::GetPixels(int style)
 {
 	if (Pixels.Size() == 0 || CheckModified(style))
 	{
@@ -158,7 +158,13 @@ const uint8_t *FSoftwareTexture::GetPixelsLocked(int style)
 	return Pixels.Data();
 }
 
-const uint32_t *FSoftwareTexture::GetPixelsBgraLocked()
+//==========================================================================
+//
+// Returns the whole texture, stored in column-major order, in BGRA8 format
+//
+//==========================================================================
+
+const uint32_t *FSoftwareTexture::GetPixelsBgra()
 {
 	if (PixelsBgra.Size() == 0 || CheckModified(2))
 	{
@@ -191,31 +197,60 @@ const uint32_t *FSoftwareTexture::GetPixelsBgraLocked()
 //
 //==========================================================================
 
-int FSoftwareTexture::CurrentUpdate = 0;
-namespace swrenderer { extern std::mutex loadmutex; }
-
-void FSoftwareTexture::UpdatePixels(int index)
+const uint8_t *FSoftwareTexture::GetColumn(int index, unsigned int column, const FSoftwareTextureSpan **spans_out)
 {
-	std::unique_lock<std::mutex> lock(swrenderer::loadmutex);
-	if (Unlockeddata[index].LastUpdate != CurrentUpdate)
+	auto Pixeldata = GetPixels(index);
+	if ((unsigned)column >= (unsigned)GetPhysicalWidth())
 	{
-		if (index != 2)
+		if (WidthMask + 1 == GetPhysicalWidth())
 		{
-			const uint8_t* Pixeldata = GetPixelsLocked(index);
-			if (Spandata[index] == nullptr)
-				Spandata[index] = CreateSpans(Pixeldata);
-			Unlockeddata[index].Pixels = Pixeldata;
-			Unlockeddata[index].LastUpdate = CurrentUpdate;
+			column &= WidthMask;
 		}
 		else
 		{
-			const uint32_t* Pixeldata = GetPixelsBgraLocked();
-			if (Spandata[index] == nullptr)
-				Spandata[index] = CreateSpans(Pixeldata);
-			Unlockeddata[index].Pixels = Pixeldata;
-			Unlockeddata[index].LastUpdate = CurrentUpdate;
+			column %= GetPhysicalWidth();
 		}
 	}
+	if (spans_out != nullptr)
+	{
+		if (Spandata[index] == nullptr)
+		{
+			Spandata[index] = CreateSpans(Pixeldata);
+		}
+		*spans_out = Spandata[index][column];
+	}
+	return Pixeldata + column * GetPhysicalHeight();
+}
+
+//==========================================================================
+//
+// Returns a single column of the texture, in BGRA8 format
+//
+//==========================================================================
+
+const uint32_t *FSoftwareTexture::GetColumnBgra(unsigned int column, const FSoftwareTextureSpan **spans_out)
+{
+	auto Pixeldata = GetPixelsBgra();
+	if ((unsigned)column >= (unsigned)GetPhysicalWidth())
+	{
+		if (WidthMask + 1 == GetPhysicalWidth())
+		{
+			column &= WidthMask;
+		}
+		else
+		{
+			column %= GetPhysicalWidth();
+		}
+	}
+	if (spans_out != nullptr)
+	{
+		if (Spandata[2] == nullptr)
+		{
+			Spandata[2] = CreateSpans(Pixeldata);
+		}
+		*spans_out = Spandata[2][column];
+	}
+	return Pixeldata + column * GetPhysicalHeight();
 }
 
 //==========================================================================
@@ -527,23 +562,15 @@ void FSoftwareTexture::FreeAllSpans()
 	}
 }
 
-// Note: this function needs to be thread safe
 FSoftwareTexture* GetSoftwareTexture(FGameTexture* tex)
 {
 	FSoftwareTexture* SoftwareTexture = static_cast<FSoftwareTexture*>(tex->GetSoftwareTexture());
 	if (!SoftwareTexture)
 	{
-		static std::mutex loadmutex;
-		std::unique_lock<std::mutex> lock(loadmutex);
-
-		SoftwareTexture = static_cast<FSoftwareTexture*>(tex->GetSoftwareTexture());
-		if (!SoftwareTexture)
-		{
-			if (tex->isSoftwareCanvas()) SoftwareTexture = new FSWCanvasTexture(tex);
-			else if (tex->isWarped()) SoftwareTexture = new FWarpTexture(tex, tex->isWarped());
-			else SoftwareTexture = new FSoftwareTexture(tex);
-			tex->SetSoftwareTexture(SoftwareTexture);
-		}
+		if (tex->isSoftwareCanvas()) SoftwareTexture = new FSWCanvasTexture(tex);
+		else if (tex->isWarped()) SoftwareTexture = new FWarpTexture(tex, tex->isWarped());
+		else SoftwareTexture = new FSoftwareTexture(tex);
+		tex->SetSoftwareTexture(SoftwareTexture);
 	}
 	return SoftwareTexture;
 }
@@ -555,7 +582,6 @@ CUSTOM_CVAR(Bool, vid_nopalsubstitutions, false, CVAR_ARCHIVE | CVAR_NOINITCALL)
 	R_InitSkyMap();
 }
 
-// Note: this function needs to be thread safe
 FSoftwareTexture* GetPalettedSWTexture(FTextureID texid, bool animate, bool checkcompat, bool allownull)
 {
 	bool needpal = !vid_nopalsubstitutions && !V_IsTrueColor();
diff --git a/src/rendering/swrenderer/textures/r_swtexture.h b/src/rendering/swrenderer/textures/r_swtexture.h
index 54597d0c6..f053f8552 100644
--- a/src/rendering/swrenderer/textures/r_swtexture.h
+++ b/src/rendering/swrenderer/textures/r_swtexture.h
@@ -20,11 +20,6 @@ protected:
 	FTexture *mSource;
 	TArray<uint8_t> Pixels;
 	TArray<uint32_t> PixelsBgra;
-	struct
-	{
-		const void* Pixels = nullptr;
-		int LastUpdate = -1;
-	} Unlockeddata[3];
 	FSoftwareTextureSpan **Spandata[3] = { };
 	DVector2 Scale;
 	uint8_t WidthBits = 0, HeightBits = 0;
@@ -99,7 +94,6 @@ public:
 	{
 		Pixels.Reset();
 		PixelsBgra.Reset();
-		for (auto& d : Unlockeddata) d = {};
 	}
 	
 	// Returns true if the next call to GetPixels() will return an image different from the
@@ -116,69 +110,16 @@ public:
 	virtual bool Mipmapped() { return true; }
 
 	// Returns a single column of the texture
-	const uint8_t* GetColumn(int style, unsigned int column, const FSoftwareTextureSpan** spans_out)
-	{
-		column = WrapColumn(column);
-		const uint8_t* pixels = GetPixels(style);
-		if (spans_out)
-			*spans_out = Spandata[style][column];
-		return pixels + column * GetPhysicalHeight();
-	}
+	virtual const uint8_t *GetColumn(int style, unsigned int column, const FSoftwareTextureSpan **spans_out);
 
 	// Returns a single column of the texture, in BGRA8 format
-	const uint32_t* GetColumnBgra(unsigned int column, const FSoftwareTextureSpan** spans_out)
-	{
-		column = WrapColumn(column);
-		const uint32_t* pixels = GetPixelsBgra();
-		if (spans_out)
-			*spans_out = Spandata[2][column];
-		return pixels + column * GetPhysicalHeight();
-	}
-
-	unsigned int WrapColumn(unsigned int column)
-	{
-		if ((unsigned)column >= (unsigned)GetPhysicalWidth())
-		{
-			if (WidthMask + 1 == GetPhysicalWidth())
-			{
-				column &= WidthMask;
-			}
-			else
-			{
-				column %= GetPhysicalWidth();
-			}
-		}
-		return column;
-	}
+	virtual const uint32_t *GetColumnBgra(unsigned int column, const FSoftwareTextureSpan **spans_out);
 
 	// Returns the whole texture, stored in column-major order, in BGRA8 format
-	const uint32_t* GetPixelsBgra()
-	{
-		int style = 2;
-		if (Unlockeddata[2].LastUpdate == CurrentUpdate)
-		{
-			return static_cast<const uint32_t*>(Unlockeddata[style].Pixels);
-		}
-		else
-		{
-			UpdatePixels(style);
-			return static_cast<const uint32_t*>(Unlockeddata[style].Pixels);
-		}
-	}
+	virtual const uint32_t *GetPixelsBgra();
 
 	// Returns the whole texture, stored in column-major order
-	const uint8_t* GetPixels(int style)
-	{
-		if (Unlockeddata[style].LastUpdate == CurrentUpdate)
-		{
-			return static_cast<const uint8_t*>(Unlockeddata[style].Pixels);
-		}
-		else
-		{
-			UpdatePixels(style);
-			return static_cast<const uint8_t*>(Unlockeddata[style].Pixels);
-		}
-	}
+	virtual const uint8_t *GetPixels(int style);
 
 	const uint8_t *GetPixels(FRenderStyle style)
 	{
@@ -198,11 +139,6 @@ public:
 		return GetColumn(alpha, column, spans_out);
 	}
 
-	static int CurrentUpdate;
-	void UpdatePixels(int style);
-
-	virtual const uint32_t* GetPixelsBgraLocked();
-	virtual const uint8_t* GetPixelsLocked(int style);
 };
 
 // A texture that returns a wiggly version of another texture.
@@ -218,8 +154,8 @@ class FWarpTexture : public FSoftwareTexture
 public:
 	FWarpTexture (FGameTexture *source, int warptype);
 
-	const uint32_t *GetPixelsBgraLocked() override;
-	const uint8_t *GetPixelsLocked(int style) override;
+	const uint32_t *GetPixelsBgra() override;
+	const uint8_t *GetPixels(int style) override;
 	bool CheckModified (int which) override;
 	void GenerateBgraMipmapsFast();
 
@@ -243,8 +179,8 @@ public:
 	~FSWCanvasTexture();
 
 	// Returns the whole texture, stored in column-major order
-	const uint32_t *GetPixelsBgraLocked() override;
-	const uint8_t *GetPixelsLocked(int style) override;
+	const uint32_t *GetPixelsBgra() override;
+	const uint8_t *GetPixels(int style) override;
 
 	virtual void Unload() override;
 	void UpdatePixels(bool truecolor);
diff --git a/src/rendering/swrenderer/textures/swcanvastexture.cpp b/src/rendering/swrenderer/textures/swcanvastexture.cpp
index 91385fe0c..db9471774 100644
--- a/src/rendering/swrenderer/textures/swcanvastexture.cpp
+++ b/src/rendering/swrenderer/textures/swcanvastexture.cpp
@@ -77,7 +77,7 @@ FSWCanvasTexture::~FSWCanvasTexture()
 //
 //==========================================================================
 
-const uint8_t *FSWCanvasTexture::GetPixelsLocked(int style)
+const uint8_t *FSWCanvasTexture::GetPixels(int style)
 {
 	static_cast<FCanvasTexture*>(mSource)->NeedUpdate();
 	if (Canvas == nullptr)
@@ -94,7 +94,7 @@ const uint8_t *FSWCanvasTexture::GetPixelsLocked(int style)
 //
 //==========================================================================
 
-const uint32_t *FSWCanvasTexture::GetPixelsBgraLocked()
+const uint32_t *FSWCanvasTexture::GetPixelsBgra()
 {
 	static_cast<FCanvasTexture*>(mSource)->NeedUpdate();
 	if (CanvasBgra == nullptr)
diff --git a/src/rendering/swrenderer/textures/warptexture.cpp b/src/rendering/swrenderer/textures/warptexture.cpp
index ce3cb2f52..49869f8d6 100644
--- a/src/rendering/swrenderer/textures/warptexture.cpp
+++ b/src/rendering/swrenderer/textures/warptexture.cpp
@@ -57,7 +57,7 @@ bool FWarpTexture::CheckModified (int style)
 	return screen->FrameTime != GenTime[style];
 }
 
-const uint32_t *FWarpTexture::GetPixelsBgraLocked()
+const uint32_t *FWarpTexture::GetPixelsBgra()
 {
 	uint64_t time = screen->FrameTime;
 	uint64_t resizeMult = gl_texture_hqresizemult;
@@ -67,7 +67,7 @@ const uint32_t *FWarpTexture::GetPixelsBgraLocked()
 		if (gl_texture_hqresizemode == 0 || gl_texture_hqresizemult < 1 || !(gl_texture_hqresize_targets & 1))
 			resizeMult = 1;
 
-		auto otherpix = FSoftwareTexture::GetPixelsBgraLocked();
+		auto otherpix = FSoftwareTexture::GetPixelsBgra();
 		WarpedPixelsRgba.Resize(unsigned(GetWidth() * GetHeight() * resizeMult * resizeMult * 4 / 3 + 1));
 		WarpBuffer(WarpedPixelsRgba.Data(), otherpix, int(GetWidth() * resizeMult), int(GetHeight() * resizeMult), WidthOffsetMultiplier, HeightOffsetMultiplier, time, mTexture->GetShaderSpeed(), bWarped);
 		GenerateBgraMipmapsFast();
@@ -78,7 +78,7 @@ const uint32_t *FWarpTexture::GetPixelsBgraLocked()
 }
 
 
-const uint8_t *FWarpTexture::GetPixelsLocked(int index)
+const uint8_t *FWarpTexture::GetPixels(int index)
 {
 	uint64_t time = screen->FrameTime;
 	uint64_t resizeMult = gl_texture_hqresizemult;
@@ -88,7 +88,7 @@ const uint8_t *FWarpTexture::GetPixelsLocked(int index)
 		if (gl_texture_hqresizemode == 0 || gl_texture_hqresizemult < 1 || !(gl_texture_hqresize_targets & 1))
 			resizeMult = 1;
 
-		const uint8_t *otherpix = FSoftwareTexture::GetPixelsLocked(index);
+		const uint8_t *otherpix = FSoftwareTexture::GetPixels(index);
 		WarpedPixels[index].Resize(unsigned(GetWidth() * GetHeight() * resizeMult * resizeMult));
 		WarpBuffer(WarpedPixels[index].Data(), otherpix, int(GetWidth() * resizeMult), int(GetHeight() * resizeMult), WidthOffsetMultiplier, HeightOffsetMultiplier, time, mTexture->GetShaderSpeed(), bWarped);
 		FreeAllSpans();
diff --git a/src/rendering/swrenderer/things/r_decal.cpp b/src/rendering/swrenderer/things/r_decal.cpp
index ffe06c0a1..df8f6fb38 100644
--- a/src/rendering/swrenderer/things/r_decal.cpp
+++ b/src/rendering/swrenderer/things/r_decal.cpp
@@ -240,6 +240,7 @@ namespace swrenderer
 			bool visible = drawerargs.SetStyle(thread->Viewport.get(), decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor, cmlight);
 			if (visible)
 			{
+				thread->PrepareTexture(WallSpriteTile, decal->RenderStyle);
 				drawerargs.DrawMasked(thread, zpos + WallSpriteTile->GetTopOffset(0) * decal->ScaleY, decal->ScaleY, decal->RenderFlags & RF_XFLIP, decal->RenderFlags & RF_YFLIP, WallC, clipper->x1, clipper->x2, light, WallSpriteTile, mfloorclip, mceilingclip, decal->RenderStyle);
 			}
 
diff --git a/src/rendering/swrenderer/things/r_particle.cpp b/src/rendering/swrenderer/things/r_particle.cpp
index 6e9f2b00e..a91ddb473 100644
--- a/src/rendering/swrenderer/things/r_particle.cpp
+++ b/src/rendering/swrenderer/things/r_particle.cpp
@@ -231,6 +231,7 @@ namespace swrenderer
 	{
 		auto vis = this;
 
+		int spacing;
 		uint8_t color = vis->Light.BaseColormap->Maps[vis->startfrac];
 		int yl = vis->y1;
 		int ycount = vis->y2 - yl + 1;
@@ -249,18 +250,33 @@ namespace swrenderer
 		uint32_t alpha = fglevel * 256 / FRACUNIT;
 		
 		auto viewport = thread->Viewport.get();
-		auto drawers = thread->Drawers(viewport);
+
+		spacing = viewport->RenderTarget->GetPitch();
 
 		uint32_t fracstepx = PARTICLE_TEXTURE_SIZE * FRACUNIT / countbase;
 		uint32_t fracposx = fracstepx / 2;
 
 		RenderTranslucentPass *translucentPass = thread->TranslucentPass.get();
 
-		for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx)
+		if (viewport->RenderTarget->IsBgra())
 		{
-			if (translucentPass->ClipSpriteColumnWithPortals(x, vis))
-				continue;
-			drawers->DrawParticleColumn(x, yl, ycount, fg, alpha, fracposx);
+			for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx)
+			{
+				if (translucentPass->ClipSpriteColumnWithPortals(x, vis))
+					continue;
+				uint32_t *dest = (uint32_t*)viewport->GetDest(x, yl);
+				thread->DrawQueue->Push<DrawParticleColumnRGBACommand>(dest, yl, spacing, ycount, fg, alpha, fracposx);
+			}
+		}
+		else
+		{
+			for (int x = x1; x < (x1 + countbase); x++, fracposx += fracstepx)
+			{
+				if (translucentPass->ClipSpriteColumnWithPortals(x, vis))
+					continue;
+				uint8_t *dest = viewport->GetDest(x, yl);
+				thread->DrawQueue->Push<DrawParticleColumnPalCommand>(dest, yl, spacing, ycount, fg, alpha, fracposx);
+			}
 		}
 	}
 
diff --git a/src/rendering/swrenderer/things/r_sprite.cpp b/src/rendering/swrenderer/things/r_sprite.cpp
index 8f47c524b..2a7722a58 100644
--- a/src/rendering/swrenderer/things/r_sprite.cpp
+++ b/src/rendering/swrenderer/things/r_sprite.cpp
@@ -262,7 +262,8 @@ namespace swrenderer
 		{
 			RenderTranslucentPass *translucentPass = thread->TranslucentPass.get();
 			short portalfloorclip[MAXWIDTH];
-			for (int x = x1; x < x2; x++)
+			int x2 = wallc.sx2;
+			for (int x = wallc.sx1; x < x2; x++)
 			{
 				if (translucentPass->ClipSpriteColumnWithPortals(x, this))
 					portalfloorclip[x] = mceilingclip[x];
@@ -270,6 +271,8 @@ namespace swrenderer
 					portalfloorclip[x] = mfloorclip[x];
 			}
 
+			thread->PrepareTexture(pic, RenderStyle);
+
 			ProjectedWallLight mlight;
 			mlight.SetSpriteLight();
 
diff --git a/src/rendering/swrenderer/things/r_wallsprite.cpp b/src/rendering/swrenderer/things/r_wallsprite.cpp
index bc14fc148..bccea75a6 100644
--- a/src/rendering/swrenderer/things/r_wallsprite.cpp
+++ b/src/rendering/swrenderer/things/r_wallsprite.cpp
@@ -176,6 +176,8 @@ namespace swrenderer
 		// Draw it
 		auto WallSpriteTile = spr->pic;
 
+		thread->PrepareTexture(WallSpriteTile, spr->RenderStyle);
+
 		RenderTranslucentPass* translucentPass = thread->TranslucentPass.get();
 		short floorclip[MAXWIDTH];
 		for (int x = x1; x < x2; x++)
diff --git a/src/rendering/swrenderer/viewport/r_spandrawer.cpp b/src/rendering/swrenderer/viewport/r_spandrawer.cpp
index c101ac80c..51da4fb46 100644
--- a/src/rendering/swrenderer/viewport/r_spandrawer.cpp
+++ b/src/rendering/swrenderer/viewport/r_spandrawer.cpp
@@ -32,6 +32,8 @@ namespace swrenderer
 
 	void SpanDrawerArgs::SetTexture(RenderThread *thread, FSoftwareTexture *tex)
 	{
+		thread->PrepareTexture(tex, DefaultRenderStyle());
+
 		ds_texwidth = tex->GetPhysicalWidth();
 		ds_texheight = tex->GetPhysicalHeight();
 		ds_xbits = tex->GetWidthBits();
diff --git a/src/rendering/swrenderer/viewport/r_spritedrawer.h b/src/rendering/swrenderer/viewport/r_spritedrawer.h
index e1c504f7c..2ab3b97cc 100644
--- a/src/rendering/swrenderer/viewport/r_spritedrawer.h
+++ b/src/rendering/swrenderer/viewport/r_spritedrawer.h
@@ -111,7 +111,7 @@ namespace swrenderer
 
 		RenderViewport *dc_viewport = nullptr;
 
-		friend class SWTruecolorDrawers;
-		friend class SWPalDrawers;
+		friend class DrawVoxelBlocksRGBACommand;
+		friend class DrawVoxelBlocksPalCommand;
 	};
 }