diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp
index b9501d157..9af01cffb 100644
--- a/src/swrenderer/drawers/r_draw_pal.cpp
+++ b/src/swrenderer/drawers/r_draw_pal.cpp
@@ -551,7 +551,6 @@ namespace swrenderer
 	void DrawSingleSky1PalCommand::Execute(DrawerThread *thread)
 	{
 		uint8_t *dest = args.Dest();
-		int count = args.Count();
 		int pitch = args.Viewport()->RenderTarget->GetPitch();
 		const uint8_t *source0 = args.FrontTexturePixels();
 		int textureheight0 = args.FrontTextureHeight();
@@ -559,6 +558,34 @@ namespace swrenderer
 		int32_t frac = args.TextureVPos();
 		int32_t fracstep = args.TextureVStep();
 
+		if (!args.FadeSky())
+		{
+			// This path now runs before the shared per-thread setup below, so it
+			// repeats that setup here (dest_for_thread, skipped rows, num_cores
+			// stride); the loop relied on it before being moved up.
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			int count = thread->count_for_thread(args.DestY(), args.Count());
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
+			for (int index = 0; index < count; index++)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				*dest = source0[sample_index];
+				dest += pitch;
+				frac += fracstep;
+			}
+
+			return;
+		}
+
+		int num_cores = thread->num_cores;
+		int skipped = thread->skipped_by_thread(args.DestY());
+		int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
+
 		// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
 		int start_fade = 2; // How fast it should fade out
 		int fade_length = (1 << (24 - start_fade));
@@ -571,28 +598,11 @@ namespace swrenderer
 		start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
 		end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
 
-		int num_cores = thread->num_cores;
-		int skipped = thread->skipped_by_thread(args.DestY());
 		dest = thread->dest_for_thread(args.DestY(), pitch, dest);
 		frac += fracstep * skipped;
 		fracstep *= num_cores;
 		pitch *= num_cores;
 
-		if (!args.FadeSky())
-		{
-			count = thread->count_for_thread(args.DestY(), count);
-
-			for (int index = 0; index < count; index++)
-			{
-				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
-				*dest = source0[sample_index];
-				dest += pitch;
-				frac += fracstep;
-			}
-
-			return;
-		}
-
 		uint32_t solid_top = args.SolidTopColor();
 		uint32_t solid_bottom = args.SolidBottomColor();
 
diff --git a/src/swrenderer/drawers/r_draw_sky32.h b/src/swrenderer/drawers/r_draw_sky32.h
index 59c8826fa..864002446 100644
--- a/src/swrenderer/drawers/r_draw_sky32.h
+++ b/src/swrenderer/drawers/r_draw_sky32.h
@@ -39,7 +39,6 @@ namespace swrenderer
 		void Execute(DrawerThread *thread) override
 		{
 			uint32_t *dest = (uint32_t *)args.Dest();
-			int count = args.Count();
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
 			int textureheight0 = args.FrontTextureHeight();
@@ -51,6 +50,34 @@ namespace swrenderer
 			uint32_t solid_bottom = args.SolidBottomColor();
 			bool fadeSky = args.FadeSky();
 
+			if (!fadeSky)
+			{
+				// This path now runs before the shared per-thread setup below, so it
+				// repeats that setup here (dest_for_thread, skipped rows, num_cores
+				// stride); the loop relied on it before being moved up.
+				int num_cores = thread->num_cores;
+				int skipped = thread->skipped_by_thread(args.DestY());
+				int count = thread->count_for_thread(args.DestY(), args.Count());
+				dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+				frac += fracstep * skipped;
+				fracstep *= num_cores;
+				pitch *= num_cores;
+
+				for (int index = 0; index < count; index++)
+				{
+					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+					*dest = source0[sample_index];
+					dest += pitch;
+					frac += fracstep;
+				}
+
+				return;
+			}
+
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
+
 			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
 			int start_fade = 2; // How fast it should fade out
 			int fade_length = (1 << (24 - start_fade));
@@ -63,28 +90,11 @@ namespace swrenderer
 			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
 			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
 
-			int num_cores = thread->num_cores;
-			int skipped = thread->skipped_by_thread(args.DestY());
 			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
 			frac += fracstep * skipped;
 			fracstep *= num_cores;
 			pitch *= num_cores;
 
-			if (!fadeSky)
-			{
-				count = thread->count_for_thread(args.DestY(), count);
-
-				for (int index = 0; index < count; index++)
-				{
-					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
-					*dest = source0[sample_index];
-					dest += pitch;
-					frac += fracstep;
-				}
-
-				return;
-			}
-
 			BgraColor solid_top_fill = solid_top;
 			BgraColor solid_bottom_fill = solid_bottom;
 
diff --git a/src/swrenderer/drawers/r_draw_sky32_sse2.h b/src/swrenderer/drawers/r_draw_sky32_sse2.h
index 401d5bad1..56fd50300 100644
--- a/src/swrenderer/drawers/r_draw_sky32_sse2.h
+++ b/src/swrenderer/drawers/r_draw_sky32_sse2.h
@@ -38,7 +38,6 @@ namespace swrenderer
 		void Execute(DrawerThread *thread) override
 		{
 			uint32_t *dest = (uint32_t *)args.Dest();
-			int count = args.Count();
 			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
 			int textureheight0 = args.FrontTextureHeight();
@@ -50,6 +49,34 @@ namespace swrenderer
 			uint32_t solid_bottom = args.SolidBottomColor();
 			bool fadeSky = args.FadeSky();
 
+			if (!fadeSky)
+			{
+				// This path now runs before the shared per-thread setup below, so it
+				// repeats that setup here (dest_for_thread, skipped rows, num_cores
+				// stride); the loop relied on it before being moved up.
+				int num_cores = thread->num_cores;
+				int skipped = thread->skipped_by_thread(args.DestY());
+				int count = thread->count_for_thread(args.DestY(), args.Count());
+				dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+				frac += fracstep * skipped;
+				fracstep *= num_cores;
+				pitch *= num_cores;
+
+				for (int index = 0; index < count; index++)
+				{
+					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+					*dest = source0[sample_index];
+					dest += pitch;
+					frac += fracstep;
+				}
+
+				return;
+			}
+
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
+
 			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
 			int start_fade = 2; // How fast it should fade out
 			int fade_length = (1 << (24 - start_fade));
@@ -62,28 +89,11 @@ namespace swrenderer
 			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
 			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
 
-			int num_cores = thread->num_cores;
-			int skipped = thread->skipped_by_thread(args.DestY());
 			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
 			frac += fracstep * skipped;
 			fracstep *= num_cores;
 			pitch *= num_cores;
 
-			if (!fadeSky)
-			{
-				count = thread->count_for_thread(args.DestY(), count);
-
-				for (int index = 0; index < count; index++)
-				{
-					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
-					*dest = source0[sample_index];
-					dest += pitch;
-					frac += fracstep;
-				}
-
-				return;
-			}
-
 			__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
 			__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
 
diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp
index 31a6f1915..5aa157f27 100644
--- a/src/swrenderer/drawers/r_thread.cpp
+++ b/src/swrenderer/drawers/r_thread.cpp
@@ -140,6 +140,8 @@ void DrawerThreads::WorkerMain(DrawerThread *thread)
 		// Grab the commands
 		DrawerCommandQueuePtr list = active_commands[thread->current_queue];
 		thread->current_queue++;
+		thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
+		thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
 		start_lock.unlock();
 
 		// Do the work:
@@ -206,8 +208,6 @@ void DrawerThreads::StartThreads()
 			thread->num_cores = I_GetNumaNodeThreadCount(numaNode);
 			thread->numa_node = numaNode;
 			thread->num_numa_nodes = I_GetNumaNodeCount();
-			thread->numa_start_y = numaNode * viewheight / I_GetNumaNodeCount();
-			thread->numa_end_y = (numaNode + 1) * viewheight / I_GetNumaNodeCount();
 			thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
 			I_SetThreadNumaNode(thread->thread, numaNode);
 		}
@@ -223,8 +223,6 @@ void DrawerThreads::StartThreads()
 			thread->num_cores = num_threads;
 			thread->numa_node = 0;
 			thread->num_numa_nodes = 1;
-			thread->numa_start_y = 0;
-			thread->numa_end_y = viewheight;
 			thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
 			I_SetThreadNumaNode(thread->thread, 0);
 		}
@@ -288,7 +286,4 @@ void MemcpyCommand::Execute(DrawerThread *thread)
 		d += dstep;
 		s += sstep;
 	}
-
-	thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
-	thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
 }