- Fix sky drawers not staying within their NUMA node

This commit is contained in:
Magnus Norddahl 2018-12-16 11:31:05 +01:00
parent 1422a95aa8
commit 88751a320c
4 changed files with 59 additions and 61 deletions

View file

@ -551,7 +551,6 @@ namespace swrenderer
void DrawSingleSky1PalCommand::Execute(DrawerThread *thread) void DrawSingleSky1PalCommand::Execute(DrawerThread *thread)
{ {
uint8_t *dest = args.Dest(); uint8_t *dest = args.Dest();
int count = args.Count();
int pitch = args.Viewport()->RenderTarget->GetPitch(); int pitch = args.Viewport()->RenderTarget->GetPitch();
const uint8_t *source0 = args.FrontTexturePixels(); const uint8_t *source0 = args.FrontTexturePixels();
int textureheight0 = args.FrontTextureHeight(); int textureheight0 = args.FrontTextureHeight();
@ -559,6 +558,25 @@ namespace swrenderer
int32_t frac = args.TextureVPos(); int32_t frac = args.TextureVPos();
int32_t fracstep = args.TextureVStep(); int32_t fracstep = args.TextureVStep();
if (!args.FadeSky())
{
int count = thread->count_for_thread(args.DestY(), args.Count());
for (int index = 0; index < count; index++)
{
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
*dest = source0[sample_index];
dest += pitch;
frac += fracstep;
}
return;
}
int num_cores = thread->num_cores;
int skipped = thread->skipped_by_thread(args.DestY());
int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
int start_fade = 2; // How fast it should fade out int start_fade = 2; // How fast it should fade out
int fade_length = (1 << (24 - start_fade)); int fade_length = (1 << (24 - start_fade));
@ -571,28 +589,11 @@ namespace swrenderer
start_fadebottom_y = clamp(start_fadebottom_y, 0, count); start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
end_fadebottom_y = clamp(end_fadebottom_y, 0, count); end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
int num_cores = thread->num_cores;
int skipped = thread->skipped_by_thread(args.DestY());
dest = thread->dest_for_thread(args.DestY(), pitch, dest); dest = thread->dest_for_thread(args.DestY(), pitch, dest);
frac += fracstep * skipped; frac += fracstep * skipped;
fracstep *= num_cores; fracstep *= num_cores;
pitch *= num_cores; pitch *= num_cores;
if (!args.FadeSky())
{
count = thread->count_for_thread(args.DestY(), count);
for (int index = 0; index < count; index++)
{
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
*dest = source0[sample_index];
dest += pitch;
frac += fracstep;
}
return;
}
uint32_t solid_top = args.SolidTopColor(); uint32_t solid_top = args.SolidTopColor();
uint32_t solid_bottom = args.SolidBottomColor(); uint32_t solid_bottom = args.SolidBottomColor();

View file

@ -39,7 +39,6 @@ namespace swrenderer
void Execute(DrawerThread *thread) override void Execute(DrawerThread *thread) override
{ {
uint32_t *dest = (uint32_t *)args.Dest(); uint32_t *dest = (uint32_t *)args.Dest();
int count = args.Count();
int pitch = args.Viewport()->RenderTarget->GetPitch(); int pitch = args.Viewport()->RenderTarget->GetPitch();
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels(); const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
int textureheight0 = args.FrontTextureHeight(); int textureheight0 = args.FrontTextureHeight();
@ -51,6 +50,25 @@ namespace swrenderer
uint32_t solid_bottom = args.SolidBottomColor(); uint32_t solid_bottom = args.SolidBottomColor();
bool fadeSky = args.FadeSky(); bool fadeSky = args.FadeSky();
if (!fadeSky)
{
int count = thread->count_for_thread(args.DestY(), args.Count());
for (int index = 0; index < count; index++)
{
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
*dest = source0[sample_index];
dest += pitch;
frac += fracstep;
}
return;
}
int num_cores = thread->num_cores;
int skipped = thread->skipped_by_thread(args.DestY());
int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
int start_fade = 2; // How fast it should fade out int start_fade = 2; // How fast it should fade out
int fade_length = (1 << (24 - start_fade)); int fade_length = (1 << (24 - start_fade));
@ -63,28 +81,11 @@ namespace swrenderer
start_fadebottom_y = clamp(start_fadebottom_y, 0, count); start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
end_fadebottom_y = clamp(end_fadebottom_y, 0, count); end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
int num_cores = thread->num_cores;
int skipped = thread->skipped_by_thread(args.DestY());
dest = thread->dest_for_thread(args.DestY(), pitch, dest); dest = thread->dest_for_thread(args.DestY(), pitch, dest);
frac += fracstep * skipped; frac += fracstep * skipped;
fracstep *= num_cores; fracstep *= num_cores;
pitch *= num_cores; pitch *= num_cores;
if (!fadeSky)
{
count = thread->count_for_thread(args.DestY(), count);
for (int index = 0; index < count; index++)
{
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
*dest = source0[sample_index];
dest += pitch;
frac += fracstep;
}
return;
}
BgraColor solid_top_fill = solid_top; BgraColor solid_top_fill = solid_top;
BgraColor solid_bottom_fill = solid_bottom; BgraColor solid_bottom_fill = solid_bottom;

View file

@ -38,7 +38,6 @@ namespace swrenderer
void Execute(DrawerThread *thread) override void Execute(DrawerThread *thread) override
{ {
uint32_t *dest = (uint32_t *)args.Dest(); uint32_t *dest = (uint32_t *)args.Dest();
int count = args.Count();
int pitch = args.Viewport()->RenderTarget->GetPitch(); int pitch = args.Viewport()->RenderTarget->GetPitch();
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels(); const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
int textureheight0 = args.FrontTextureHeight(); int textureheight0 = args.FrontTextureHeight();
@ -50,6 +49,25 @@ namespace swrenderer
uint32_t solid_bottom = args.SolidBottomColor(); uint32_t solid_bottom = args.SolidBottomColor();
bool fadeSky = args.FadeSky(); bool fadeSky = args.FadeSky();
if (!fadeSky)
{
int count = thread->count_for_thread(args.DestY(), args.Count());
for (int index = 0; index < count; index++)
{
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
*dest = source0[sample_index];
dest += pitch;
frac += fracstep;
}
return;
}
int num_cores = thread->num_cores;
int skipped = thread->skipped_by_thread(args.DestY());
int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
int start_fade = 2; // How fast it should fade out int start_fade = 2; // How fast it should fade out
int fade_length = (1 << (24 - start_fade)); int fade_length = (1 << (24 - start_fade));
@ -62,28 +80,11 @@ namespace swrenderer
start_fadebottom_y = clamp(start_fadebottom_y, 0, count); start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
end_fadebottom_y = clamp(end_fadebottom_y, 0, count); end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
int num_cores = thread->num_cores;
int skipped = thread->skipped_by_thread(args.DestY());
dest = thread->dest_for_thread(args.DestY(), pitch, dest); dest = thread->dest_for_thread(args.DestY(), pitch, dest);
frac += fracstep * skipped; frac += fracstep * skipped;
fracstep *= num_cores; fracstep *= num_cores;
pitch *= num_cores; pitch *= num_cores;
if (!fadeSky)
{
count = thread->count_for_thread(args.DestY(), count);
for (int index = 0; index < count; index++)
{
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
*dest = source0[sample_index];
dest += pitch;
frac += fracstep;
}
return;
}
__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128()); __m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128()); __m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());

View file

@ -140,6 +140,8 @@ void DrawerThreads::WorkerMain(DrawerThread *thread)
// Grab the commands // Grab the commands
DrawerCommandQueuePtr list = active_commands[thread->current_queue]; DrawerCommandQueuePtr list = active_commands[thread->current_queue];
thread->current_queue++; thread->current_queue++;
thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
start_lock.unlock(); start_lock.unlock();
// Do the work: // Do the work:
@ -206,8 +208,6 @@ void DrawerThreads::StartThreads()
thread->num_cores = I_GetNumaNodeThreadCount(numaNode); thread->num_cores = I_GetNumaNodeThreadCount(numaNode);
thread->numa_node = numaNode; thread->numa_node = numaNode;
thread->num_numa_nodes = I_GetNumaNodeCount(); thread->num_numa_nodes = I_GetNumaNodeCount();
thread->numa_start_y = numaNode * viewheight / I_GetNumaNodeCount();
thread->numa_end_y = (numaNode + 1) * viewheight / I_GetNumaNodeCount();
thread->thread = std::thread([=]() { queue->WorkerMain(thread); }); thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
I_SetThreadNumaNode(thread->thread, numaNode); I_SetThreadNumaNode(thread->thread, numaNode);
} }
@ -223,8 +223,6 @@ void DrawerThreads::StartThreads()
thread->num_cores = num_threads; thread->num_cores = num_threads;
thread->numa_node = 0; thread->numa_node = 0;
thread->num_numa_nodes = 1; thread->num_numa_nodes = 1;
thread->numa_start_y = 0;
thread->numa_end_y = viewheight;
thread->thread = std::thread([=]() { queue->WorkerMain(thread); }); thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
I_SetThreadNumaNode(thread->thread, 0); I_SetThreadNumaNode(thread->thread, 0);
} }
@ -288,7 +286,4 @@ void MemcpyCommand::Execute(DrawerThread *thread)
d += dstep; d += dstep;
s += sstep; s += sstep;
} }
thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
} }