mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-01-18 23:21:41 +00:00
- fix sky drawers not staying within their numa node
This commit is contained in:
parent
1422a95aa8
commit
88751a320c
4 changed files with 59 additions and 61 deletions
|
@ -551,7 +551,6 @@ namespace swrenderer
|
||||||
void DrawSingleSky1PalCommand::Execute(DrawerThread *thread)
|
void DrawSingleSky1PalCommand::Execute(DrawerThread *thread)
|
||||||
{
|
{
|
||||||
uint8_t *dest = args.Dest();
|
uint8_t *dest = args.Dest();
|
||||||
int count = args.Count();
|
|
||||||
int pitch = args.Viewport()->RenderTarget->GetPitch();
|
int pitch = args.Viewport()->RenderTarget->GetPitch();
|
||||||
const uint8_t *source0 = args.FrontTexturePixels();
|
const uint8_t *source0 = args.FrontTexturePixels();
|
||||||
int textureheight0 = args.FrontTextureHeight();
|
int textureheight0 = args.FrontTextureHeight();
|
||||||
|
@ -559,6 +558,25 @@ namespace swrenderer
|
||||||
int32_t frac = args.TextureVPos();
|
int32_t frac = args.TextureVPos();
|
||||||
int32_t fracstep = args.TextureVStep();
|
int32_t fracstep = args.TextureVStep();
|
||||||
|
|
||||||
|
if (!args.FadeSky())
|
||||||
|
{
|
||||||
|
int count = thread->count_for_thread(args.DestY(), args.Count());
|
||||||
|
|
||||||
|
for (int index = 0; index < count; index++)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
*dest = source0[sample_index];
|
||||||
|
dest += pitch;
|
||||||
|
frac += fracstep;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int num_cores = thread->num_cores;
|
||||||
|
int skipped = thread->skipped_by_thread(args.DestY());
|
||||||
|
int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
|
||||||
|
|
||||||
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
||||||
int start_fade = 2; // How fast it should fade out
|
int start_fade = 2; // How fast it should fade out
|
||||||
int fade_length = (1 << (24 - start_fade));
|
int fade_length = (1 << (24 - start_fade));
|
||||||
|
@ -571,28 +589,11 @@ namespace swrenderer
|
||||||
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
||||||
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
||||||
|
|
||||||
int num_cores = thread->num_cores;
|
|
||||||
int skipped = thread->skipped_by_thread(args.DestY());
|
|
||||||
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
||||||
frac += fracstep * skipped;
|
frac += fracstep * skipped;
|
||||||
fracstep *= num_cores;
|
fracstep *= num_cores;
|
||||||
pitch *= num_cores;
|
pitch *= num_cores;
|
||||||
|
|
||||||
if (!args.FadeSky())
|
|
||||||
{
|
|
||||||
count = thread->count_for_thread(args.DestY(), count);
|
|
||||||
|
|
||||||
for (int index = 0; index < count; index++)
|
|
||||||
{
|
|
||||||
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
|
||||||
*dest = source0[sample_index];
|
|
||||||
dest += pitch;
|
|
||||||
frac += fracstep;
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t solid_top = args.SolidTopColor();
|
uint32_t solid_top = args.SolidTopColor();
|
||||||
uint32_t solid_bottom = args.SolidBottomColor();
|
uint32_t solid_bottom = args.SolidBottomColor();
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,6 @@ namespace swrenderer
|
||||||
void Execute(DrawerThread *thread) override
|
void Execute(DrawerThread *thread) override
|
||||||
{
|
{
|
||||||
uint32_t *dest = (uint32_t *)args.Dest();
|
uint32_t *dest = (uint32_t *)args.Dest();
|
||||||
int count = args.Count();
|
|
||||||
int pitch = args.Viewport()->RenderTarget->GetPitch();
|
int pitch = args.Viewport()->RenderTarget->GetPitch();
|
||||||
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
|
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
|
||||||
int textureheight0 = args.FrontTextureHeight();
|
int textureheight0 = args.FrontTextureHeight();
|
||||||
|
@ -51,6 +50,25 @@ namespace swrenderer
|
||||||
uint32_t solid_bottom = args.SolidBottomColor();
|
uint32_t solid_bottom = args.SolidBottomColor();
|
||||||
bool fadeSky = args.FadeSky();
|
bool fadeSky = args.FadeSky();
|
||||||
|
|
||||||
|
if (!fadeSky)
|
||||||
|
{
|
||||||
|
int count = thread->count_for_thread(args.DestY(), args.Count());
|
||||||
|
|
||||||
|
for (int index = 0; index < count; index++)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
*dest = source0[sample_index];
|
||||||
|
dest += pitch;
|
||||||
|
frac += fracstep;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int num_cores = thread->num_cores;
|
||||||
|
int skipped = thread->skipped_by_thread(args.DestY());
|
||||||
|
int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
|
||||||
|
|
||||||
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
||||||
int start_fade = 2; // How fast it should fade out
|
int start_fade = 2; // How fast it should fade out
|
||||||
int fade_length = (1 << (24 - start_fade));
|
int fade_length = (1 << (24 - start_fade));
|
||||||
|
@ -63,28 +81,11 @@ namespace swrenderer
|
||||||
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
||||||
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
||||||
|
|
||||||
int num_cores = thread->num_cores;
|
|
||||||
int skipped = thread->skipped_by_thread(args.DestY());
|
|
||||||
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
||||||
frac += fracstep * skipped;
|
frac += fracstep * skipped;
|
||||||
fracstep *= num_cores;
|
fracstep *= num_cores;
|
||||||
pitch *= num_cores;
|
pitch *= num_cores;
|
||||||
|
|
||||||
if (!fadeSky)
|
|
||||||
{
|
|
||||||
count = thread->count_for_thread(args.DestY(), count);
|
|
||||||
|
|
||||||
for (int index = 0; index < count; index++)
|
|
||||||
{
|
|
||||||
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
|
||||||
*dest = source0[sample_index];
|
|
||||||
dest += pitch;
|
|
||||||
frac += fracstep;
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
BgraColor solid_top_fill = solid_top;
|
BgraColor solid_top_fill = solid_top;
|
||||||
BgraColor solid_bottom_fill = solid_bottom;
|
BgraColor solid_bottom_fill = solid_bottom;
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,6 @@ namespace swrenderer
|
||||||
void Execute(DrawerThread *thread) override
|
void Execute(DrawerThread *thread) override
|
||||||
{
|
{
|
||||||
uint32_t *dest = (uint32_t *)args.Dest();
|
uint32_t *dest = (uint32_t *)args.Dest();
|
||||||
int count = args.Count();
|
|
||||||
int pitch = args.Viewport()->RenderTarget->GetPitch();
|
int pitch = args.Viewport()->RenderTarget->GetPitch();
|
||||||
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
|
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
|
||||||
int textureheight0 = args.FrontTextureHeight();
|
int textureheight0 = args.FrontTextureHeight();
|
||||||
|
@ -50,6 +49,25 @@ namespace swrenderer
|
||||||
uint32_t solid_bottom = args.SolidBottomColor();
|
uint32_t solid_bottom = args.SolidBottomColor();
|
||||||
bool fadeSky = args.FadeSky();
|
bool fadeSky = args.FadeSky();
|
||||||
|
|
||||||
|
if (!fadeSky)
|
||||||
|
{
|
||||||
|
int count = thread->count_for_thread(args.DestY(), args.Count());
|
||||||
|
|
||||||
|
for (int index = 0; index < count; index++)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
*dest = source0[sample_index];
|
||||||
|
dest += pitch;
|
||||||
|
frac += fracstep;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int num_cores = thread->num_cores;
|
||||||
|
int skipped = thread->skipped_by_thread(args.DestY());
|
||||||
|
int count = skipped + thread->count_for_thread(args.DestY(), args.Count()) * num_cores;
|
||||||
|
|
||||||
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
||||||
int start_fade = 2; // How fast it should fade out
|
int start_fade = 2; // How fast it should fade out
|
||||||
int fade_length = (1 << (24 - start_fade));
|
int fade_length = (1 << (24 - start_fade));
|
||||||
|
@ -62,28 +80,11 @@ namespace swrenderer
|
||||||
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
||||||
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
||||||
|
|
||||||
int num_cores = thread->num_cores;
|
|
||||||
int skipped = thread->skipped_by_thread(args.DestY());
|
|
||||||
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
||||||
frac += fracstep * skipped;
|
frac += fracstep * skipped;
|
||||||
fracstep *= num_cores;
|
fracstep *= num_cores;
|
||||||
pitch *= num_cores;
|
pitch *= num_cores;
|
||||||
|
|
||||||
if (!fadeSky)
|
|
||||||
{
|
|
||||||
count = thread->count_for_thread(args.DestY(), count);
|
|
||||||
|
|
||||||
for (int index = 0; index < count; index++)
|
|
||||||
{
|
|
||||||
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
|
||||||
*dest = source0[sample_index];
|
|
||||||
dest += pitch;
|
|
||||||
frac += fracstep;
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
|
__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
|
||||||
__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
|
__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
|
||||||
|
|
||||||
|
|
|
@ -140,6 +140,8 @@ void DrawerThreads::WorkerMain(DrawerThread *thread)
|
||||||
// Grab the commands
|
// Grab the commands
|
||||||
DrawerCommandQueuePtr list = active_commands[thread->current_queue];
|
DrawerCommandQueuePtr list = active_commands[thread->current_queue];
|
||||||
thread->current_queue++;
|
thread->current_queue++;
|
||||||
|
thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
|
||||||
|
thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
|
||||||
start_lock.unlock();
|
start_lock.unlock();
|
||||||
|
|
||||||
// Do the work:
|
// Do the work:
|
||||||
|
@ -206,8 +208,6 @@ void DrawerThreads::StartThreads()
|
||||||
thread->num_cores = I_GetNumaNodeThreadCount(numaNode);
|
thread->num_cores = I_GetNumaNodeThreadCount(numaNode);
|
||||||
thread->numa_node = numaNode;
|
thread->numa_node = numaNode;
|
||||||
thread->num_numa_nodes = I_GetNumaNodeCount();
|
thread->num_numa_nodes = I_GetNumaNodeCount();
|
||||||
thread->numa_start_y = numaNode * viewheight / I_GetNumaNodeCount();
|
|
||||||
thread->numa_end_y = (numaNode + 1) * viewheight / I_GetNumaNodeCount();
|
|
||||||
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
||||||
I_SetThreadNumaNode(thread->thread, numaNode);
|
I_SetThreadNumaNode(thread->thread, numaNode);
|
||||||
}
|
}
|
||||||
|
@ -223,8 +223,6 @@ void DrawerThreads::StartThreads()
|
||||||
thread->num_cores = num_threads;
|
thread->num_cores = num_threads;
|
||||||
thread->numa_node = 0;
|
thread->numa_node = 0;
|
||||||
thread->num_numa_nodes = 1;
|
thread->num_numa_nodes = 1;
|
||||||
thread->numa_start_y = 0;
|
|
||||||
thread->numa_end_y = viewheight;
|
|
||||||
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
thread->thread = std::thread([=]() { queue->WorkerMain(thread); });
|
||||||
I_SetThreadNumaNode(thread->thread, 0);
|
I_SetThreadNumaNode(thread->thread, 0);
|
||||||
}
|
}
|
||||||
|
@ -288,7 +286,4 @@ void MemcpyCommand::Execute(DrawerThread *thread)
|
||||||
d += dstep;
|
d += dstep;
|
||||||
s += sstep;
|
s += sstep;
|
||||||
}
|
}
|
||||||
|
|
||||||
thread->numa_start_y = thread->numa_node * viewheight / thread->num_numa_nodes;
|
|
||||||
thread->numa_end_y = (thread->numa_node + 1) * viewheight / thread->num_numa_nodes;
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue