mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-27 14:22:13 +00:00
- fix MemcpyCommand not assigning the same lines to each thread as softpoly does (visible as a race condition when screenblocks didn't start at the top of the screen)
This commit is contained in:
parent
0faa9111b9
commit
8c52f20373
4 changed files with 59 additions and 30 deletions
|
@ -68,17 +68,16 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x,
|
||||||
isBgraRenderTarget = dest_bgra;
|
isBgraRenderTarget = dest_bgra;
|
||||||
|
|
||||||
int offsetx = clamp(x, 0, dest_width);
|
int offsetx = clamp(x, 0, dest_width);
|
||||||
int offsety = clamp(y, 0, dest_height);
|
|
||||||
int pixelsize = dest_bgra ? 4 : 1;
|
int pixelsize = dest_bgra ? 4 : 1;
|
||||||
|
|
||||||
int viewport_x = x - offsetx;
|
int viewport_x = x - offsetx;
|
||||||
int viewport_y = y - offsety;
|
int viewport_y = y;
|
||||||
int viewport_width = width;
|
int viewport_width = width;
|
||||||
int viewport_height = height;
|
int viewport_height = height;
|
||||||
|
|
||||||
dest += (offsetx + offsety * dest_pitch) * pixelsize;
|
dest += offsetx * pixelsize;
|
||||||
dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx);
|
dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx);
|
||||||
dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety);
|
dest_height = clamp(viewport_y + viewport_height, 0, dest_height);
|
||||||
|
|
||||||
queue->Push<PolySetViewportCommand>(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra);
|
queue->Push<PolySetViewportCommand>(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra);
|
||||||
}
|
}
|
||||||
|
@ -127,13 +126,11 @@ void PolyTriangleThreadData::ClearStencil(uint8_t value)
|
||||||
int height = buffer->Height();
|
int height = buffer->Height();
|
||||||
uint8_t *data = buffer->Values();
|
uint8_t *data = buffer->Values();
|
||||||
|
|
||||||
int start_y = numa_node * height / num_numa_nodes;
|
int skip = skipped_by_thread(0);
|
||||||
int end_y = (numa_node + 1) * height / num_numa_nodes;
|
int count = count_for_thread(0, height);
|
||||||
int core_skip = (num_cores - (start_y - core) % num_cores) % num_cores;
|
|
||||||
start_y += core_skip;
|
|
||||||
|
|
||||||
data += start_y * width;
|
data += skip * width;
|
||||||
for (int y = start_y; y < end_y; y += num_cores)
|
for (int i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
memset(data, value, width);
|
memset(data, value, width);
|
||||||
data += num_cores * width;
|
data += num_cores * width;
|
||||||
|
@ -151,8 +148,6 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui
|
||||||
dest_height = new_dest_height;
|
dest_height = new_dest_height;
|
||||||
dest_pitch = new_dest_pitch;
|
dest_pitch = new_dest_pitch;
|
||||||
dest_bgra = new_dest_bgra;
|
dest_bgra = new_dest_bgra;
|
||||||
numa_start_y = numa_node * screen->GetHeight() / num_numa_nodes;
|
|
||||||
numa_end_y = (numa_node + 1) * screen->GetHeight() / num_numa_nodes;
|
|
||||||
ccw = true;
|
ccw = true;
|
||||||
weaponScene = false;
|
weaponScene = false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,7 +71,11 @@ public:
|
||||||
int numa_start_y;
|
int numa_start_y;
|
||||||
int numa_end_y;
|
int numa_end_y;
|
||||||
|
|
||||||
// The number of lines to skip to reach the first line to be rendered by this thread
|
// Returns true when this thread does not handle the given line: the line lies outside [numa_start_y, numa_end_y) or line % num_cores differs from this thread's core index.
bool line_skipped_by_thread(int line)
|
||||||
|
{
|
||||||
|
return line < numa_start_y || line >= numa_end_y || line % num_cores != core;
|
||||||
|
}
|
||||||
|
|
||||||
int skipped_by_thread(int first_line)
|
int skipped_by_thread(int first_line)
|
||||||
{
|
{
|
||||||
int clip_first_line = MAX(first_line, numa_start_y);
|
int clip_first_line = MAX(first_line, numa_start_y);
|
||||||
|
@ -79,6 +83,13 @@ public:
|
||||||
return clip_first_line + core_skip - first_line;
|
return clip_first_line + core_skip - first_line;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns how many of the `count` lines starting at first_line fall to this thread (one line every num_cores lines, beginning after skipped_by_thread(first_line)); never negative.
int count_for_thread(int first_line, int count)
|
||||||
|
{
|
||||||
|
count = MIN(count, numa_end_y - first_line); // do not run past numa_end_y
|
||||||
|
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; // lines left after the initial skip, divided by num_cores rounding up
|
||||||
|
return MAX(c, 0); // a negative intermediate result means no lines for this thread
|
||||||
|
}
|
||||||
|
|
||||||
// Varyings
|
// Varyings
|
||||||
float worldposX[MAXWIDTH];
|
float worldposX[MAXWIDTH];
|
||||||
float worldposY[MAXWIDTH];
|
float worldposY[MAXWIDTH];
|
||||||
|
@ -97,6 +108,8 @@ public:
|
||||||
uint8_t *dest = nullptr;
|
uint8_t *dest = nullptr;
|
||||||
bool weaponScene = false;
|
bool weaponScene = false;
|
||||||
|
|
||||||
|
int viewport_y = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ShadedTriVertex ShadeVertex(const PolyDrawArgs &drawargs, const void *vertices, int index);
|
ShadedTriVertex ShadeVertex(const PolyDrawArgs &drawargs, const void *vertices, int index);
|
||||||
void DrawShadedTriangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args);
|
void DrawShadedTriangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args);
|
||||||
|
@ -105,7 +118,6 @@ private:
|
||||||
static int ClipEdge(const ShadedTriVertex *verts, ShadedTriVertex *clippedvert);
|
static int ClipEdge(const ShadedTriVertex *verts, ShadedTriVertex *clippedvert);
|
||||||
|
|
||||||
int viewport_x = 0;
|
int viewport_x = 0;
|
||||||
int viewport_y = 0;
|
|
||||||
int viewport_width = 0;
|
int viewport_width = 0;
|
||||||
int viewport_height = 0;
|
int viewport_height = 0;
|
||||||
bool ccw = true;
|
bool ccw = true;
|
||||||
|
|
|
@ -60,73 +60,84 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat
|
||||||
ShadedTriVertex *sortedVertices[3];
|
ShadedTriVertex *sortedVertices[3];
|
||||||
SortVertices(args, sortedVertices);
|
SortVertices(args, sortedVertices);
|
||||||
|
|
||||||
|
int clipleft = 0;
|
||||||
|
int cliptop = MAX(thread->viewport_y, thread->numa_start_y);
|
||||||
int clipright = thread->dest_width;
|
int clipright = thread->dest_width;
|
||||||
int cliptop = thread->numa_start_y;
|
|
||||||
int clipbottom = MIN(thread->dest_height, thread->numa_end_y);
|
int clipbottom = MIN(thread->dest_height, thread->numa_end_y);
|
||||||
|
|
||||||
int topY = (int)(sortedVertices[0]->y + 0.5f);
|
int topY = (int)(sortedVertices[0]->y + 0.5f);
|
||||||
int midY = (int)(sortedVertices[1]->y + 0.5f);
|
int midY = (int)(sortedVertices[1]->y + 0.5f);
|
||||||
int bottomY = (int)(sortedVertices[2]->y + 0.5f);
|
int bottomY = (int)(sortedVertices[2]->y + 0.5f);
|
||||||
|
|
||||||
topY = MAX(topY, 0);
|
topY = MAX(topY, cliptop);
|
||||||
midY = clamp(midY, 0, clipbottom);
|
midY = MIN(midY, clipbottom);
|
||||||
bottomY = MIN(bottomY, clipbottom);
|
bottomY = MIN(bottomY, clipbottom);
|
||||||
|
|
||||||
if (topY >= bottomY)
|
if (topY >= bottomY)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
topY += thread->skipped_by_thread(topY);
|
||||||
|
int num_cores = thread->num_cores;
|
||||||
|
|
||||||
// Find start/end X positions for each line covered by the triangle:
|
// Find start/end X positions for each line covered by the triangle:
|
||||||
|
|
||||||
int16_t edges[MAXHEIGHT * 2];
|
int16_t edges[MAXHEIGHT * 2];
|
||||||
|
|
||||||
|
int y = topY;
|
||||||
|
|
||||||
float longDX = sortedVertices[2]->x - sortedVertices[0]->x;
|
float longDX = sortedVertices[2]->x - sortedVertices[0]->x;
|
||||||
float longDY = sortedVertices[2]->y - sortedVertices[0]->y;
|
float longDY = sortedVertices[2]->y - sortedVertices[0]->y;
|
||||||
float longStep = longDX / longDY;
|
float longStep = longDX / longDY;
|
||||||
float longPos = sortedVertices[0]->x + longStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f;
|
float longPos = sortedVertices[0]->x + longStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f;
|
||||||
|
longStep *= num_cores;
|
||||||
|
|
||||||
if (topY < midY)
|
if (y < midY)
|
||||||
{
|
{
|
||||||
float shortDX = sortedVertices[1]->x - sortedVertices[0]->x;
|
float shortDX = sortedVertices[1]->x - sortedVertices[0]->x;
|
||||||
float shortDY = sortedVertices[1]->y - sortedVertices[0]->y;
|
float shortDY = sortedVertices[1]->y - sortedVertices[0]->y;
|
||||||
float shortStep = shortDX / shortDY;
|
float shortStep = shortDX / shortDY;
|
||||||
float shortPos = sortedVertices[0]->x + shortStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f;
|
float shortPos = sortedVertices[0]->x + shortStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f;
|
||||||
|
shortStep *= num_cores;
|
||||||
|
|
||||||
for (int y = topY; y < midY; y++)
|
while (y < midY)
|
||||||
{
|
{
|
||||||
int x0 = (int)shortPos;
|
int x0 = (int)shortPos;
|
||||||
int x1 = (int)longPos;
|
int x1 = (int)longPos;
|
||||||
if (x1 < x0) std::swap(x0, x1);
|
if (x1 < x0) std::swap(x0, x1);
|
||||||
x0 = clamp(x0, 0, clipright);
|
x0 = clamp(x0, clipleft, clipright);
|
||||||
x1 = clamp(x1, 0, clipright);
|
x1 = clamp(x1, clipleft, clipright);
|
||||||
|
|
||||||
edges[y << 1] = x0;
|
edges[y << 1] = x0;
|
||||||
edges[(y << 1) + 1] = x1;
|
edges[(y << 1) + 1] = x1;
|
||||||
|
|
||||||
shortPos += shortStep;
|
shortPos += shortStep;
|
||||||
longPos += longStep;
|
longPos += longStep;
|
||||||
|
y += num_cores;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (midY < bottomY)
|
if (y < bottomY)
|
||||||
{
|
{
|
||||||
float shortDX = sortedVertices[2]->x - sortedVertices[1]->x;
|
float shortDX = sortedVertices[2]->x - sortedVertices[1]->x;
|
||||||
float shortDY = sortedVertices[2]->y - sortedVertices[1]->y;
|
float shortDY = sortedVertices[2]->y - sortedVertices[1]->y;
|
||||||
float shortStep = shortDX / shortDY;
|
float shortStep = shortDX / shortDY;
|
||||||
float shortPos = sortedVertices[1]->x + shortStep * (midY + 0.5f - sortedVertices[1]->y) + 0.5f;
|
float shortPos = sortedVertices[1]->x + shortStep * (y + 0.5f - sortedVertices[1]->y) + 0.5f;
|
||||||
|
shortStep *= num_cores;
|
||||||
|
|
||||||
for (int y = midY; y < bottomY; y++)
|
while (y < bottomY)
|
||||||
{
|
{
|
||||||
int x0 = (int)shortPos;
|
int x0 = (int)shortPos;
|
||||||
int x1 = (int)longPos;
|
int x1 = (int)longPos;
|
||||||
if (x1 < x0) std::swap(x0, x1);
|
if (x1 < x0) std::swap(x0, x1);
|
||||||
x0 = clamp(x0, 0, clipright);
|
x0 = clamp(x0, clipleft, clipright);
|
||||||
x1 = clamp(x1, 0, clipright);
|
x1 = clamp(x1, clipleft, clipright);
|
||||||
|
|
||||||
edges[y << 1] = x0;
|
edges[y << 1] = x0;
|
||||||
edges[(y << 1) + 1] = x1;
|
edges[(y << 1) + 1] = x1;
|
||||||
|
|
||||||
shortPos += shortStep;
|
shortPos += shortStep;
|
||||||
longPos += longStep;
|
longPos += longStep;
|
||||||
|
y += num_cores;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,8 +194,14 @@ void DrawTriangle(const TriDrawTriangleArgs *args, PolyTriangleThreadData *threa
|
||||||
if (OptT::Flags & SWTRI_WriteStencil)
|
if (OptT::Flags & SWTRI_WriteStencil)
|
||||||
stencilWriteValue = args->uniforms->StencilWriteValue();
|
stencilWriteValue = args->uniforms->StencilWriteValue();
|
||||||
|
|
||||||
|
float weaponWOffset;
|
||||||
|
if ((OptT::Flags & SWTRI_DepthTest) || (OptT::Flags & SWTRI_WriteDepth))
|
||||||
|
{
|
||||||
|
weaponWOffset = thread->weaponScene ? 1.0f : 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
int num_cores = thread->num_cores;
|
int num_cores = thread->num_cores;
|
||||||
for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores)
|
for (int y = topY; y < bottomY; y += num_cores)
|
||||||
{
|
{
|
||||||
int x = edges[y << 1];
|
int x = edges[y << 1];
|
||||||
int xend = edges[(y << 1) + 1];
|
int xend = edges[(y << 1) + 1];
|
||||||
|
@ -198,7 +215,7 @@ void DrawTriangle(const TriDrawTriangleArgs *args, PolyTriangleThreadData *threa
|
||||||
|
|
||||||
float startX = x + (0.5f - v1X);
|
float startX = x + (0.5f - v1X);
|
||||||
float startY = y + (0.5f - v1Y);
|
float startY = y + (0.5f - v1Y);
|
||||||
posXW = v1W + stepXW * startX + args->gradientY.W * startY + (thread->weaponScene ? 1.0f : 0.0f);
|
posXW = v1W + stepXW * startX + args->gradientY.W * startY + weaponWOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef NO_SSE
|
#ifndef NO_SSE
|
||||||
|
|
|
@ -142,6 +142,11 @@ void DrawerThreads::WorkerMain(DrawerThread *thread)
|
||||||
thread->current_queue++;
|
thread->current_queue++;
|
||||||
thread->numa_start_y = thread->numa_node * screen->GetHeight() / thread->num_numa_nodes;
|
thread->numa_start_y = thread->numa_node * screen->GetHeight() / thread->num_numa_nodes;
|
||||||
thread->numa_end_y = (thread->numa_node + 1) * screen->GetHeight() / thread->num_numa_nodes;
|
thread->numa_end_y = (thread->numa_node + 1) * screen->GetHeight() / thread->num_numa_nodes;
|
||||||
|
if (thread->poly)
|
||||||
|
{
|
||||||
|
thread->poly->numa_start_y = thread->numa_start_y;
|
||||||
|
thread->poly->numa_end_y = thread->numa_end_y;
|
||||||
|
}
|
||||||
start_lock.unlock();
|
start_lock.unlock();
|
||||||
|
|
||||||
// Do the work:
|
// Do the work:
|
||||||
|
|
Loading…
Reference in a new issue