- fix MemcpyCommand not using the same lines for the threads as softpoly (visible as a race condition when screenblocks didn't start at top of screen)

This commit is contained in:
Magnus Norddahl 2018-12-20 04:27:30 +01:00
parent 0faa9111b9
commit 8c52f20373
4 changed files with 59 additions and 30 deletions

View file

@ -68,17 +68,16 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x,
isBgraRenderTarget = dest_bgra; isBgraRenderTarget = dest_bgra;
int offsetx = clamp(x, 0, dest_width); int offsetx = clamp(x, 0, dest_width);
int offsety = clamp(y, 0, dest_height);
int pixelsize = dest_bgra ? 4 : 1; int pixelsize = dest_bgra ? 4 : 1;
int viewport_x = x - offsetx; int viewport_x = x - offsetx;
int viewport_y = y - offsety; int viewport_y = y;
int viewport_width = width; int viewport_width = width;
int viewport_height = height; int viewport_height = height;
dest += (offsetx + offsety * dest_pitch) * pixelsize; dest += offsetx * pixelsize;
dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx); dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx);
dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety); dest_height = clamp(viewport_y + viewport_height, 0, dest_height);
queue->Push<PolySetViewportCommand>(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra); queue->Push<PolySetViewportCommand>(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra);
} }
@ -127,13 +126,11 @@ void PolyTriangleThreadData::ClearStencil(uint8_t value)
int height = buffer->Height(); int height = buffer->Height();
uint8_t *data = buffer->Values(); uint8_t *data = buffer->Values();
int start_y = numa_node * height / num_numa_nodes; int skip = skipped_by_thread(0);
int end_y = (numa_node + 1) * height / num_numa_nodes; int count = count_for_thread(0, height);
int core_skip = (num_cores - (start_y - core) % num_cores) % num_cores;
start_y += core_skip;
data += start_y * width; data += skip * width;
for (int y = start_y; y < end_y; y += num_cores) for (int i = 0; i < count; i++)
{ {
memset(data, value, width); memset(data, value, width);
data += num_cores * width; data += num_cores * width;
@ -151,8 +148,6 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui
dest_height = new_dest_height; dest_height = new_dest_height;
dest_pitch = new_dest_pitch; dest_pitch = new_dest_pitch;
dest_bgra = new_dest_bgra; dest_bgra = new_dest_bgra;
numa_start_y = numa_node * screen->GetHeight() / num_numa_nodes;
numa_end_y = (numa_node + 1) * screen->GetHeight() / num_numa_nodes;
ccw = true; ccw = true;
weaponScene = false; weaponScene = false;
} }

View file

@ -71,7 +71,11 @@ public:
int numa_start_y; int numa_start_y;
int numa_end_y; int numa_end_y;
// The number of lines to skip to reach the first line to be rendered by this thread bool line_skipped_by_thread(int line)
{
return line < numa_start_y || line >= numa_end_y || line % num_cores != core;
}
int skipped_by_thread(int first_line) int skipped_by_thread(int first_line)
{ {
int clip_first_line = MAX(first_line, numa_start_y); int clip_first_line = MAX(first_line, numa_start_y);
@ -79,6 +83,13 @@ public:
return clip_first_line + core_skip - first_line; return clip_first_line + core_skip - first_line;
} }
// Returns how many lines out of a span of `count` lines beginning at `first_line`
// fall to this thread, given the skip reported by skipped_by_thread() and a
// stride of num_cores lines between consecutive lines of the same thread.
//
// first_line: first line of the span.
// count:      number of lines in the span; clipped so the span stops at numa_end_y.
// Returns a value >= 0.
int count_for_thread(int first_line, int count)
{
// Clip the span so it does not extend to or past numa_end_y.
count = MIN(count, numa_end_y - first_line);
// Drop the initial skip, then divide by num_cores rounding up.
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
// A negative quotient (possible only when the skip is larger than the clipped count) means no lines; clamp to zero.
return MAX(c, 0);
}
// Varyings // Varyings
float worldposX[MAXWIDTH]; float worldposX[MAXWIDTH];
float worldposY[MAXWIDTH]; float worldposY[MAXWIDTH];
@ -97,6 +108,8 @@ public:
uint8_t *dest = nullptr; uint8_t *dest = nullptr;
bool weaponScene = false; bool weaponScene = false;
int viewport_y = 0;
private: private:
ShadedTriVertex ShadeVertex(const PolyDrawArgs &drawargs, const void *vertices, int index); ShadedTriVertex ShadeVertex(const PolyDrawArgs &drawargs, const void *vertices, int index);
void DrawShadedTriangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args); void DrawShadedTriangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args);
@ -105,7 +118,6 @@ private:
static int ClipEdge(const ShadedTriVertex *verts, ShadedTriVertex *clippedvert); static int ClipEdge(const ShadedTriVertex *verts, ShadedTriVertex *clippedvert);
int viewport_x = 0; int viewport_x = 0;
int viewport_y = 0;
int viewport_width = 0; int viewport_width = 0;
int viewport_height = 0; int viewport_height = 0;
bool ccw = true; bool ccw = true;

View file

@ -60,73 +60,84 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat
ShadedTriVertex *sortedVertices[3]; ShadedTriVertex *sortedVertices[3];
SortVertices(args, sortedVertices); SortVertices(args, sortedVertices);
int clipleft = 0;
int cliptop = MAX(thread->viewport_y, thread->numa_start_y);
int clipright = thread->dest_width; int clipright = thread->dest_width;
int cliptop = thread->numa_start_y;
int clipbottom = MIN(thread->dest_height, thread->numa_end_y); int clipbottom = MIN(thread->dest_height, thread->numa_end_y);
int topY = (int)(sortedVertices[0]->y + 0.5f); int topY = (int)(sortedVertices[0]->y + 0.5f);
int midY = (int)(sortedVertices[1]->y + 0.5f); int midY = (int)(sortedVertices[1]->y + 0.5f);
int bottomY = (int)(sortedVertices[2]->y + 0.5f); int bottomY = (int)(sortedVertices[2]->y + 0.5f);
topY = MAX(topY, 0); topY = MAX(topY, cliptop);
midY = clamp(midY, 0, clipbottom); midY = MIN(midY, clipbottom);
bottomY = MIN(bottomY, clipbottom); bottomY = MIN(bottomY, clipbottom);
if (topY >= bottomY) if (topY >= bottomY)
return; return;
topY += thread->skipped_by_thread(topY);
int num_cores = thread->num_cores;
// Find start/end X positions for each line covered by the triangle: // Find start/end X positions for each line covered by the triangle:
int16_t edges[MAXHEIGHT * 2]; int16_t edges[MAXHEIGHT * 2];
int y = topY;
float longDX = sortedVertices[2]->x - sortedVertices[0]->x; float longDX = sortedVertices[2]->x - sortedVertices[0]->x;
float longDY = sortedVertices[2]->y - sortedVertices[0]->y; float longDY = sortedVertices[2]->y - sortedVertices[0]->y;
float longStep = longDX / longDY; float longStep = longDX / longDY;
float longPos = sortedVertices[0]->x + longStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f; float longPos = sortedVertices[0]->x + longStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f;
longStep *= num_cores;
if (topY < midY) if (y < midY)
{ {
float shortDX = sortedVertices[1]->x - sortedVertices[0]->x; float shortDX = sortedVertices[1]->x - sortedVertices[0]->x;
float shortDY = sortedVertices[1]->y - sortedVertices[0]->y; float shortDY = sortedVertices[1]->y - sortedVertices[0]->y;
float shortStep = shortDX / shortDY; float shortStep = shortDX / shortDY;
float shortPos = sortedVertices[0]->x + shortStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f; float shortPos = sortedVertices[0]->x + shortStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f;
shortStep *= num_cores;
for (int y = topY; y < midY; y++) while (y < midY)
{ {
int x0 = (int)shortPos; int x0 = (int)shortPos;
int x1 = (int)longPos; int x1 = (int)longPos;
if (x1 < x0) std::swap(x0, x1); if (x1 < x0) std::swap(x0, x1);
x0 = clamp(x0, 0, clipright); x0 = clamp(x0, clipleft, clipright);
x1 = clamp(x1, 0, clipright); x1 = clamp(x1, clipleft, clipright);
edges[y << 1] = x0; edges[y << 1] = x0;
edges[(y << 1) + 1] = x1; edges[(y << 1) + 1] = x1;
shortPos += shortStep; shortPos += shortStep;
longPos += longStep; longPos += longStep;
y += num_cores;
} }
} }
if (midY < bottomY) if (y < bottomY)
{ {
float shortDX = sortedVertices[2]->x - sortedVertices[1]->x; float shortDX = sortedVertices[2]->x - sortedVertices[1]->x;
float shortDY = sortedVertices[2]->y - sortedVertices[1]->y; float shortDY = sortedVertices[2]->y - sortedVertices[1]->y;
float shortStep = shortDX / shortDY; float shortStep = shortDX / shortDY;
float shortPos = sortedVertices[1]->x + shortStep * (midY + 0.5f - sortedVertices[1]->y) + 0.5f; float shortPos = sortedVertices[1]->x + shortStep * (y + 0.5f - sortedVertices[1]->y) + 0.5f;
shortStep *= num_cores;
for (int y = midY; y < bottomY; y++) while (y < bottomY)
{ {
int x0 = (int)shortPos; int x0 = (int)shortPos;
int x1 = (int)longPos; int x1 = (int)longPos;
if (x1 < x0) std::swap(x0, x1); if (x1 < x0) std::swap(x0, x1);
x0 = clamp(x0, 0, clipright); x0 = clamp(x0, clipleft, clipright);
x1 = clamp(x1, 0, clipright); x1 = clamp(x1, clipleft, clipright);
edges[y << 1] = x0; edges[y << 1] = x0;
edges[(y << 1) + 1] = x1; edges[(y << 1) + 1] = x1;
shortPos += shortStep; shortPos += shortStep;
longPos += longStep; longPos += longStep;
y += num_cores;
} }
} }
@ -183,8 +194,14 @@ void DrawTriangle(const TriDrawTriangleArgs *args, PolyTriangleThreadData *threa
if (OptT::Flags & SWTRI_WriteStencil) if (OptT::Flags & SWTRI_WriteStencil)
stencilWriteValue = args->uniforms->StencilWriteValue(); stencilWriteValue = args->uniforms->StencilWriteValue();
float weaponWOffset;
if ((OptT::Flags & SWTRI_DepthTest) || (OptT::Flags & SWTRI_WriteDepth))
{
weaponWOffset = thread->weaponScene ? 1.0f : 0.0f;
}
int num_cores = thread->num_cores; int num_cores = thread->num_cores;
for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores) for (int y = topY; y < bottomY; y += num_cores)
{ {
int x = edges[y << 1]; int x = edges[y << 1];
int xend = edges[(y << 1) + 1]; int xend = edges[(y << 1) + 1];
@ -198,7 +215,7 @@ void DrawTriangle(const TriDrawTriangleArgs *args, PolyTriangleThreadData *threa
float startX = x + (0.5f - v1X); float startX = x + (0.5f - v1X);
float startY = y + (0.5f - v1Y); float startY = y + (0.5f - v1Y);
posXW = v1W + stepXW * startX + args->gradientY.W * startY + (thread->weaponScene ? 1.0f : 0.0f); posXW = v1W + stepXW * startX + args->gradientY.W * startY + weaponWOffset;
} }
#ifndef NO_SSE #ifndef NO_SSE

View file

@ -142,6 +142,11 @@ void DrawerThreads::WorkerMain(DrawerThread *thread)
thread->current_queue++; thread->current_queue++;
thread->numa_start_y = thread->numa_node * screen->GetHeight() / thread->num_numa_nodes; thread->numa_start_y = thread->numa_node * screen->GetHeight() / thread->num_numa_nodes;
thread->numa_end_y = (thread->numa_node + 1) * screen->GetHeight() / thread->num_numa_nodes; thread->numa_end_y = (thread->numa_node + 1) * screen->GetHeight() / thread->num_numa_nodes;
if (thread->poly)
{
thread->poly->numa_start_y = thread->numa_start_y;
thread->poly->numa_end_y = thread->numa_end_y;
}
start_lock.unlock(); start_lock.unlock();
// Do the work: // Do the work: