From 8c52f20373aecbab19bdd95f5f27a33b0af5f865 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 20 Dec 2018 04:27:30 +0100 Subject: [PATCH] - fix MemcpyCommand not using the same lines for the threads as softpoly (visible as a race condition when screenblocks didn't start at top of screen) --- src/polyrenderer/drawers/poly_triangle.cpp | 19 +++----- src/polyrenderer/drawers/poly_triangle.h | 16 ++++++- src/polyrenderer/drawers/screen_triangle.cpp | 49 +++++++++++++------- src/swrenderer/drawers/r_thread.cpp | 5 ++ 4 files changed, 59 insertions(+), 30 deletions(-) diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 1949b06e3..fab366296 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -68,17 +68,16 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, isBgraRenderTarget = dest_bgra; int offsetx = clamp(x, 0, dest_width); - int offsety = clamp(y, 0, dest_height); int pixelsize = dest_bgra ? 4 : 1; int viewport_x = x - offsetx; - int viewport_y = y - offsety; + int viewport_y = y; int viewport_width = width; int viewport_height = height; - dest += (offsetx + offsety * dest_pitch) * pixelsize; + dest += offsetx * pixelsize; dest_width = clamp(viewport_x + viewport_width, 0, dest_width - offsetx); - dest_height = clamp(viewport_y + viewport_height, 0, dest_height - offsety); + dest_height = clamp(viewport_y + viewport_height, 0, dest_height); queue->Push(viewport_x, viewport_y, viewport_width, viewport_height, dest, dest_width, dest_height, dest_pitch, dest_bgra); } @@ -127,13 +126,11 @@ void PolyTriangleThreadData::ClearStencil(uint8_t value) int height = buffer->Height(); uint8_t *data = buffer->Values(); - int start_y = numa_node * height / num_numa_nodes; - int end_y = (numa_node + 1) * height / num_numa_nodes; - int core_skip = (num_cores - (start_y - core) % num_cores) % num_cores; - start_y += core_skip; + int skip = skipped_by_thread(0); + int count = count_for_thread(0, height); - data += start_y * width; - for (int y = start_y; y < end_y; y += num_cores) + data += skip * width; + for (int i = 0; i < count; i++) { memset(data, value, width); data += num_cores * width; @@ -151,8 +148,6 @@ void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, ui dest_height = new_dest_height; dest_pitch = new_dest_pitch; dest_bgra = new_dest_bgra; - numa_start_y = numa_node * screen->GetHeight() / num_numa_nodes; - numa_end_y = (numa_node + 1) * screen->GetHeight() / num_numa_nodes; ccw = true; weaponScene = false; } diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index fadc799d6..4504a3950 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -71,7 +71,11 @@ public: int numa_start_y; int numa_end_y; - // The number of lines to skip to reach the first line to be rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < numa_start_y || line >= numa_end_y || line % num_cores != core; + } + int skipped_by_thread(int first_line) { int clip_first_line = MAX(first_line, numa_start_y); @@ -79,6 +83,13 @@ public: return clip_first_line + core_skip - first_line; } + int count_for_thread(int first_line, int count) + { + count = MIN(count, numa_end_y - first_line); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + // Varyings float worldposX[MAXWIDTH]; float worldposY[MAXWIDTH]; @@ -97,6 +108,8 @@ public: uint8_t *dest = nullptr; bool weaponScene = false; + int viewport_y = 0; + private: ShadedTriVertex ShadeVertex(const PolyDrawArgs &drawargs, const void *vertices, int index); void DrawShadedTriangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args); @@ -105,7 +118,6 @@ private: static int ClipEdge(const ShadedTriVertex *verts, ShadedTriVertex *clippedvert); int viewport_x = 0; - int viewport_y = 0; int viewport_width = 0; int viewport_height = 0; bool ccw = true; diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index 551753c20..151e86a9f 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -60,73 +60,84 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat ShadedTriVertex *sortedVertices[3]; SortVertices(args, sortedVertices); + int clipleft = 0; + int cliptop = MAX(thread->viewport_y, thread->numa_start_y); int clipright = thread->dest_width; - int cliptop = thread->numa_start_y; int clipbottom = MIN(thread->dest_height, thread->numa_end_y); int topY = (int)(sortedVertices[0]->y + 0.5f); int midY = (int)(sortedVertices[1]->y + 0.5f); int bottomY = (int)(sortedVertices[2]->y + 0.5f); - topY = MAX(topY, 0); - midY = clamp(midY, 0, clipbottom); + topY = MAX(topY, cliptop); + midY = MIN(midY, clipbottom); bottomY = MIN(bottomY, clipbottom); if (topY >= bottomY) return; + topY += thread->skipped_by_thread(topY); + int num_cores = thread->num_cores; + // Find start/end X positions for each line covered by the triangle: int16_t edges[MAXHEIGHT * 2]; + int y = topY; + float longDX = sortedVertices[2]->x - sortedVertices[0]->x; float longDY = sortedVertices[2]->y - sortedVertices[0]->y; float longStep = longDX / longDY; - float longPos = sortedVertices[0]->x + longStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f; + float longPos = sortedVertices[0]->x + longStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f; + longStep *= num_cores; - if (topY < midY) + if (y < midY) { float shortDX = sortedVertices[1]->x - sortedVertices[0]->x; float shortDY = sortedVertices[1]->y - sortedVertices[0]->y; float shortStep = shortDX / shortDY; - float shortPos = sortedVertices[0]->x + shortStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f; + float shortPos = sortedVertices[0]->x + shortStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f; + shortStep *= num_cores; - for (int y = topY; y < midY; y++) + while (y < midY) { int x0 = (int)shortPos; int x1 = (int)longPos; if (x1 < x0) std::swap(x0, x1); - x0 = clamp(x0, 0, clipright); - x1 = clamp(x1, 0, clipright); + x0 = clamp(x0, clipleft, clipright); + x1 = clamp(x1, clipleft, clipright); edges[y << 1] = x0; edges[(y << 1) + 1] = x1; shortPos += shortStep; longPos += longStep; + y += num_cores; } } - if (midY < bottomY) + if (y < bottomY) { float shortDX = sortedVertices[2]->x - sortedVertices[1]->x; float shortDY = sortedVertices[2]->y - sortedVertices[1]->y; float shortStep = shortDX / shortDY; - float shortPos = sortedVertices[1]->x + shortStep * (midY + 0.5f - sortedVertices[1]->y) + 0.5f; + float shortPos = sortedVertices[1]->x + shortStep * (y + 0.5f - sortedVertices[1]->y) + 0.5f; + shortStep *= num_cores; - for (int y = midY; y < bottomY; y++) + while (y < bottomY) { int x0 = (int)shortPos; int x1 = (int)longPos; if (x1 < x0) std::swap(x0, x1); - x0 = clamp(x0, 0, clipright); - x1 = clamp(x1, 0, clipright); + x0 = clamp(x0, clipleft, clipright); + x1 = clamp(x1, clipleft, clipright); edges[y << 1] = x0; edges[(y << 1) + 1] = x1; shortPos += shortStep; longPos += longStep; + y += num_cores; } } @@ -183,8 +194,14 @@ void DrawTriangle(const TriDrawTriangleArgs *args, PolyTriangleThreadData *threa if (OptT::Flags & SWTRI_WriteStencil) stencilWriteValue = args->uniforms->StencilWriteValue(); + float weaponWOffset; + if ((OptT::Flags & SWTRI_DepthTest) || (OptT::Flags & SWTRI_WriteDepth)) + { + weaponWOffset = thread->weaponScene ? 1.0f : 0.0f; + } + int num_cores = thread->num_cores; - for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores) + for (int y = topY; y < bottomY; y += num_cores) { int x = edges[y << 1]; int xend = edges[(y << 1) + 1]; @@ -198,7 +215,7 @@ void DrawTriangle(const TriDrawTriangleArgs *args, PolyTriangleThreadData *threa float startX = x + (0.5f - v1X); float startY = y + (0.5f - v1Y); - posXW = v1W + stepXW * startX + args->gradientY.W * startY + (thread->weaponScene ? 1.0f : 0.0f); + posXW = v1W + stepXW * startX + args->gradientY.W * startY + weaponWOffset; } #ifndef NO_SSE diff --git a/src/swrenderer/drawers/r_thread.cpp b/src/swrenderer/drawers/r_thread.cpp index 5055e259b..e8b6eeada 100644 --- a/src/swrenderer/drawers/r_thread.cpp +++ b/src/swrenderer/drawers/r_thread.cpp @@ -142,6 +142,11 @@ void DrawerThreads::WorkerMain(DrawerThread *thread) thread->current_queue++; thread->numa_start_y = thread->numa_node * screen->GetHeight() / thread->num_numa_nodes; thread->numa_end_y = (thread->numa_node + 1) * screen->GetHeight() / thread->num_numa_nodes; + if (thread->poly) + { + thread->poly->numa_start_y = thread->numa_start_y; + thread->poly->numa_end_y = thread->numa_end_y; + } start_lock.unlock(); // Do the work: