mirror of
https://github.com/ZDoom/gzdoom.git
synced 2025-01-18 23:52:02 +00:00
Support for drawing in multiple passes
This commit is contained in:
parent
6c70eaea2f
commit
103a6baac5
3 changed files with 141 additions and 43 deletions
17
src/r_draw.h
17
src/r_draw.h
|
@ -458,25 +458,34 @@ public:
|
||||||
// Number of active threads
|
// Number of active threads
|
||||||
int num_cores = 1;
|
int num_cores = 1;
|
||||||
|
|
||||||
|
// Range of rows processed this pass
|
||||||
|
int pass_start_y = 0;
|
||||||
|
int pass_end_y = 300;
|
||||||
|
|
||||||
uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
|
uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
|
||||||
uint32_t *dc_temp_rgba;
|
uint32_t *dc_temp_rgba;
|
||||||
|
|
||||||
// Checks if a line is rendered by this thread
|
// Checks if a line is rendered by this thread
|
||||||
bool line_skipped_by_thread(int line)
|
bool line_skipped_by_thread(int line)
|
||||||
{
|
{
|
||||||
return line % num_cores != core;
|
return line < pass_start_y || line >= pass_end_y || line % num_cores != core;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The number of lines to skip to reach the first line to be rendered by this thread
|
// The number of lines to skip to reach the first line to be rendered by this thread
|
||||||
int skipped_by_thread(int first_line)
|
int skipped_by_thread(int first_line)
|
||||||
{
|
{
|
||||||
return (num_cores - (first_line - core) % num_cores) % num_cores;
|
int pass_skip = MAX(pass_start_y - first_line, 0);
|
||||||
|
int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores;
|
||||||
|
return pass_skip + core_skip;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The number of lines to be rendered by this thread
|
// The number of lines to be rendered by this thread
|
||||||
int count_for_thread(int first_line, int count)
|
int count_for_thread(int first_line, int count)
|
||||||
{
|
{
|
||||||
return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
int lines_until_pass_end = MAX(pass_end_y - first_line, 0);
|
||||||
|
count = MIN(count, lines_until_pass_end);
|
||||||
|
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
||||||
|
return MAX(c, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate the dest address for the first line to be rendered by this thread
|
// Calculate the dest address for the first line to be rendered by this thread
|
||||||
|
@ -522,6 +531,8 @@ class DrawerCommandQueue
|
||||||
|
|
||||||
bool no_threading = false;
|
bool no_threading = false;
|
||||||
DrawerThread single_core_thread;
|
DrawerThread single_core_thread;
|
||||||
|
int num_passes = 2;
|
||||||
|
int rows_in_pass = 540;
|
||||||
|
|
||||||
void StartThreads();
|
void StartThreads();
|
||||||
void StopThreads();
|
void StopThreads();
|
||||||
|
|
|
@ -104,12 +104,20 @@ void DrawerCommandQueue::Finish()
|
||||||
thread.core = 0;
|
thread.core = 0;
|
||||||
thread.num_cores = queue->threads.size() + 1;
|
thread.num_cores = queue->threads.size() + 1;
|
||||||
|
|
||||||
|
for (int pass = 0; pass < queue->num_passes; pass++)
|
||||||
|
{
|
||||||
|
thread.pass_start_y = pass * queue->rows_in_pass;
|
||||||
|
thread.pass_end_y = (pass + 1) * queue->rows_in_pass;
|
||||||
|
if (pass + 1 == queue->num_passes)
|
||||||
|
thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT);
|
||||||
|
|
||||||
size_t size = queue->active_commands.size();
|
size_t size = queue->active_commands.size();
|
||||||
for (size_t i = 0; i < size; i++)
|
for (size_t i = 0; i < size; i++)
|
||||||
{
|
{
|
||||||
auto &command = queue->active_commands[i];
|
auto &command = queue->active_commands[i];
|
||||||
command->Execute(&thread);
|
command->Execute(&thread);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Wait for everyone to finish:
|
// Wait for everyone to finish:
|
||||||
|
|
||||||
|
@ -156,12 +164,20 @@ void DrawerCommandQueue::StartThreads()
|
||||||
start_lock.unlock();
|
start_lock.unlock();
|
||||||
|
|
||||||
// Do the work:
|
// Do the work:
|
||||||
|
for (int pass = 0; pass < queue->num_passes; pass++)
|
||||||
|
{
|
||||||
|
thread->pass_start_y = pass * queue->rows_in_pass;
|
||||||
|
thread->pass_end_y = (pass + 1) * queue->rows_in_pass;
|
||||||
|
if (pass + 1 == queue->num_passes)
|
||||||
|
thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT);
|
||||||
|
|
||||||
size_t size = queue->active_commands.size();
|
size_t size = queue->active_commands.size();
|
||||||
for (size_t i = 0; i < size; i++)
|
for (size_t i = 0; i < size; i++)
|
||||||
{
|
{
|
||||||
auto &command = queue->active_commands[i];
|
auto &command = queue->active_commands[i];
|
||||||
command->Execute(thread);
|
command->Execute(thread);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Notify main thread that we finished:
|
// Notify main thread that we finished:
|
||||||
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
|
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
|
||||||
|
@ -1611,6 +1627,79 @@ public:
|
||||||
BYTE xshift = yshift - ds_xbits;
|
BYTE xshift = yshift - ds_xbits;
|
||||||
int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
|
int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
|
||||||
|
|
||||||
|
int sse_count = count / 4;
|
||||||
|
count -= sse_count * 4;
|
||||||
|
|
||||||
|
if (shade_constants.simple_shade)
|
||||||
|
{
|
||||||
|
SSE_SHADE_SIMPLE_INIT(light);
|
||||||
|
|
||||||
|
while (sse_count--)
|
||||||
|
{
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p0 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p1 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p2 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p3 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
// Lookup pixel from flat texture tile
|
||||||
|
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
|
||||||
|
SSE_SHADE_SIMPLE(fg);
|
||||||
|
_mm_storeu_si128((__m128i*)dest, fg);
|
||||||
|
dest += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SSE_SHADE_INIT(light, shade_constants);
|
||||||
|
|
||||||
|
while (sse_count--)
|
||||||
|
{
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p0 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p1 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p2 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||||
|
uint32_t p3 = source[spot];
|
||||||
|
xfrac += xstep;
|
||||||
|
yfrac += ystep;
|
||||||
|
|
||||||
|
// Lookup pixel from flat texture tile
|
||||||
|
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
|
||||||
|
SSE_SHADE(fg, shade_constants);
|
||||||
|
_mm_storeu_si128((__m128i*)dest, fg);
|
||||||
|
dest += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
// Current texture index in u,v.
|
// Current texture index in u,v.
|
||||||
|
|
|
@ -1528,7 +1528,6 @@ public:
|
||||||
fracstep = dc_iscale;
|
fracstep = dc_iscale;
|
||||||
frac = dc_texturefrac;
|
frac = dc_texturefrac;
|
||||||
|
|
||||||
{
|
|
||||||
const BYTE *source = dc_source;
|
const BYTE *source = dc_source;
|
||||||
|
|
||||||
if (count & 1) {
|
if (count & 1) {
|
||||||
|
@ -1562,7 +1561,6 @@ public:
|
||||||
dest += 32;
|
dest += 32;
|
||||||
} while (--count);
|
} while (--count);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class FillColumnHorizRGBACommand : public DrawerCommand
|
class FillColumnHorizRGBACommand : public DrawerCommand
|
||||||
|
|
Loading…
Reference in a new issue