Support for drawing in multiple passes

This commit is contained in:
Magnus Norddahl 2016-06-10 15:57:31 +02:00
parent 6c70eaea2f
commit 103a6baac5
3 changed files with 141 additions and 43 deletions

View file

@ -458,25 +458,34 @@ public:
// Number of active threads // Number of active threads
int num_cores = 1; int num_cores = 1;
// Range of rows processed this pass
int pass_start_y = 0;
int pass_end_y = 300;
uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
uint32_t *dc_temp_rgba; uint32_t *dc_temp_rgba;
// Checks if a line is rendered by this thread // Checks if a line is rendered by this thread
// Returns true when this thread must NOT draw `line`.
// Diff view: each line below shows the pre-commit text followed by the post-commit text.
bool line_skipped_by_thread(int line) bool line_skipped_by_thread(int line)
{ {
// Before: a line is skipped only when it does not fall on this thread's
// residue, i.e. line % num_cores differs from core.
// After: a line is also skipped when it lies outside the half-open row range
// [pass_start_y, pass_end_y) of the current pass.
return line % num_cores != core; return line < pass_start_y || line >= pass_end_y || line % num_cores != core;
} }
// The number of lines to skip to reach the first line to be rendered by this thread // The number of lines to skip to reach the first line to be rendered by this thread
// Returns how many lines, counted from `first_line`, precede the first line this thread draws.
// Diff view: each line below shows the pre-commit text followed by the post-commit text.
int skipped_by_thread(int first_line) int skipped_by_thread(int first_line)
{ {
// Before: only the per-core interleave offset was returned.
// After: pass_skip is the number of rows between first_line and pass_start_y,
// clamped to 0 when first_line is already at or past the start of the pass.
return (num_cores - (first_line - core) % num_cores) % num_cores; int pass_skip = MAX(pass_start_y - first_line, 0);
// core_skip is the extra number of rows needed, starting at first_line + pass_skip,
// to reach a row whose index is congruent to core modulo num_cores. The outer
// `% num_cores` folds the result into [0, num_cores), including the case where the
// inner remainder is zero or negative.
int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores;
// NOTE(review): pass_end_y is not consulted here, so the returned offset may point
// at or beyond the end of the pass; callers are presumably expected to pair this
// with count_for_thread — confirm.
return pass_skip + core_skip;
} }
// The number of lines to be rendered by this thread // The number of lines to be rendered by this thread
// Returns how many of the `count` lines starting at `first_line` this thread draws.
// Diff view: each line below shows the pre-commit text followed by the post-commit text.
int count_for_thread(int first_line, int count) int count_for_thread(int first_line, int count)
{ {
// Before: the count was the remaining lines after the skip, divided by num_cores
// and rounded up.
// After: lines_until_pass_end is the number of rows from first_line up to (but not
// including) pass_end_y, clamped to 0 when first_line is already past the pass.
return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; int lines_until_pass_end = MAX(pass_end_y - first_line, 0);
// Truncate the requested run so it never extends beyond the end of the pass.
count = MIN(count, lines_until_pass_end);
// Rows left after the initial skip, divided by num_cores; adding num_cores - 1
// first makes the division round up so a partial stride still counts as one row.
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
// The skip can exceed the (clamped) count, which would make c negative; report 0 instead.
return MAX(c, 0);
} }
// Calculate the dest address for the first line to be rendered by this thread // Calculate the dest address for the first line to be rendered by this thread
@ -522,6 +531,8 @@ class DrawerCommandQueue
bool no_threading = false; bool no_threading = false;
DrawerThread single_core_thread; DrawerThread single_core_thread;
int num_passes = 2;
int rows_in_pass = 540;
void StartThreads(); void StartThreads();
void StopThreads(); void StopThreads();

View file

@ -104,11 +104,19 @@ void DrawerCommandQueue::Finish()
thread.core = 0; thread.core = 0;
thread.num_cores = queue->threads.size() + 1; thread.num_cores = queue->threads.size() + 1;
size_t size = queue->active_commands.size(); for (int pass = 0; pass < queue->num_passes; pass++)
for (size_t i = 0; i < size; i++)
{ {
auto &command = queue->active_commands[i]; thread.pass_start_y = pass * queue->rows_in_pass;
command->Execute(&thread); thread.pass_end_y = (pass + 1) * queue->rows_in_pass;
if (pass + 1 == queue->num_passes)
thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT);
size_t size = queue->active_commands.size();
for (size_t i = 0; i < size; i++)
{
auto &command = queue->active_commands[i];
command->Execute(&thread);
}
} }
// Wait for everyone to finish: // Wait for everyone to finish:
@ -156,11 +164,19 @@ void DrawerCommandQueue::StartThreads()
start_lock.unlock(); start_lock.unlock();
// Do the work: // Do the work:
size_t size = queue->active_commands.size(); for (int pass = 0; pass < queue->num_passes; pass++)
for (size_t i = 0; i < size; i++)
{ {
auto &command = queue->active_commands[i]; thread->pass_start_y = pass * queue->rows_in_pass;
command->Execute(thread); thread->pass_end_y = (pass + 1) * queue->rows_in_pass;
if (pass + 1 == queue->num_passes)
thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT);
size_t size = queue->active_commands.size();
for (size_t i = 0; i < size; i++)
{
auto &command = queue->active_commands[i];
command->Execute(thread);
}
} }
// Notify main thread that we finished: // Notify main thread that we finished:
@ -1611,6 +1627,79 @@ public:
BYTE xshift = yshift - ds_xbits; BYTE xshift = yshift - ds_xbits;
int xmask = ((1 << ds_xbits) - 1) << ds_ybits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
int sse_count = count / 4;
count -= sse_count * 4;
if (shade_constants.simple_shade)
{
SSE_SHADE_SIMPLE_INIT(light);
while (sse_count--)
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
SSE_SHADE_SIMPLE(fg);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
else
{
SSE_SHADE_INIT(light, shade_constants);
while (sse_count--)
{
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p0 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p1 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p2 = source[spot];
xfrac += xstep;
yfrac += ystep;
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
uint32_t p3 = source[spot];
xfrac += xstep;
yfrac += ystep;
// Lookup pixel from flat texture tile
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
SSE_SHADE(fg, shade_constants);
_mm_storeu_si128((__m128i*)dest, fg);
dest += 4;
}
}
if (count == 0)
return;
do do
{ {
// Current texture index in u,v. // Current texture index in u,v.

View file

@ -1528,40 +1528,38 @@ public:
fracstep = dc_iscale; fracstep = dc_iscale;
frac = dc_texturefrac; frac = dc_texturefrac;
{ const BYTE *source = dc_source;
const BYTE *source = dc_source;
if (count & 1) { if (count & 1) {
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
}
if (count & 2) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest += 8;
}
if (count & 4) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest += 16;
}
count >>= 3;
if (!count) return;
do
{
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest[16] = source[frac >> FRACBITS]; frac += fracstep;
dest[20] = source[frac >> FRACBITS]; frac += fracstep;
dest[24] = source[frac >> FRACBITS]; frac += fracstep;
dest[28] = source[frac >> FRACBITS]; frac += fracstep;
dest += 32;
} while (--count);
} }
if (count & 2) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest += 8;
}
if (count & 4) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest += 16;
}
count >>= 3;
if (!count) return;
do
{
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest[16] = source[frac >> FRACBITS]; frac += fracstep;
dest[20] = source[frac >> FRACBITS]; frac += fracstep;
dest[24] = source[frac >> FRACBITS]; frac += fracstep;
dest[28] = source[frac >> FRACBITS]; frac += fracstep;
dest += 32;
} while (--count);
} }
}; };