mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-12 07:34:36 +00:00
Support for drawing in multiple passes
This commit is contained in:
parent
6c70eaea2f
commit
103a6baac5
3 changed files with 141 additions and 43 deletions
17
src/r_draw.h
17
src/r_draw.h
|
@ -458,25 +458,34 @@ public:
|
|||
// Number of active threads
|
||||
int num_cores = 1;
|
||||
|
||||
// Half-open range of rows [pass_start_y, pass_end_y) processed this pass
|
||||
int pass_start_y = 0;
|
||||
int pass_end_y = 300;
|
||||
|
||||
uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
|
||||
uint32_t *dc_temp_rgba;
|
||||
|
||||
// Returns true when this thread must NOT render `line` (i.e. the line is skipped).
// NOTE(review): this span is a rendered diff hunk, so two return statements
// appear below; presumably the first is the pre-change line and the second is
// its replacement - confirm against the commit.
|
||||
bool line_skipped_by_thread(int line)
|
||||
{
|
||||
// Core-ownership check only: a line belongs to this thread when line % num_cores == core.
return line % num_cores != core;
|
||||
// Same ownership check, but lines outside [pass_start_y, pass_end_y) are also skipped.
return line < pass_start_y || line >= pass_end_y || line % num_cores != core;
|
||||
}
|
||||
|
||||
// Returns the number of lines, counted from first_line, that this thread skips
// before reaching the first line it renders.
// NOTE(review): this span is a rendered diff hunk; the single-expression return
// below is presumably the pre-change body and the three statements after it the
// replacement - confirm against the commit.
|
||||
int skipped_by_thread(int first_line)
|
||||
{
|
||||
// Core-ownership only: distance from first_line to the next line with line % num_cores == core.
return (num_cores - (first_line - core) % num_cores) % num_cores;
|
||||
// Lines before pass_start_y are skipped outright; 0 when first_line is at or past the pass start.
int pass_skip = MAX(pass_start_y - first_line, 0);
|
||||
// From the first candidate line (first_line + pass_skip), advance to the next line owned by this core.
// The outer % num_cores maps a distance of num_cores back to 0.
int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores;
|
||||
return pass_skip + core_skip;
|
||||
}
|
||||
|
||||
// Returns how many of the `count` lines starting at first_line are rendered by this thread.
// NOTE(review): this span is a rendered diff hunk; the single-expression return
// below is presumably the pre-change body and the statements after it the
// replacement - confirm against the commit.
|
||||
int count_for_thread(int first_line, int count)
|
||||
{
|
||||
// Core-ownership only: lines left after the initial skip, divided by num_cores and rounded up.
return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
||||
// Lines at or after pass_end_y are not counted; 0 when first_line is already past the pass end.
int lines_until_pass_end = MAX(pass_end_y - first_line, 0);
|
||||
count = MIN(count, lines_until_pass_end);
|
||||
// Lines left after the initial skip, divided by num_cores and rounded up (one line per num_cores stride).
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
||||
// The skip can exceed the clamped count, making c negative; report zero lines in that case.
return MAX(c, 0);
|
||||
}
|
||||
|
||||
// Calculate the dest address for the first line to be rendered by this thread
|
||||
|
@ -522,6 +531,8 @@ class DrawerCommandQueue
|
|||
|
||||
bool no_threading = false;
|
||||
DrawerThread single_core_thread;
|
||||
int num_passes = 2;
|
||||
int rows_in_pass = 540;
|
||||
|
||||
void StartThreads();
|
||||
void StopThreads();
|
||||
|
|
|
@ -104,11 +104,19 @@ void DrawerCommandQueue::Finish()
|
|||
thread.core = 0;
|
||||
thread.num_cores = queue->threads.size() + 1;
|
||||
|
||||
size_t size = queue->active_commands.size();
|
||||
for (size_t i = 0; i < size; i++)
|
||||
for (int pass = 0; pass < queue->num_passes; pass++)
|
||||
{
|
||||
auto &command = queue->active_commands[i];
|
||||
command->Execute(&thread);
|
||||
thread.pass_start_y = pass * queue->rows_in_pass;
|
||||
thread.pass_end_y = (pass + 1) * queue->rows_in_pass;
|
||||
if (pass + 1 == queue->num_passes)
|
||||
thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT);
|
||||
|
||||
size_t size = queue->active_commands.size();
|
||||
for (size_t i = 0; i < size; i++)
|
||||
{
|
||||
auto &command = queue->active_commands[i];
|
||||
command->Execute(&thread);
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for everyone to finish:
|
||||
|
@ -156,11 +164,19 @@ void DrawerCommandQueue::StartThreads()
|
|||
start_lock.unlock();
|
||||
|
||||
// Do the work:
|
||||
size_t size = queue->active_commands.size();
|
||||
for (size_t i = 0; i < size; i++)
|
||||
for (int pass = 0; pass < queue->num_passes; pass++)
|
||||
{
|
||||
auto &command = queue->active_commands[i];
|
||||
command->Execute(thread);
|
||||
thread->pass_start_y = pass * queue->rows_in_pass;
|
||||
thread->pass_end_y = (pass + 1) * queue->rows_in_pass;
|
||||
if (pass + 1 == queue->num_passes)
|
||||
thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT);
|
||||
|
||||
size_t size = queue->active_commands.size();
|
||||
for (size_t i = 0; i < size; i++)
|
||||
{
|
||||
auto &command = queue->active_commands[i];
|
||||
command->Execute(thread);
|
||||
}
|
||||
}
|
||||
|
||||
// Notify main thread that we finished:
|
||||
|
@ -1611,6 +1627,79 @@ public:
|
|||
BYTE xshift = yshift - ds_xbits;
|
||||
int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
|
||||
|
||||
int sse_count = count / 4;
|
||||
count -= sse_count * 4;
|
||||
|
||||
if (shade_constants.simple_shade)
|
||||
{
|
||||
SSE_SHADE_SIMPLE_INIT(light);
|
||||
|
||||
while (sse_count--)
|
||||
{
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p0 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p1 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p2 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p3 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
// Lookup pixel from flat texture tile
|
||||
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
|
||||
SSE_SHADE_SIMPLE(fg);
|
||||
_mm_storeu_si128((__m128i*)dest, fg);
|
||||
dest += 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
SSE_SHADE_INIT(light, shade_constants);
|
||||
|
||||
while (sse_count--)
|
||||
{
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p0 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p1 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p2 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
uint32_t p3 = source[spot];
|
||||
xfrac += xstep;
|
||||
yfrac += ystep;
|
||||
|
||||
// Lookup pixel from flat texture tile
|
||||
__m128i fg = _mm_set_epi32(p3, p2, p1, p0);
|
||||
SSE_SHADE(fg, shade_constants);
|
||||
_mm_storeu_si128((__m128i*)dest, fg);
|
||||
dest += 4;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0)
|
||||
return;
|
||||
|
||||
do
|
||||
{
|
||||
// Current texture index in u,v.
|
||||
|
|
|
@ -1528,40 +1528,38 @@ public:
|
|||
fracstep = dc_iscale;
|
||||
frac = dc_texturefrac;
|
||||
|
||||
{
|
||||
const BYTE *source = dc_source;
|
||||
const BYTE *source = dc_source;
|
||||
|
||||
if (count & 1) {
|
||||
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
|
||||
}
|
||||
if (count & 2) {
|
||||
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest += 8;
|
||||
}
|
||||
if (count & 4) {
|
||||
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest += 16;
|
||||
}
|
||||
count >>= 3;
|
||||
if (!count) return;
|
||||
|
||||
do
|
||||
{
|
||||
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[16] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[20] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[24] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[28] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest += 32;
|
||||
} while (--count);
|
||||
if (count & 1) {
|
||||
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
|
||||
}
|
||||
if (count & 2) {
|
||||
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest += 8;
|
||||
}
|
||||
if (count & 4) {
|
||||
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest += 16;
|
||||
}
|
||||
count >>= 3;
|
||||
if (!count) return;
|
||||
|
||||
do
|
||||
{
|
||||
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[16] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[20] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[24] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest[28] = source[frac >> FRACBITS]; frac += fracstep;
|
||||
dest += 32;
|
||||
} while (--count);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue