Add thread awareness to the rt family of drawers

This commit is contained in:
Magnus Norddahl 2016-12-06 15:13:43 +01:00
parent 6a3ae2ce45
commit c16506bf59
3 changed files with 125 additions and 106 deletions

View file

@ -925,18 +925,28 @@ namespace swrenderer
void rt_tlate1col(int hx, int sx, int yl, int yh) void rt_tlate1col(int hx, int sx, int yl, int yh)
{ {
if (r_swtruecolor) if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedLLVMCommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedLLVMCommand>(hx, sx, yl, yh);
}
else else
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedPalCommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedPalCommand>(hx, sx, yl, yh);
rt_map1col(hx, sx, yl, yh);
}
} }
// Translates all four spans to the screen starting at sx. // Translates all four spans to the screen starting at sx.
void rt_tlate4cols(int sx, int yl, int yh) void rt_tlate4cols(int sx, int yl, int yh)
{ {
if (r_swtruecolor) if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedLLVMCommand>(0, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedLLVMCommand>(0, sx, yl, yh);
}
else else
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedPalCommand>(0, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedPalCommand>(0, sx, yl, yh);
rt_map4cols(sx, yl, yh);
}
} }
// Adds one span at hx to the screen at sx without clamping. // Adds one span at hx to the screen at sx without clamping.

View file

@ -91,18 +91,20 @@ namespace swrenderer
fixed_t fracstep; fixed_t fracstep;
fixed_t frac; fixed_t frac;
count = thread->count_for_thread(_yl, count);
if (count <= 0) if (count <= 0)
return; return;
{
int x = _x & 3;
dest = &thread->dc_temp[x + 4 * _yl];
}
fracstep = _iscale; fracstep = _iscale;
frac = _texturefrac; frac = _texturefrac;
const uint8_t *source = _source; const uint8_t *source = _source;
int x = _x & 3;
dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4];
frac += fracstep * thread->skipped_by_thread(_yl);
fracstep *= thread->num_cores;
if (count & 1) { if (count & 1) {
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
} }
@ -141,13 +143,12 @@ namespace swrenderer
uint8_t color = _color; uint8_t color = _color;
uint8_t *dest; uint8_t *dest;
count = thread->count_for_thread(_yl, count);
if (count <= 0) if (count <= 0)
return; return;
{
int x = _x & 3; int x = _x & 3;
dest = &thread->dc_temp[x + 4 * _yl]; dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4];
}
if (count & 1) { if (count & 1) {
*dest = color; *dest = color;
@ -183,14 +184,15 @@ namespace swrenderer
int count; int count;
int pitch; int pitch;
count = yh-yl; count = yh - yl + 1;
if (count < 0)
return;
count++;
dest = ylookup[yl] + sx + _destorg; count = thread->count_for_thread(yl, count);
source = &thread->dc_temp[yl*4 + hx]; if (count <= 0)
pitch = _pitch; return;
dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
pitch = _pitch * thread->num_cores;
if (count & 1) { if (count & 1) {
*dest = *source; *dest = *source;
@ -223,14 +225,15 @@ namespace swrenderer
int count; int count;
int pitch; int pitch;
count = yh-yl; count = yh - yl + 1;
if (count < 0)
return;
count++;
dest = (int *)(ylookup[yl] + sx + _destorg); count = thread->count_for_thread(yl, count);
source = (int *)(&thread->dc_temp[yl*4]); if (count <= 0)
pitch = _pitch/sizeof(int); return;
dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg);
source = (int *)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]);
pitch = _pitch*thread->num_cores/sizeof(int);
if (count & 1) { if (count & 1) {
*dest = *source; *dest = *source;
@ -256,15 +259,16 @@ namespace swrenderer
int count; int count;
int pitch; int pitch;
count = yh-yl; count = yh - yl + 1;
if (count < 0)
count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
colormap = _colormap; colormap = _colormap;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx]; source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx];
pitch = _pitch; pitch = _pitch*thread->num_cores;
if (count & 1) { if (count & 1) {
*dest = colormap[*source]; *dest = colormap[*source];
@ -290,15 +294,16 @@ namespace swrenderer
int count; int count;
int pitch; int pitch;
count = yh-yl; count = yh - yl + 1;
if (count < 0)
count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
colormap = _colormap; colormap = _colormap;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
pitch = _pitch; pitch = _pitch*thread->num_cores;
if (count & 1) { if (count & 1) {
dest[0] = colormap[source[0]]; dest[0] = colormap[source[0]];
@ -328,7 +333,11 @@ namespace swrenderer
void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread)
{ {
int count = yh - yl + 1; int count = yh - yl + 1;
uint8_t *source = &thread->dc_temp[yl*4 + hx]; count = thread->count_for_thread(yl, count);
if (count <= 0)
return;
uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
const uint8_t *translation = _translation; const uint8_t *translation = _translation;
// Things we do to hit the compiler's optimizer with a clue bat: // Things we do to hit the compiler's optimizer with a clue bat:
@ -376,7 +385,11 @@ namespace swrenderer
void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread)
{ {
int count = yh - yl + 1; int count = yh - yl + 1;
uint8_t *source = &thread->dc_temp[yl*4]; count = thread->count_for_thread(yl, count);
if (count <= 0)
return;
uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
const uint8_t *translation = _translation; const uint8_t *translation = _translation;
int c0, c1; int c0, c1;
uint8_t b0, b1; uint8_t b0, b1;
@ -420,19 +433,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend; const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
do { do {
@ -453,19 +465,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend; const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
do { do {
@ -509,19 +520,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
fgstart = &Col2RGB8[0][_color]; fgstart = &Col2RGB8[0][_color];
colormap = _colormap; colormap = _colormap;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
pitch = _pitch; pitch = _pitch * thread->num_cores;
do { do {
uint32_t val = colormap[*source]; uint32_t val = colormap[*source];
@ -539,19 +549,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
fgstart = &Col2RGB8[0][_color]; fgstart = &Col2RGB8[0][_color];
colormap = _colormap; colormap = _colormap;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
pitch = _pitch; pitch = _pitch * thread->num_cores;
do { do {
uint32_t val; uint32_t val;
@ -582,19 +591,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend; const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
do { do {
@ -617,17 +625,16 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
@ -681,19 +688,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend; const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
do { do {
@ -715,19 +721,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend; const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
do { do {
@ -774,19 +779,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend; const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
do { do {
@ -808,19 +812,18 @@ namespace swrenderer
const uint8_t *colormap; const uint8_t *colormap;
uint8_t *source; uint8_t *source;
uint8_t *dest; uint8_t *dest;
int count;
int pitch; int pitch;
count = yh-yl; int count = yh - yl + 1;
if (count < 0) count = thread->count_for_thread(yl, count);
if (count <= 0)
return; return;
count++;
const uint32_t *fg2rgb = _srcblend; const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend; const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg; dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
source = &thread->dc_temp[yl*4]; source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
pitch = _pitch; pitch = _pitch * thread->num_cores;
colormap = _colormap; colormap = _colormap;
do { do {

View file

@ -89,6 +89,12 @@ public:
{ {
return dest + skipped_by_thread(first_line) * pitch; return dest + skipped_by_thread(first_line) * pitch;
} }
// Index of the first line in the dc_temp buffer used by this thread for a
// column whose first screen line is first_line.
//
// first_line is advanced by skipped_by_thread(first_line) and the sum is then
// divided by num_cores.
// NOTE(review): presumably this maps the first screen line this thread draws
// to a per-thread slot, so each thread's lines sit next to each other in
// dc_temp -- confirm against skipped_by_thread().
int temp_line_for_thread(int first_line)
{
	const auto first_drawn_line = first_line + skipped_by_thread(first_line);
	return first_drawn_line / num_cores;
}
}; };
// Task to be executed by each worker thread // Task to be executed by each worker thread