Add Rt drawers

This commit is contained in:
Magnus Norddahl 2016-12-06 07:23:55 +01:00
parent f4172782ed
commit 6a3ae2ce45
3 changed files with 904 additions and 54 deletions

View File

@ -202,43 +202,85 @@ namespace swrenderer
void Execute(DrawerThread *thread) override;
};
//class RtInitColsPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; };
//class DrawColumnHorizPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; };
class FillColumnHorizPalCommand : public DrawerCommand
class RtInitColsPalCommand : public DrawerCommand
{
public:
RtInitColsPalCommand(uint8_t *buff);
void Execute(DrawerThread *thread) override;
FString DebugInfo() override { return "RtInitColsPalCommand"; }
private:
uint8_t *buff;
};
class PalColumnHorizCommand : public DrawerCommand
{
public:
PalColumnHorizCommand();
protected:
const uint8_t *_source;
fixed_t _iscale;
fixed_t _texturefrac;
int _count;
int _color;
int _x;
int _yl;
};
class DrawColumnHorizPalCommand : public PalColumnHorizCommand
{
public:
void Execute(DrawerThread *thread) override;
FString DebugInfo() override { return "FillColumnHorizPalCommand"; }
};
class FillColumnHorizPalCommand : public PalColumnHorizCommand
{
public:
void Execute(DrawerThread *thread) override;
FString DebugInfo() override { return "FillColumnHorizPalCommand"; }
};
class PalRtCommand : public DrawerCommand
{
public:
PalRtCommand(int hx, int sx, int yl, int yh);
FString DebugInfo() override { return "PalRtCommand"; }
protected:
int hx, sx, yl, yh;
uint8_t *_destorg;
int _pitch;
const uint8_t *_colormap;
const uint32_t *_srcblend;
const uint32_t *_destblend;
const uint8_t *_translation;
int _color;
};
class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
}

View File

@ -622,7 +622,10 @@ namespace swrenderer
for (int y = 3; y >= 0; y--)
horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0];
DrawerCommandQueue::QueueCommand<RtInitColsRGBACommand>(buffer);
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<RtInitColsRGBACommand>(buffer);
else
DrawerCommandQueue::QueueCommand<RtInitColsPalCommand>(buffer);
}
void rt_span_coverage(int x, int start, int stop)
@ -861,10 +864,17 @@ namespace swrenderer
(*span)[1] = dc_yh;
*span += 2;
if (drawer_needs_pal_input || !r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand<uint8_t>>();
if (r_swtruecolor)
{
if (drawer_needs_pal_input)
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand<uint8_t>>();
else
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand<uint32_t>>();
}
else
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand<uint32_t>>();
{
DrawerCommandQueue::QueueCommand<DrawColumnHorizPalCommand>();
}
}
// Copies one span at hx to the screen at sx.
@ -879,11 +889,18 @@ namespace swrenderer
// Copies all four spans to the screen starting at sx.
void rt_copy4cols(int sx, int yl, int yh)
{
// To do: we could do this with SSE using __m128i
rt_copy1col(0, sx, yl, yh);
rt_copy1col(1, sx + 1, yl, yh);
rt_copy1col(2, sx + 2, yl, yh);
rt_copy1col(3, sx + 3, yl, yh);
if (r_swtruecolor)
{
// To do: we could do this with SSE using __m128i
rt_copy1col(0, sx, yl, yh);
rt_copy1col(1, sx + 1, yl, yh);
rt_copy1col(2, sx + 2, yl, yh);
rt_copy1col(3, sx + 3, yl, yh);
}
else
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4CopyPalCommand>(0, sx, yl, yh);
}
}
// Maps one span at hx to the screen at sx.
@ -944,18 +961,28 @@ namespace swrenderer
void rt_tlateadd1col(int hx, int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1AddClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt1AddTranslatedPalCommand>(hx, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedPalCommand>(hx, sx, yl, yh);
rt_add1col(hx, sx, yl, yh);
}
}
// Translates and adds all four spans to the screen starting at sx without clamping.
void rt_tlateadd4cols(int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4AddClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt4AddTranslatedPalCommand>(0, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedPalCommand>(0, sx, yl, yh);
rt_add4cols(sx, yl, yh);
}
}
// Shades one span at hx to the screen at sx.
@ -998,18 +1025,28 @@ namespace swrenderer
void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1AddClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt1AddClampTranslatedPalCommand>(hx, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedPalCommand>(hx, sx, yl, yh);
rt_addclamp1col(hx, sx, yl, yh);
}
}
// Translates and adds all four spans to the screen starting at sx with clamping.
void rt_tlateaddclamp4cols(int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4AddClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt4AddClampTranslatedPalCommand>(0, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedPalCommand>(0, sx, yl, yh);
rt_addclamp4cols(sx, yl, yh);
}
}
// Subtracts one span at hx to the screen at sx with clamping.
@ -1034,18 +1071,28 @@ namespace swrenderer
void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1SubClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt1SubClampTranslatedPalCommand>(hx, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedPalCommand>(hx, sx, yl, yh);
rt_subclamp1col(hx, sx, yl, yh);
}
}
// Translates and subtracts all four spans to the screen starting at sx with clamping.
void rt_tlatesubclamp4cols(int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4SubClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt4SubClampTranslatedPalCommand>(0, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedPalCommand>(0, sx, yl, yh);
rt_subclamp4cols(sx, yl, yh);
}
}
// Subtracts one span at hx from the screen at sx with clamping.
@ -1070,18 +1117,28 @@ namespace swrenderer
void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1RevSubClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt1RevSubClampTranslatedPalCommand>(hx, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedPalCommand>(hx, sx, yl, yh);
rt_revsubclamp1col(hx, sx, yl, yh);
}
}
// Translates and subtracts all four spans from the screen starting at sx with clamping.
void rt_tlaterevsubclamp4cols(int sx, int yl, int yh)
{
if (r_swtruecolor)
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4RevSubClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
else
DrawerCommandQueue::QueueCommand<DrawColumnRt4RevSubClampTranslatedPalCommand>(0, sx, yl, yh);
{
DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedPalCommand>(0, sx, yl, yh);
rt_revsubclamp4cols(sx, yl, yh);
}
}
uint32_t vlinec1()

View File

@ -1,3 +1,42 @@
/*
** r_drawt.cpp
** Faster column drawers for modern processors
**
**---------------------------------------------------------------------------
** Copyright 1998-2006 Randy Heit
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
**
** 1. Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** 3. The name of the author may not be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**---------------------------------------------------------------------------
**
** These functions stretch columns into a temporary buffer and then
** map them to the screen. On modern machines, this is faster than drawing
** them directly to the screen.
**
** Will I be able to even understand any of this if I come back to it later?
** Let's hope so. :-)
*/
#include "templates.h"
#include "doomtype.h"
@ -9,105 +48,817 @@
#include "v_video.h"
#include "r_draw_pal.h"
// I should have commented this stuff better.
//
// dc_temp is the buffer R_DrawColumnHoriz writes into.
// dc_tspans points into it.
// dc_ctspan points into dc_tspans.
// horizspan also points into dc_tspans.
// dc_ctspan is advanced while drawing into dc_temp.
// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen.
namespace swrenderer
{
PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh)
RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff)
{
}
void RtInitColsPalCommand::Execute(DrawerThread *thread)
{
thread->dc_temp = buff == nullptr ? thread->dc_temp_buff : buff;
}
/////////////////////////////////////////////////////////////////////
PalColumnHorizCommand::PalColumnHorizCommand()
{
using namespace drawerargs;
_source = dc_source;
_iscale = dc_iscale;
_texturefrac = dc_texturefrac;
_count = dc_count;
_color = dc_color;
_x = dc_x;
_yl = dc_yl;
}
void DrawColumnHorizPalCommand::Execute(DrawerThread *thread)
{
int count = _count;
uint8_t *dest;
fixed_t fracstep;
fixed_t frac;
if (count <= 0)
return;
{
int x = _x & 3;
dest = &thread->dc_temp[x + 4 * _yl];
}
fracstep = _iscale;
frac = _texturefrac;
const uint8_t *source = _source;
if (count & 1) {
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
}
if (count & 2) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest += 8;
}
if (count & 4) {
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest += 16;
}
count >>= 3;
if (!count) return;
do
{
dest[0] = source[frac >> FRACBITS]; frac += fracstep;
dest[4] = source[frac >> FRACBITS]; frac += fracstep;
dest[8] = source[frac >> FRACBITS]; frac += fracstep;
dest[12] = source[frac >> FRACBITS]; frac += fracstep;
dest[16] = source[frac >> FRACBITS]; frac += fracstep;
dest[20] = source[frac >> FRACBITS]; frac += fracstep;
dest[24] = source[frac >> FRACBITS]; frac += fracstep;
dest[28] = source[frac >> FRACBITS]; frac += fracstep;
dest += 32;
} while (--count);
}
void FillColumnHorizPalCommand::Execute(DrawerThread *thread)
{
int count = _count;
uint8_t color = _color;
uint8_t *dest;
if (count <= 0)
return;
{
int x = _x & 3;
dest = &thread->dc_temp[x + 4 * _yl];
}
if (count & 1) {
*dest = color;
dest += 4;
}
if (!(count >>= 1))
return;
do {
dest[0] = color; dest[4] = color;
dest += 8;
} while (--count);
}
/////////////////////////////////////////////////////////////////////
PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh)
{
using namespace drawerargs;
_destorg = dc_destorg;
_pitch = dc_pitch;
_colormap = dc_colormap;
_srcblend = dc_srcblend;
_destblend = dc_destblend;
_translation = dc_translation;
_color = dc_color;
}
void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread)
{
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx];
pitch = _pitch;
if (count & 1) {
*dest = *source;
source += 4;
dest += pitch;
}
if (count & 2) {
dest[0] = source[0];
dest[pitch] = source[4];
source += 8;
dest += pitch*2;
}
if (!(count >>= 2))
return;
do {
dest[0] = source[0];
dest[pitch] = source[4];
dest[pitch*2] = source[8];
dest[pitch*3] = source[12];
source += 16;
dest += pitch*4;
} while (--count);
}
void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread)
{
int *source;
int *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
dest = (int *)(ylookup[yl] + sx + _destorg);
source = (int *)(&thread->dc_temp[yl*4]);
pitch = _pitch/sizeof(int);
if (count & 1) {
*dest = *source;
source += 4/sizeof(int);
dest += pitch;
}
if (!(count >>= 1))
return;
do {
dest[0] = source[0];
dest[pitch] = source[4/sizeof(int)];
source += 8/sizeof(int);
dest += pitch*2;
} while (--count);
}
void DrawColumnRt1PalCommand::Execute(DrawerThread *thread)
{
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
colormap = _colormap;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx];
pitch = _pitch;
if (count & 1) {
*dest = colormap[*source];
source += 4;
dest += pitch;
}
if (!(count >>= 1))
return;
do {
dest[0] = colormap[source[0]];
dest[pitch] = colormap[source[4]];
source += 8;
dest += pitch*2;
} while (--count);
}
void DrawColumnRt4PalCommand::Execute(DrawerThread *thread)
{
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
colormap = _colormap;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4];
pitch = _pitch;
if (count & 1) {
dest[0] = colormap[source[0]];
dest[1] = colormap[source[1]];
dest[2] = colormap[source[2]];
dest[3] = colormap[source[3]];
source += 4;
dest += pitch;
}
if (!(count >>= 1))
return;
do {
dest[0] = colormap[source[0]];
dest[1] = colormap[source[1]];
dest[2] = colormap[source[2]];
dest[3] = colormap[source[3]];
dest[pitch] = colormap[source[4]];
dest[pitch+1] = colormap[source[5]];
dest[pitch+2] = colormap[source[6]];
dest[pitch+3] = colormap[source[7]];
source += 8;
dest += pitch*2;
} while (--count);
}
void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread)
{
int count = yh - yl + 1;
uint8_t *source = &thread->dc_temp[yl*4 + hx];
const uint8_t *translation = _translation;
// Things we do to hit the compiler's optimizer with a clue bat:
// 1. Parallelism is explicitly spelled out by using a separate
// C instruction for each assembly instruction. GCC lets me
// have four temporaries, but VC++ spills to the stack with
// more than two. Two is probably optimal, anyway.
// 2. The results of the translation lookups are explicitly
// stored in byte-sized variables. This causes the VC++ code
// to use byte mov instructions in most cases; for apparently
// random reasons, it will use movzx for some places. GCC
// ignores this and uses movzx always.
// Do 8 rows at a time.
for (int count8 = count >> 3; count8; --count8)
{
int c0, c1;
uint8_t b0, b1;
c0 = source[0]; c1 = source[4];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[4] = b1;
c0 = source[8]; c1 = source[12];
b0 = translation[c0]; b1 = translation[c1];
source[8] = b0; source[12] = b1;
c0 = source[16]; c1 = source[20];
b0 = translation[c0]; b1 = translation[c1];
source[16] = b0; source[20] = b1;
c0 = source[24]; c1 = source[28];
b0 = translation[c0]; b1 = translation[c1];
source[24] = b0; source[28] = b1;
source += 32;
}
// Finish by doing 1 row at a time.
for (count &= 7; count; --count, source += 4)
{
source[0] = translation[source[0]];
}
}
void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread)
{
int count = yh - yl + 1;
uint8_t *source = &thread->dc_temp[yl*4];
const uint8_t *translation = _translation;
int c0, c1;
uint8_t b0, b1;
// Do 2 rows at a time.
for (int count8 = count >> 1; count8; --count8)
{
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
c0 = source[4]; c1 = source[5];
b0 = translation[c0]; b1 = translation[c1];
source[4] = b0; source[5] = b1;
c0 = source[6]; c1 = source[7];
b0 = translation[c0]; b1 = translation[c1];
source[6] = b0; source[7] = b1;
source += 8;
}
// Do the final row if count was odd.
if (count & 1)
{
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
}
}
void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread)
{
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx];
pitch = _pitch;
colormap = _colormap;
do {
uint32_t fg = colormap[*source];
uint32_t bg = *dest;
fg = fg2rgb[fg];
bg = bg2rgb[bg];
fg = (fg+bg) | 0x1f07c1f;
*dest = RGB32k.All[fg & (fg>>15)];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread)
{
}
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
void DrawColumnRt1AddTranslatedPalCommand::Execute(DrawerThread *thread)
{
}
count = yh-yl;
if (count < 0)
return;
count++;
void DrawColumnRt4AddTranslatedPalCommand::Execute(DrawerThread *thread)
{
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4];
pitch = _pitch;
colormap = _colormap;
do {
uint32_t fg = colormap[source[0]];
uint32_t bg = dest[0];
fg = fg2rgb[fg];
bg = bg2rgb[bg];
fg = (fg+bg) | 0x1f07c1f;
dest[0] = RGB32k.All[fg & (fg>>15)];
fg = colormap[source[1]];
bg = dest[1];
fg = fg2rgb[fg];
bg = bg2rgb[bg];
fg = (fg+bg) | 0x1f07c1f;
dest[1] = RGB32k.All[fg & (fg>>15)];
fg = colormap[source[2]];
bg = dest[2];
fg = fg2rgb[fg];
bg = bg2rgb[bg];
fg = (fg+bg) | 0x1f07c1f;
dest[2] = RGB32k.All[fg & (fg>>15)];
fg = colormap[source[3]];
bg = dest[3];
fg = fg2rgb[fg];
bg = bg2rgb[bg];
fg = (fg+bg) | 0x1f07c1f;
dest[3] = RGB32k.All[fg & (fg>>15)];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread)
{
uint32_t *fgstart;
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
fgstart = &Col2RGB8[0][_color];
colormap = _colormap;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx];
pitch = _pitch;
do {
uint32_t val = colormap[*source];
uint32_t fg = fgstart[val<<8];
val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f;
*dest = RGB32k.All[val & (val>>15)];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread)
{
uint32_t *fgstart;
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
fgstart = &Col2RGB8[0][_color];
colormap = _colormap;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4];
pitch = _pitch;
do {
uint32_t val;
val = colormap[source[0]];
val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f;
dest[0] = RGB32k.All[val & (val>>15)];
val = colormap[source[1]];
val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f;
dest[1] = RGB32k.All[val & (val>>15)];
val = colormap[source[2]];
val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f;
dest[2] = RGB32k.All[val & (val>>15)];
val = colormap[source[3]];
val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f;
dest[3] = RGB32k.All[val & (val>>15)];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread)
{
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx];
pitch = _pitch;
colormap = _colormap;
do {
uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest];
uint32_t b = a;
a |= 0x01f07c1f;
b &= 0x40100400;
a &= 0x3fffffff;
b = b - (b >> 5);
a |= b;
*dest = RGB32k.All[(a>>15) & a];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread)
{
}
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
void DrawColumnRt1AddClampTranslatedPalCommand::Execute(DrawerThread *thread)
{
}
count = yh-yl;
if (count < 0)
return;
count++;
void DrawColumnRt4AddClampTranslatedPalCommand::Execute(DrawerThread *thread)
{
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4];
pitch = _pitch;
colormap = _colormap;
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
do {
uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]];
uint32_t b = a;
a |= 0x01f07c1f;
b &= 0x40100400;
a &= 0x3fffffff;
b = b - (b >> 5);
a |= b;
dest[0] = RGB32k.All[(a>>15) & a];
a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]];
b = a;
a |= 0x01f07c1f;
b &= 0x40100400;
a &= 0x3fffffff;
b = b - (b >> 5);
a |= b;
dest[1] = RGB32k.All[(a>>15) & a];
a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]];
b = a;
a |= 0x01f07c1f;
b &= 0x40100400;
a &= 0x3fffffff;
b = b - (b >> 5);
a |= b;
dest[2] = RGB32k.All[(a>>15) & a];
a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]];
b = a;
a |= 0x01f07c1f;
b &= 0x40100400;
a &= 0x3fffffff;
b = b - (b >> 5);
a |= b;
dest[3] = RGB32k.All[(a>>15) & a];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread)
{
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx];
pitch = _pitch;
colormap = _colormap;
do {
uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest];
uint32_t b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
*dest = RGB32k.All[(a>>15) & a];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread)
{
}
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
void DrawColumnRt1SubClampTranslatedPalCommand::Execute(DrawerThread *thread)
{
}
count = yh-yl;
if (count < 0)
return;
count++;
void DrawColumnRt4SubClampTranslatedPalCommand::Execute(DrawerThread *thread)
{
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4];
pitch = _pitch;
colormap = _colormap;
do {
uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]];
uint32_t b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[0] = RGB32k.All[(a>>15) & a];
a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]];
b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[1] = RGB32k.All[(a>>15) & a];
a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]];
b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[2] = RGB32k.All[(a>>15) & a];
a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]];
b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[3] = RGB32k.All[(a>>15) & a];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread)
{
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
count = yh-yl;
if (count < 0)
return;
count++;
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4 + hx];
pitch = _pitch;
colormap = _colormap;
do {
uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]];
uint32_t b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
*dest = RGB32k.All[(a>>15) & a];
source += 4;
dest += pitch;
} while (--count);
}
void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread)
{
}
const uint8_t *colormap;
uint8_t *source;
uint8_t *dest;
int count;
int pitch;
void DrawColumnRt1RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread)
{
}
count = yh-yl;
if (count < 0)
return;
count++;
void DrawColumnRt4RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread)
{
const uint32_t *fg2rgb = _srcblend;
const uint32_t *bg2rgb = _destblend;
dest = ylookup[yl] + sx + _destorg;
source = &thread->dc_temp[yl*4];
pitch = _pitch;
colormap = _colormap;
do {
uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]];
uint32_t b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[0] = RGB32k.All[(a>>15) & a];
a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]];
b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[1] = RGB32k.All[(a>>15) & a];
a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]];
b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[2] = RGB32k.All[(a>>15) & a];
a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]];
b = a;
b &= 0x40100400;
b = b - (b >> 5);
a &= b;
a |= 0x01f07c1f;
dest[3] = RGB32k.All[(a>>15) & a];
source += 4;
dest += pitch;
} while (--count);
}
}