diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index f1297a06b..f3b220f82 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -202,43 +202,85 @@ namespace swrenderer void Execute(DrawerThread *thread) override; }; - //class RtInitColsPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; }; - //class DrawColumnHorizPalCommand : public DrawerCommand { public: void Execute(DrawerThread *thread) override; }; - class FillColumnHorizPalCommand : public DrawerCommand + class RtInitColsPalCommand : public DrawerCommand + { + public: + RtInitColsPalCommand(uint8_t *buff); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "RtInitColsPalCommand"; } + + private: + uint8_t *buff; + }; + + class PalColumnHorizCommand : public DrawerCommand + { + public: + PalColumnHorizCommand(); + + protected: + const uint8_t *_source; + fixed_t _iscale; + fixed_t _texturefrac; + int _count; + int _color; + int _x; + int _yl; + }; + + class DrawColumnHorizPalCommand : public PalColumnHorizCommand { public: void Execute(DrawerThread *thread) override; FString DebugInfo() override { return "FillColumnHorizPalCommand"; } }; - + + class FillColumnHorizPalCommand : public PalColumnHorizCommand + { + public: + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "FillColumnHorizPalCommand"; } + }; + class PalRtCommand : public DrawerCommand { public: PalRtCommand(int hx, int sx, int yl, int yh); FString DebugInfo() override { return "PalRtCommand"; } + + protected: + int hx, sx, yl, yh; + uint8_t *_destorg; + int _pitch; + const uint8_t *_colormap; + const uint32_t *_srcblend; + const uint32_t *_destblend; + const uint8_t *_translation; + int _color; }; class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; + //class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; } diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp index 2326cab91..77d450006 100644 --- a/src/r_draw_tc.cpp +++ b/src/r_draw_tc.cpp @@ -622,7 +622,10 @@ namespace swrenderer for (int y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - DrawerCommandQueue::QueueCommand(buffer); + if (r_swtruecolor) + DrawerCommandQueue::QueueCommand(buffer); + else + DrawerCommandQueue::QueueCommand(buffer); } void rt_span_coverage(int x, int start, int stop) @@ -861,10 +864,17 @@ namespace swrenderer (*span)[1] = dc_yh; *span += 2; - if (drawer_needs_pal_input || !r_swtruecolor) - DrawerCommandQueue::QueueCommand>(); + if (r_swtruecolor) + { + if (drawer_needs_pal_input) + DrawerCommandQueue::QueueCommand>(); + else + DrawerCommandQueue::QueueCommand>(); + } else - DrawerCommandQueue::QueueCommand>(); + { + DrawerCommandQueue::QueueCommand(); + } } // Copies one span at hx to the screen at sx. @@ -879,11 +889,18 @@ namespace swrenderer // Copies all four spans to the screen starting at sx. void rt_copy4cols(int sx, int yl, int yh) { - // To do: we could do this with SSE using __m128i - rt_copy1col(0, sx, yl, yh); - rt_copy1col(1, sx + 1, yl, yh); - rt_copy1col(2, sx + 2, yl, yh); - rt_copy1col(3, sx + 3, yl, yh); + if (r_swtruecolor) + { + // To do: we could do this with SSE using __m128i + rt_copy1col(0, sx, yl, yh); + rt_copy1col(1, sx + 1, yl, yh); + rt_copy1col(2, sx + 2, yl, yh); + rt_copy1col(3, sx + 3, yl, yh); + } + else + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } } // Maps one span at hx to the screen at sx. @@ -944,18 +961,28 @@ namespace swrenderer void rt_tlateadd1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_add1col(hx, sx, yl, yh); + } } // Translates and adds all four spans to the screen starting at sx without clamping. void rt_tlateadd4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_add4cols(sx, yl, yh); + } } // Shades one span at hx to the screen at sx. @@ -998,18 +1025,28 @@ namespace swrenderer void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_addclamp1col(hx, sx, yl, yh); + } } // Translates and adds all four spans to the screen starting at sx with clamping. void rt_tlateaddclamp4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_addclamp4cols(sx, yl, yh); + } } // Subtracts one span at hx to the screen at sx with clamping. @@ -1034,18 +1071,28 @@ namespace swrenderer void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_subclamp1col(hx, sx, yl, yh); + } } // Translates and subtracts all four spans to the screen starting at sx with clamping. void rt_tlatesubclamp4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_subclamp4cols(sx, yl, yh); + } } // Subtracts one span at hx from the screen at sx with clamping. @@ -1070,18 +1117,28 @@ namespace swrenderer void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + rt_revsubclamp1col(hx, sx, yl, yh); + } } // Translates and subtracts all four spans from the screen starting at sx with clamping. void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) { if (r_swtruecolor) + { DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + rt_revsubclamp4cols(sx, yl, yh); + } } uint32_t vlinec1() diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp index b35046285..b9f0c378d 100644 --- a/src/r_drawt_pal.cpp +++ b/src/r_drawt_pal.cpp @@ -1,3 +1,42 @@ +/* +** r_drawt.cpp +** Faster column drawers for modern processors +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** These functions stretch columns into a temporary buffer and then +** map them to the screen. On modern machines, this is faster than drawing +** them directly to the screen. +** +** Will I be able to even understand any of this if I come back to it later? +** Let's hope so. :-) +*/ #include "templates.h" #include "doomtype.h" @@ -9,105 +48,817 @@ #include "v_video.h" #include "r_draw_pal.h" +// I should have commented this stuff better. +// +// dc_temp is the buffer R_DrawColumnHoriz writes into. +// dc_tspans points into it. +// dc_ctspan points into dc_tspans. +// horizspan also points into dc_tspans. + +// dc_ctspan is advanced while drawing into dc_temp. +// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. + namespace swrenderer { - PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) + RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff) { } + void RtInitColsPalCommand::Execute(DrawerThread *thread) + { + thread->dc_temp = buff == nullptr ? thread->dc_temp_buff : buff; + } + + ///////////////////////////////////////////////////////////////////// + + PalColumnHorizCommand::PalColumnHorizCommand() + { + using namespace drawerargs; + + _source = dc_source; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _color = dc_color; + _x = dc_x; + _yl = dc_yl; + } + + void DrawColumnHorizPalCommand::Execute(DrawerThread *thread) + { + int count = _count; + uint8_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = _x & 3; + dest = &thread->dc_temp[x + 4 * _yl]; + } + fracstep = _iscale; + frac = _texturefrac; + + const uint8_t *source = _source; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + void FillColumnHorizPalCommand::Execute(DrawerThread *thread) { + int count = _count; + uint8_t color = _color; + uint8_t *dest; + + if (count <= 0) + return; + + { + int x = _x & 3; + dest = &thread->dc_temp[x + 4 * _yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } + + ///////////////////////////////////////////////////////////////////// + + PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh) + { + using namespace drawerargs; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + _srcblend = dc_srcblend; + _destblend = dc_destblend; + _translation = dc_translation; + _color = dc_color; } void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) { + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); + } + + void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread) + { + int *source; + int *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = (int *)(ylookup[yl] + sx + _destorg); + source = (int *)(&thread->dc_temp[yl*4]); + pitch = _pitch/sizeof(int); + + if (count & 1) { + *dest = *source; + source += 4/sizeof(int); + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4/sizeof(int)]; + source += 8/sizeof(int); + dest += pitch*2; + } while (--count); } void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = _colormap; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + + if (count & 1) { + *dest = colormap[*source]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[pitch] = colormap[source[4]]; + source += 8; + dest += pitch*2; + } while (--count); } void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = _colormap; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + + if (count & 1) { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = colormap[source[0]]; + dest[1] = colormap[source[1]]; + dest[2] = colormap[source[2]]; + dest[3] = colormap[source[3]]; + dest[pitch] = colormap[source[4]]; + dest[pitch+1] = colormap[source[5]]; + dest[pitch+2] = colormap[source[6]]; + dest[pitch+3] = colormap[source[7]]; + source += 8; + dest += pitch*2; + } while (--count); } void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) { + int count = yh - yl + 1; + uint8_t *source = &thread->dc_temp[yl*4 + hx]; + const uint8_t *translation = _translation; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + uint8_t b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } } void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) { + int count = yh - yl + 1; + uint8_t *source = &thread->dc_temp[yl*4]; + const uint8_t *translation = _translation; + int c0, c1; + uint8_t b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } } void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t fg = colormap[*source]; + uint32_t bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1AddTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4AddTranslatedPalCommand::Execute(DrawerThread *thread) - { + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t fg = colormap[source[0]]; + uint32_t bg = dest[0]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[0] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[1]]; + bg = dest[1]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[1] = RGB32k.All[fg & (fg>>15)]; + + + fg = colormap[source[2]]; + bg = dest[2]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[2] = RGB32k.All[fg & (fg>>15)]; + + fg = colormap[source[3]]; + bg = dest[3]; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + dest[3] = RGB32k.All[fg & (fg>>15)]; + + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + + do { + uint32_t val = colormap[*source]; + uint32_t fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) { + uint32_t *fgstart; + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + fgstart = &Col2RGB8[0][_color]; + colormap = _colormap; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + + do { + uint32_t val; + + val = colormap[source[0]]; + val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; + dest[0] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[1]]; + val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; + dest[1] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[2]]; + val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; + dest[2] = RGB32k.All[val & (val>>15)]; + + val = colormap[source[3]]; + val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; + dest[3] = RGB32k.All[val & (val>>15)]; + + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1AddClampTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4AddClampTranslatedPalCommand::Execute(DrawerThread *thread) - { + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + + do { + uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; + uint32_t b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; + b = a; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1SubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4SubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) { + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4 + hx]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + source += 4; + dest += pitch; + } while (--count); } void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) { - } + const uint8_t *colormap; + uint8_t *source; + uint8_t *dest; + int count; + int pitch; - void DrawColumnRt1RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { - } + count = yh-yl; + if (count < 0) + return; + count++; - void DrawColumnRt4RevSubClampTranslatedPalCommand::Execute(DrawerThread *thread) - { + const uint32_t *fg2rgb = _srcblend; + const uint32_t *bg2rgb = _destblend; + dest = ylookup[yl] + sx + _destorg; + source = &thread->dc_temp[yl*4]; + pitch = _pitch; + colormap = _colormap; + + do { + uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; + uint32_t b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[0] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[1] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[2] = RGB32k.All[(a>>15) & a]; + + a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; + b = a; + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[3] = RGB32k.All[(a>>15) & a]; + + source += 4; + dest += pitch; + } while (--count); } }