mirror of
https://github.com/ZDoom/qzdoom.git
synced 2024-11-29 07:22:05 +00:00
Added php script for the span drawers
This commit is contained in:
parent
257f48de97
commit
5fa5b062d6
4 changed files with 5617 additions and 6 deletions
|
@ -42,6 +42,7 @@
|
|||
#include "swrenderer/scene/r_light.h"
|
||||
#include "r_draw_wall32.h"
|
||||
#include "r_draw_sprite32.h"
|
||||
#include "r_draw_span32.h"
|
||||
|
||||
#include "gi.h"
|
||||
#include "stats.h"
|
||||
|
@ -246,6 +247,54 @@ namespace swrenderer
|
|||
Queue->Push<DrawColumnRevSubClampTranslatedLLVMCommand>(args);
|
||||
}
|
||||
|
||||
void SWTruecolorDrawers::DrawSpan(const SpanDrawerArgs &args)
|
||||
{
|
||||
if (r_phpdrawers)
|
||||
Queue->Push<DrawSpan32Command>(args);
|
||||
else
|
||||
Queue->Push<DrawSpanLLVMCommand>(args);
|
||||
}
|
||||
|
||||
void SWTruecolorDrawers::DrawSpanMasked(const SpanDrawerArgs &args)
|
||||
{
|
||||
if (r_phpdrawers)
|
||||
Queue->Push<DrawSpanMasked32Command>(args);
|
||||
else
|
||||
Queue->Push<DrawSpanMaskedLLVMCommand>(args);
|
||||
}
|
||||
|
||||
void SWTruecolorDrawers::DrawSpanTranslucent(const SpanDrawerArgs &args)
|
||||
{
|
||||
if (r_phpdrawers)
|
||||
Queue->Push<DrawSpanTranslucent32Command>(args);
|
||||
else
|
||||
Queue->Push<DrawSpanTranslucentLLVMCommand>(args);
|
||||
}
|
||||
|
||||
void SWTruecolorDrawers::DrawSpanMaskedTranslucent(const SpanDrawerArgs &args)
|
||||
{
|
||||
if (r_phpdrawers)
|
||||
Queue->Push<DrawSpanAddClamp32Command>(args);
|
||||
else
|
||||
Queue->Push<DrawSpanMaskedTranslucentLLVMCommand>(args);
|
||||
}
|
||||
|
||||
void SWTruecolorDrawers::DrawSpanAddClamp(const SpanDrawerArgs &args)
|
||||
{
|
||||
if (r_phpdrawers)
|
||||
Queue->Push<DrawSpanTranslucent32Command>(args);
|
||||
else
|
||||
Queue->Push<DrawSpanAddClampLLVMCommand>(args);
|
||||
}
|
||||
|
||||
void SWTruecolorDrawers::DrawSpanMaskedAddClamp(const SpanDrawerArgs &args)
|
||||
{
|
||||
if (r_phpdrawers)
|
||||
Queue->Push<DrawSpanAddClamp32Command>(args);
|
||||
else
|
||||
Queue->Push<DrawSpanMaskedAddClampLLVMCommand>(args);
|
||||
}
|
||||
|
||||
DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs)
|
||||
{
|
||||
auto shade_constants = drawerargs.ColormapConstants();
|
||||
|
|
|
@ -384,12 +384,12 @@ namespace swrenderer
|
|||
void DrawSubClampTranslatedColumn(const SpriteDrawerArgs &args) override;
|
||||
void DrawRevSubClampColumn(const SpriteDrawerArgs &args) override;
|
||||
void DrawRevSubClampTranslatedColumn(const SpriteDrawerArgs &args) override;
|
||||
void DrawSpan(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanLLVMCommand>(args); }
|
||||
void DrawSpanMasked(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanMaskedLLVMCommand>(args); }
|
||||
void DrawSpanTranslucent(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanTranslucentLLVMCommand>(args); }
|
||||
void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanMaskedTranslucentLLVMCommand>(args); }
|
||||
void DrawSpanAddClamp(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanAddClampLLVMCommand>(args); }
|
||||
void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override { Queue->Push<DrawSpanMaskedAddClampLLVMCommand>(args); }
|
||||
void DrawSpan(const SpanDrawerArgs &args) override;
|
||||
void DrawSpanMasked(const SpanDrawerArgs &args) override;
|
||||
void DrawSpanTranslucent(const SpanDrawerArgs &args) override;
|
||||
void DrawSpanMaskedTranslucent(const SpanDrawerArgs &args) override;
|
||||
void DrawSpanAddClamp(const SpanDrawerArgs &args) override;
|
||||
void DrawSpanMaskedAddClamp(const SpanDrawerArgs &args) override;
|
||||
void FillSpan(const SpanDrawerArgs &args) override { Queue->Push<FillSpanRGBACommand>(args); }
|
||||
|
||||
void DrawTiltedSpan(const SpanDrawerArgs &args, int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy, FDynamicColormap *basecolormap) override
|
||||
|
|
5174
src/swrenderer/drawers/r_draw_span32.h
Normal file
5174
src/swrenderer/drawers/r_draw_span32.h
Normal file
File diff suppressed because it is too large
Load diff
388
src/swrenderer/drawers/r_draw_span32.php
Normal file
388
src/swrenderer/drawers/r_draw_span32.php
Normal file
|
@ -0,0 +1,388 @@
|
|||
#!/usr/bin/php
|
||||
/*
|
||||
** Drawer commands for spans
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
/*
|
||||
Warning: this C++ source file has been auto-generated. Please modify the original php script that generated it.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "swrenderer/drawers/r_draw_rgba.h"
|
||||
#include "swrenderer/viewport/r_spandrawer.h"
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
<?
|
||||
// Drawer classes to emit, in output order: C++ class name => blend variant
// passed through to Blend().
$spanDrawerVariants = array(
	"DrawSpan32Command" => "opaque",
	"DrawSpanMasked32Command" => "masked",
	"DrawSpanTranslucent32Command" => "translucent",
	"DrawSpanAddClamp32Command" => "addclamp",
	// Currently disabled variants:
	//"DrawSpanSubClamp32Command" => "subclamp",
	//"DrawSpanRevSubClamp32Command" => "revsubclamp",
);

foreach ($spanDrawerVariants as $spanClassName => $spanBlendVariant)
{
	GenerateDrawerCommand($spanClassName, $spanBlendVariant);
}
|
||||
|
||||
// Emits one C++ DrawerCommand subclass named $className whose Execute()
// draws a single span using the blend mode selected by $blendVariant.
//
// The emitted Execute() reads the texture parameters from the span args,
// optionally steps down to a smaller mip level, then expands one specialised
// pixel loop per (simple shade, nearest filter, 64x64) combination through
// LoopShade().
function GenerateDrawerCommand($className, $blendVariant)
{
?>
	class <?=$className?> : public DrawerCommand
	{
	protected:
		SpanDrawerArgs args;

	public:
		<?=$className?>(const SpanDrawerArgs &drawerargs) : args(drawerargs) { }

		void Execute(DrawerThread *thread) override
		{
			if (thread->line_skipped_by_thread(args.DestY())) return;

			uint32_t xbits = args.TextureWidthBits();
			uint32_t ybits = args.TextureHeightBits();
			uint32_t xstep = args.TextureUStep();
			uint32_t ystep = args.TextureVStep();
			uint32_t xfrac = args.TextureUPos();
			uint32_t yfrac = args.TextureVPos();

			const uint32_t *source = (const uint32_t*)args.TexturePixels();

			double lod = args.TextureLOD();
			bool mipmapped = args.MipmappedTexture();

			bool magnifying = lod < 0.0;
			if (r_mipmap && mipmapped)
			{
				// Step past each larger mip level; every step halves both dimensions.
				int level = (int)lod;
				while (level > 0)
				{
					if (xbits <= 2 || ybits <= 2)
						break;

					source += (1 << (xbits)) * (1 << (ybits));
					xbits -= 1;
					ybits -= 1;
					level--;
				}
			}

			// These must be derived after mip selection: the nearest-filter sampler
			// indexes 'source' with them, so they have to describe the chosen level.
			uint32_t yshift = 32 - ybits;
			uint32_t xshift = yshift - xbits;
			uint32_t xmask = ((1 << xbits) - 1) << ybits;

			bool is_nearest_filter = !((magnifying && r_magfilter) || (!magnifying && r_minfilter));

			auto shade_constants = args.ColormapConstants();
			if (shade_constants.simple_shade)
			{
<?	LoopShade($blendVariant, true); ?>
			}
			else
			{
<?	LoopShade($blendVariant, false); ?>
			}
		}

		FString DebugInfo() override { return "<?=$className?>"; }
	};

<?
}
|
||||
|
||||
// Emits the runtime branch on is_nearest_filter and expands LoopFilter()
// once for the nearest-neighbour case and once for the filtered case.
function LoopShade($blendVariant, $isSimpleShade)
{
?>
				if (is_nearest_filter)
				{
<?	LoopFilter($blendVariant, $isSimpleShade, true); ?>
				}
				else
				{
<?	LoopFilter($blendVariant, $isSimpleShade, false); ?>
				}
<?
}
|
||||
|
||||
// Emits the runtime check for 64x64 textures (xbits == 6 && ybits == 6) and
// expands Loop() once with the 64x64 specialisation and once without it.
function LoopFilter($blendVariant, $isSimpleShade, $isNearestFilter)
{
?>
					bool is_64x64 = xbits == 6 && ybits == 6;
					if (is_64x64)
					{
<?	Loop($blendVariant, $isSimpleShade, $isNearestFilter, true); ?>
					}
					else
					{
<?	Loop($blendVariant, $isSimpleShade, $isNearestFilter, false); ?>
					}
<?
}
|
||||
|
||||
// Emits the pixel loop for one specialisation of the span drawer.
//
// The emitted code sets up the shading constants, then processes the span two
// pixels per iteration (each pixel widened to four 16-bit lanes of an __m128i)
// and finishes with a single-pixel tail when the span length is odd. Sampling,
// shading and blending are expanded inline via Sample(), Shade() and Blend().
function Loop($blendVariant, $isSimpleShade, $isNearestFilter, $is64x64)
{
?>
						// Shade constants
						int light = 256 - (args.Light() >> (FRACBITS - 8));
						__m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light);
						__m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light);
<?	if (!$isSimpleShade)
	{ ?>
						// _mm_set_epi16 (not _mm_setr_epi16) so the 256 pass-through factor lands on the
						// alpha lanes, matching the lane order of mlight, shade_fade and shade_light.
						__m128i inv_desaturate = _mm_set_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate);
						__m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue);
						shade_fade = _mm_mullo_epi16(shade_fade, inv_light);
						__m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue);
						int desaturate = shade_constants.desaturate;
<?	} ?>

						int count = args.DestX2() - args.DestX1() + 1;
						uint32_t *dest = (uint32_t*)RenderViewport::Instance()->GetDest(args.DestX1(), args.DestY());

<?	if (!$isNearestFilter)
	{ ?>
						// Shift the sample position back by half a texel for the 2x2 filter.
						xfrac -= 1 << (31 - xbits);
						yfrac -= 1 << (31 - ybits);
<?	} ?>
						uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8);
						uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8);

						// Main loop: two pixels per iteration.
						int ssecount = count / 2;
						for (int index = 0; index < ssecount; index++)
						{
							int offset = index * 2;
<?	if ($blendVariant != "opaque") { ?>
							__m128i bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + offset)), _mm_setzero_si128());
<?	} ?>

							// Sample
							unsigned int ifgcolor[2];
							{
<?	Sample($isNearestFilter, $is64x64); ?>
								ifgcolor[0] = sampleout;
								xfrac += xstep;
								yfrac += ystep;
							}
							{
<?	Sample($isNearestFilter, $is64x64); ?>
								ifgcolor[1] = sampleout;
								xfrac += xstep;
								yfrac += ystep;
							}
							__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());

							// Shade
<?	Shade($isSimpleShade); ?>

							// Blend
<?	Blend($blendVariant); ?>

							_mm_storel_epi64((__m128i*)(dest + offset), outcolor);
						}

						// Tail: one remaining pixel when the span length is odd.
						if (ssecount * 2 != count)
						{
							int index = ssecount * 2;
							int offset = index;
<?	if ($blendVariant != "opaque") { ?>
							__m128i bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128());
<?	} ?>

							// Sample
							unsigned int ifgcolor[2];
<?	Sample($isNearestFilter, $is64x64); ?>
							ifgcolor[0] = sampleout;
							ifgcolor[1] = 0;
							__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());

							// Shade
<?	Shade($isSimpleShade); ?>

							// Blend
<?	Blend($blendVariant); ?>

							dest[offset] = _mm_cvtsi128_si32(outcolor);
						}
<?
}
|
||||
|
||||
// Emits the texel fetch, leaving the result in a C++ local named 'sampleout'.
//
// Nearest filter: a single lookup, with hard-coded shifts/masks for 64x64
// textures and xshift/xmask/yshift otherwise.
// Otherwise: a 2x2 fetch (p00, p01, p10, p11) blended per channel with 4-bit
// fractional weights.
function Sample($isNearestFilter, $is64x64)
{
	if ($isNearestFilter)
	{
		if ($is64x64)
		{ ?>
							int sample_index = ((xfrac >> (32 - 6 - 6)) & (63 * 64)) + (yfrac >> (32 - 6));
<?		}
		else
		{ ?>
							int sample_index = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
<?		} ?>
							unsigned int sampleout = source[sample_index];
<?
		return;
	}

	// Filtered path: only the source of the bit counts differs for 64x64 textures.
	if ($is64x64)
	{ ?>
							uint32_t xxbits = 26;
							uint32_t yybits = 26;
<?	}
	else
	{ ?>
							uint32_t xxbits = 32 - xbits;
							uint32_t yybits = 32 - ybits;
<?	} ?>
							uint32_t xxshift = (32 - xxbits);
							uint32_t yyshift = (32 - yybits);
							uint32_t xxmask = (1 << xxshift) - 1;
							uint32_t yymask = (1 << yyshift) - 1;
							uint32_t x = xfrac >> xxbits;
							uint32_t y = yfrac >> yybits;

							uint32_t p00 = source[((y & yymask) + ((x & xxmask) << yyshift))];
							uint32_t p01 = source[(((y + 1) & yymask) + ((x & xxmask) << yyshift))];
							uint32_t p10 = source[((y & yymask) + (((x + 1) & xxmask) << yyshift))];
							uint32_t p11 = source[(((y + 1) & yymask) + (((x + 1) & xxmask) << yyshift))];

							uint32_t inv_b = (xfrac >> (xxbits - 4)) & 15;
							uint32_t inv_a = (yfrac >> (yybits - 4)) & 15;
							uint32_t a = 16 - inv_a;
							uint32_t b = 16 - inv_b;

							uint32_t sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8;
							uint32_t sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8;
							uint32_t sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8;
							uint32_t salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8;

							unsigned int sampleout = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue;
<?
}
|
||||
|
||||
// Emits the lighting step applied to 'fgcolor' (two pixels, 16 bits per channel).
//
// Simple shade: multiply by mlight and shift back down by 8.
// Otherwise: mix each pixel with its weighted intensity using inv_desaturate,
// apply mlight, add shade_fade, then scale by shade_light.
function Shade($isSimpleShade)
{
	if ($isSimpleShade)
	{ ?>
							fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
<?
		return;
	}
?>
							int blue0 = BPART(ifgcolor[0]);
							int green0 = GPART(ifgcolor[0]);
							int red0 = RPART(ifgcolor[0]);
							int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate;

							int blue1 = BPART(ifgcolor[1]);
							int green1 = GPART(ifgcolor[1]);
							int red1 = RPART(ifgcolor[1]);
							int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate;

							__m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0);

							fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8);
							fgcolor = _mm_mullo_epi16(fgcolor, mlight);
							fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8);
							fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
<?
}
|
||||
|
||||
// Emits the blend step that combines 'fgcolor' with 'bgcolor' (two pixels,
// 16 bits per channel) and leaves the packed 8-bit result in 'outcolor'.
//
// $blendVariant selects the emitted code:
//   "opaque"      - outcolor is fgcolor, packed.
//   "masked"      - mix fg and bg by the foreground pixel's own alpha.
//   "translucent" - fg * srcalpha + bg * destalpha, saturated when packed.
//   anything else - per-pixel alphas derived from the sampled texel's alpha,
//                   then add ("add"/"addclamp"), subtract ("subclamp") or
//                   reverse-subtract ("revsubclamp").
function Blend($blendVariant)
{
	if ($blendVariant == "opaque")
	{ ?>
							__m128i outcolor = fgcolor;
							outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
<?	}
	else if ($blendVariant == "masked")
	{ ?>
							// Broadcast each pixel's alpha across its four lanes. The second shuffle
							// has to start from 'alpha' so the low-half broadcast is not thrown away.
							__m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3,3,3,3));
							alpha = _mm_shufflehi_epi16(alpha, _MM_SHUFFLE(3,3,3,3));
							alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256
							__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);

							fgcolor = _mm_mullo_epi16(fgcolor, alpha);
							bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha);
							__m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8);
							outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
							outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
<?	}
	else if ($blendVariant == "translucent")
	{ ?>
							__m128i fgalpha = _mm_set1_epi16(srcalpha);
							__m128i bgalpha = _mm_set1_epi16(destalpha);

							fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
							bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);

							// Widen to 32 bits so the sum cannot wrap before the shift.
							__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
							__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
							__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
							__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());

							__m128i out_lo = _mm_add_epi32(fg_lo, bg_lo);
							__m128i out_hi = _mm_add_epi32(fg_hi, bg_hi);

							out_lo = _mm_srai_epi32(out_lo, 8);
							out_hi = _mm_srai_epi32(out_hi, 8);
							__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
							outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
							outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
<?	}
	else
	{ ?>
							uint32_t alpha0 = APART(ifgcolor[0]);
							uint32_t alpha1 = APART(ifgcolor[1]);
							alpha0 += alpha0 >> 7; // 255->256
							alpha1 += alpha1 >> 7; // 255->256
							uint32_t inv_alpha0 = 256 - alpha0;
							uint32_t inv_alpha1 = 256 - alpha1;

							uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8;
							uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8;
							uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8;
							uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8;

							__m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0);
							__m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0);

							fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
							bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);

							__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
							__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
							__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
							__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());

<?		if ($blendVariant == "add" || $blendVariant == "addclamp")
		{ ?>
							__m128i out_lo = _mm_add_epi32(fg_lo, bg_lo);
							__m128i out_hi = _mm_add_epi32(fg_hi, bg_hi);
<?		}
		else if ($blendVariant == "subclamp")
		{ ?>
							__m128i out_lo = _mm_sub_epi32(fg_lo, bg_lo);
							__m128i out_hi = _mm_sub_epi32(fg_hi, bg_hi);
<?		}
		else if ($blendVariant == "revsubclamp")
		{ ?>
							__m128i out_lo = _mm_sub_epi32(bg_lo, fg_lo);
							__m128i out_hi = _mm_sub_epi32(bg_hi, fg_hi);
<?		} ?>

							out_lo = _mm_srai_epi32(out_lo, 8);
							out_hi = _mm_srai_epi32(out_hi, 8);
							__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
							outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
							outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
<?	}
}
|
||||
?>
|
||||
}
|
Loading…
Reference in a new issue