mirror of
https://github.com/ZDoom/qzdoom.git
synced 2024-11-25 05:21:02 +00:00
Non-SSE version of the sky drawers
This commit is contained in:
parent
527a172fcd
commit
279fa7e886
3 changed files with 390 additions and 0 deletions
|
@ -39,10 +39,17 @@
|
||||||
#include "gl/data/gl_matrix.h"
|
#include "gl/data/gl_matrix.h"
|
||||||
#include "swrenderer/viewport/r_viewport.h"
|
#include "swrenderer/viewport/r_viewport.h"
|
||||||
#include "swrenderer/scene/r_light.h"
|
#include "swrenderer/scene/r_light.h"
|
||||||
|
#ifdef NO_SSE
|
||||||
|
#include "r_draw_wall32.h"
|
||||||
|
#include "r_draw_sprite32.h"
|
||||||
|
#include "r_draw_span32.h"
|
||||||
|
#include "r_draw_sky32.h"
|
||||||
|
#else
|
||||||
#include "r_draw_wall32_sse2.h"
|
#include "r_draw_wall32_sse2.h"
|
||||||
#include "r_draw_sprite32_sse2.h"
|
#include "r_draw_sprite32_sse2.h"
|
||||||
#include "r_draw_span32_sse2.h"
|
#include "r_draw_span32_sse2.h"
|
||||||
#include "r_draw_sky32_sse2.h"
|
#include "r_draw_sky32_sse2.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "gi.h"
|
#include "gi.h"
|
||||||
#include "stats.h"
|
#include "stats.h"
|
||||||
|
|
|
@ -387,4 +387,68 @@ namespace swrenderer
|
||||||
return alpha | (red << 16) | (green << 8) | blue;
|
return alpha | (red << 16) | (green << 8) | blue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Vector classes for non-SSE drawers that behave like their SSE counterparts
|
||||||
|
|
||||||
|
namespace drawervectors
|
||||||
|
{
|
||||||
|
// Four unsigned 32-bit lanes named a, r, g and b.
struct vec4ui
{
	uint32_t a, r, g, b;

	// Deliberately empty: the lanes are left uninitialized.
	vec4ui() {}

	// Broadcast constructor: every lane receives the same value.
	vec4ui(uint32_t value)
		: a(value), r(value), g(value), b(value)
	{
	}

	// Per-lane constructor, arguments given in a, r, g, b order.
	vec4ui(uint32_t alpha, uint32_t red, uint32_t green, uint32_t blue)
		: a(alpha), r(red), g(green), b(blue)
	{
	}
};
|
||||||
|
|
||||||
|
// Eight unsigned 16-bit lanes: a0/r0/g0/b0 form the low group and a1/r1/g1/b1 the high group.
struct vec8us
{
	// Deliberately empty: the lanes are left uninitialized.
	vec8us() {}

	// Broadcast constructor: writes v into all eight lanes.
	// The high lanes must be set too, because the lane-wise operators read every lane;
	// leaving a1..b1 indeterminate would make expressions such as vec8us(256) - x read garbage.
	vec8us(uint16_t v) : a0(v), r0(v), g0(v), b0(v), a1(v), r1(v), g1(v), b1(v) { }

	// Per-lane constructor, low group first, then high group.
	vec8us(uint16_t a0, uint16_t r0, uint16_t g0, uint16_t b0, uint16_t a1, uint16_t r1, uint16_t g1, uint16_t b1) : a0(a0), r0(r0), g0(g0), b0(b0), a1(a1), r1(r1), g1(g1), b1(b1) { }

	uint16_t a0, r0, g0, b0, a1, r1, g1, b1;
};
|
||||||
|
|
||||||
|
// Splits two packed 32-bit colors into eight 16-bit lanes:
// the channels of lo fill a0/r0/g0/b0 and the channels of hi fill a1/r1/g1/b1.
inline vec8us unpack(uint32_t lo, uint32_t hi)
{
	vec8us lanes;
	lanes.a0 = APART(lo);
	lanes.r0 = RPART(lo);
	lanes.g0 = GPART(lo);
	lanes.b0 = BPART(lo);
	lanes.a1 = APART(hi);
	lanes.r1 = RPART(hi);
	lanes.g1 = GPART(hi);
	lanes.b1 = BPART(hi);
	return lanes;
}
|
||||||
|
// Widens the low group (a0, r0, g0, b0) of v to four 32-bit lanes.
inline vec4ui unpacklo(vec8us v)
{
	vec4ui wide;
	wide.a = v.a0;
	wide.r = v.r0;
	wide.g = v.g0;
	wide.b = v.b0;
	return wide;
}
|
||||||
|
// Widens the high group (a1, r1, g1, b1) of v to four 32-bit lanes.
inline vec4ui unpackhi(vec8us v)
{
	vec4ui wide;
	wide.a = v.a1;
	wide.r = v.r1;
	wide.g = v.g1;
	wide.b = v.b1;
	return wide;
}
|
||||||
|
|
||||||
|
// Narrows two 4x32-bit vectors into one 8x16-bit vector: lo becomes the low group, hi the high group.
// Each lane is truncated to its low 16 bits by the conversion to uint16_t (no saturation).
inline vec8us pack(vec4ui lo, vec4ui hi)
{
	vec8us narrow;
	narrow.a0 = static_cast<uint16_t>(lo.a);
	narrow.r0 = static_cast<uint16_t>(lo.r);
	narrow.g0 = static_cast<uint16_t>(lo.g);
	narrow.b0 = static_cast<uint16_t>(lo.b);
	narrow.a1 = static_cast<uint16_t>(hi.a);
	narrow.r1 = static_cast<uint16_t>(hi.r);
	narrow.g1 = static_cast<uint16_t>(hi.g);
	narrow.b1 = static_cast<uint16_t>(hi.b);
	return narrow;
}
|
||||||
|
// Packs the low group (a0, r0, g0, b0) of v into one 32-bit color via MAKEARGB.
// Every lane is first converted to int16_t and then clamped to the 0..255 range.
inline uint32_t packlo(vec8us v)
{
	const uint32_t alpha = (uint32_t)clamp<int16_t>(v.a0, 0, 255);
	const uint32_t red = (uint32_t)clamp<int16_t>(v.r0, 0, 255);
	const uint32_t green = (uint32_t)clamp<int16_t>(v.g0, 0, 255);
	const uint32_t blue = (uint32_t)clamp<int16_t>(v.b0, 0, 255);
	return MAKEARGB(alpha, red, green, blue);
}
|
||||||
|
// Packs the high group (a1, r1, g1, b1) of v into one 32-bit color via MAKEARGB.
// Every lane is first converted to int16_t and then clamped to the 0..255 range.
inline uint32_t packhi(vec8us v)
{
	const uint32_t alpha = (uint32_t)clamp<int16_t>(v.a1, 0, 255);
	const uint32_t red = (uint32_t)clamp<int16_t>(v.r1, 0, 255);
	const uint32_t green = (uint32_t)clamp<int16_t>(v.g1, 0, 255);
	const uint32_t blue = (uint32_t)clamp<int16_t>(v.b1, 0, 255);
	return MAKEARGB(alpha, red, green, blue);
}
|
||||||
|
|
||||||
|
// Lane-wise addition; each sum is truncated back to 16 bits.
inline vec8us operator+(vec8us a, vec8us b)
{
	vec8us sum;
	sum.a0 = static_cast<uint16_t>(a.a0 + b.a0);
	sum.r0 = static_cast<uint16_t>(a.r0 + b.r0);
	sum.g0 = static_cast<uint16_t>(a.g0 + b.g0);
	sum.b0 = static_cast<uint16_t>(a.b0 + b.b0);
	sum.a1 = static_cast<uint16_t>(a.a1 + b.a1);
	sum.r1 = static_cast<uint16_t>(a.r1 + b.r1);
	sum.g1 = static_cast<uint16_t>(a.g1 + b.g1);
	sum.b1 = static_cast<uint16_t>(a.b1 + b.b1);
	return sum;
}
|
||||||
|
|
||||||
|
// Lane-wise subtraction; each difference is truncated back to 16 bits.
inline vec8us operator-(vec8us a, vec8us b)
{
	vec8us diff;
	diff.a0 = static_cast<uint16_t>(a.a0 - b.a0);
	diff.r0 = static_cast<uint16_t>(a.r0 - b.r0);
	diff.g0 = static_cast<uint16_t>(a.g0 - b.g0);
	diff.b0 = static_cast<uint16_t>(a.b0 - b.b0);
	diff.a1 = static_cast<uint16_t>(a.a1 - b.a1);
	diff.r1 = static_cast<uint16_t>(a.r1 - b.r1);
	diff.g1 = static_cast<uint16_t>(a.g1 - b.g1);
	diff.b1 = static_cast<uint16_t>(a.b1 - b.b1);
	return diff;
}
|
||||||
|
|
||||||
|
// Lane-wise multiplication that keeps the low 16 bits of every product.
inline vec8us operator*(vec8us a, vec8us b)
{
	// uint16_t operands are promoted to signed int before '*', so two large lanes
	// (e.g. 0xffff * 0xffff) would overflow int, which is undefined behaviour.
	// Multiplying as uint32_t wraps instead and yields the same low 16 bits.
	auto mullo = [](uint16_t x, uint16_t y) -> uint16_t
	{
		return static_cast<uint16_t>(static_cast<uint32_t>(x) * static_cast<uint32_t>(y));
	};

	return vec8us(
		mullo(a.a0, b.a0), mullo(a.r0, b.r0), mullo(a.g0, b.g0), mullo(a.b0, b.b0),
		mullo(a.a1, b.a1), mullo(a.r1, b.r1), mullo(a.g1, b.g1), mullo(a.b1, b.b1));
}
|
||||||
|
|
||||||
|
// Shifts every lane left by the same bit count; each result is truncated back to 16 bits.
inline vec8us operator<<(vec8us a, int bits)
{
	vec8us shifted;
	shifted.a0 = static_cast<uint16_t>(a.a0 << bits);
	shifted.r0 = static_cast<uint16_t>(a.r0 << bits);
	shifted.g0 = static_cast<uint16_t>(a.g0 << bits);
	shifted.b0 = static_cast<uint16_t>(a.b0 << bits);
	shifted.a1 = static_cast<uint16_t>(a.a1 << bits);
	shifted.r1 = static_cast<uint16_t>(a.r1 << bits);
	shifted.g1 = static_cast<uint16_t>(a.g1 << bits);
	shifted.b1 = static_cast<uint16_t>(a.b1 << bits);
	return shifted;
}
|
||||||
|
|
||||||
|
// Shifts every lane right by the same bit count.
inline vec8us operator>>(vec8us a, int bits)
{
	vec8us shifted;
	shifted.a0 = static_cast<uint16_t>(a.a0 >> bits);
	shifted.r0 = static_cast<uint16_t>(a.r0 >> bits);
	shifted.g0 = static_cast<uint16_t>(a.g0 >> bits);
	shifted.b0 = static_cast<uint16_t>(a.b0 >> bits);
	shifted.a1 = static_cast<uint16_t>(a.a1 >> bits);
	shifted.r1 = static_cast<uint16_t>(a.r1 >> bits);
	shifted.g1 = static_cast<uint16_t>(a.g1 >> bits);
	shifted.b1 = static_cast<uint16_t>(a.b1 >> bits);
	return shifted;
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
319
src/swrenderer/drawers/r_draw_sky32.h
Normal file
319
src/swrenderer/drawers/r_draw_sky32.h
Normal file
|
@ -0,0 +1,319 @@
|
||||||
|
/*
|
||||||
|
** Drawer commands for the sky
|
||||||
|
** Copyright (c) 2016 Magnus Norddahl
|
||||||
|
**
|
||||||
|
** This software is provided 'as-is', without any express or implied
|
||||||
|
** warranty. In no event will the authors be held liable for any damages
|
||||||
|
** arising from the use of this software.
|
||||||
|
**
|
||||||
|
** Permission is granted to anyone to use this software for any purpose,
|
||||||
|
** including commercial applications, and to alter it and redistribute it
|
||||||
|
** freely, subject to the following restrictions:
|
||||||
|
**
|
||||||
|
** 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
** claim that you wrote the original software. If you use this software
|
||||||
|
** in a product, an acknowledgment in the product documentation would be
|
||||||
|
** appreciated but is not required.
|
||||||
|
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
** misrepresented as being the original software.
|
||||||
|
** 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
**
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "swrenderer/drawers/r_draw_rgba.h"
|
||||||
|
#include "swrenderer/viewport/r_skydrawer.h"
|
||||||
|
|
||||||
|
namespace swrenderer
|
||||||
|
{
|
||||||
|
|
||||||
|
class DrawSkySingle32Command : public DrawerCommand
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
SkyDrawerArgs args;
|
||||||
|
|
||||||
|
public:
|
||||||
|
DrawSkySingle32Command(const SkyDrawerArgs &args) : args(args) { }
|
||||||
|
|
||||||
|
void Execute(DrawerThread *thread) override
|
||||||
|
{
|
||||||
|
using namespace drawervectors;
|
||||||
|
|
||||||
|
uint32_t *dest = (uint32_t *)args.Dest();
|
||||||
|
int count = args.Count();
|
||||||
|
int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
|
||||||
|
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
|
||||||
|
int textureheight0 = args.FrontTextureHeight();
|
||||||
|
|
||||||
|
int32_t frac = args.TextureVPos();
|
||||||
|
int32_t fracstep = args.TextureVStep();
|
||||||
|
|
||||||
|
uint32_t solid_top = args.SolidTopColor();
|
||||||
|
uint32_t solid_bottom = args.SolidBottomColor();
|
||||||
|
bool fadeSky = args.FadeSky();
|
||||||
|
|
||||||
|
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
||||||
|
int start_fade = 2; // How fast it should fade out
|
||||||
|
int fade_length = (1 << (24 - start_fade));
|
||||||
|
int start_fadetop_y = (-frac) / fracstep;
|
||||||
|
int end_fadetop_y = (fade_length - frac) / fracstep;
|
||||||
|
int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
|
||||||
|
int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
|
||||||
|
start_fadetop_y = clamp(start_fadetop_y, 0, count);
|
||||||
|
end_fadetop_y = clamp(end_fadetop_y, 0, count);
|
||||||
|
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
||||||
|
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
||||||
|
|
||||||
|
int num_cores = thread->num_cores;
|
||||||
|
int skipped = thread->skipped_by_thread(args.DestY());
|
||||||
|
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
||||||
|
frac += fracstep * skipped;
|
||||||
|
fracstep *= num_cores;
|
||||||
|
pitch *= num_cores;
|
||||||
|
|
||||||
|
if (!fadeSky)
|
||||||
|
{
|
||||||
|
count = thread->count_for_thread(args.DestY(), count);
|
||||||
|
|
||||||
|
for (int index = 0; index < count; index++)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
*dest = source0[sample_index];
|
||||||
|
dest += pitch;
|
||||||
|
frac += fracstep;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec8us solid_top_fill = unpack(solid_top, 0);
|
||||||
|
vec8us solid_bottom_fill = unpack(solid_bottom, 0);
|
||||||
|
|
||||||
|
int index = skipped;
|
||||||
|
|
||||||
|
// Top solid color:
|
||||||
|
while (index < start_fadetop_y)
|
||||||
|
{
|
||||||
|
*dest = solid_top;
|
||||||
|
dest += pitch;
|
||||||
|
frac += fracstep;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Top fade:
|
||||||
|
while (index < end_fadetop_y)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
uint32_t fg = source0[sample_index];
|
||||||
|
|
||||||
|
vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
|
||||||
|
vec8us inv_alpha = vec8us(256) - alpha;
|
||||||
|
|
||||||
|
vec8us c = unpack(fg, 0);
|
||||||
|
c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
|
||||||
|
*dest = packlo(c);
|
||||||
|
|
||||||
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Textured center:
|
||||||
|
while (index < start_fadebottom_y)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
*dest = source0[sample_index];
|
||||||
|
|
||||||
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fade bottom:
|
||||||
|
while (index < end_fadebottom_y)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
uint32_t fg = source0[sample_index];
|
||||||
|
|
||||||
|
vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
|
||||||
|
vec8us inv_alpha = vec8us(256) - alpha;
|
||||||
|
|
||||||
|
vec8us c = unpack(fg, 0);
|
||||||
|
c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
|
||||||
|
*dest = packlo(c);
|
||||||
|
|
||||||
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bottom solid color:
|
||||||
|
while (index < count)
|
||||||
|
{
|
||||||
|
*dest = solid_bottom;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FString DebugInfo() override { return "DrawSkySingle32Command"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
class DrawSkyDouble32Command : public DrawerCommand
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
SkyDrawerArgs args;
|
||||||
|
|
||||||
|
public:
|
||||||
|
DrawSkyDouble32Command(const SkyDrawerArgs &args) : args(args) { }
|
||||||
|
|
||||||
|
void Execute(DrawerThread *thread) override
|
||||||
|
{
|
||||||
|
using namespace drawervectors;
|
||||||
|
|
||||||
|
uint32_t *dest = (uint32_t *)args.Dest();
|
||||||
|
int count = args.Count();
|
||||||
|
int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
|
||||||
|
const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
|
||||||
|
const uint32_t *source1 = (const uint32_t *)args.BackTexturePixels();
|
||||||
|
int textureheight0 = args.FrontTextureHeight();
|
||||||
|
uint32_t maxtextureheight1 = args.BackTextureHeight() - 1;
|
||||||
|
|
||||||
|
int32_t frac = args.TextureVPos();
|
||||||
|
int32_t fracstep = args.TextureVStep();
|
||||||
|
|
||||||
|
uint32_t solid_top = args.SolidTopColor();
|
||||||
|
uint32_t solid_bottom = args.SolidBottomColor();
|
||||||
|
bool fadeSky = args.FadeSky();
|
||||||
|
|
||||||
|
// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
|
||||||
|
int start_fade = 2; // How fast it should fade out
|
||||||
|
int fade_length = (1 << (24 - start_fade));
|
||||||
|
int start_fadetop_y = (-frac) / fracstep;
|
||||||
|
int end_fadetop_y = (fade_length - frac) / fracstep;
|
||||||
|
int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
|
||||||
|
int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
|
||||||
|
start_fadetop_y = clamp(start_fadetop_y, 0, count);
|
||||||
|
end_fadetop_y = clamp(end_fadetop_y, 0, count);
|
||||||
|
start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
|
||||||
|
end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
|
||||||
|
|
||||||
|
int num_cores = thread->num_cores;
|
||||||
|
int skipped = thread->skipped_by_thread(args.DestY());
|
||||||
|
dest = thread->dest_for_thread(args.DestY(), pitch, dest);
|
||||||
|
frac += fracstep * skipped;
|
||||||
|
fracstep *= num_cores;
|
||||||
|
pitch *= num_cores;
|
||||||
|
|
||||||
|
if (!fadeSky)
|
||||||
|
{
|
||||||
|
count = thread->count_for_thread(args.DestY(), count);
|
||||||
|
|
||||||
|
for (int index = 0; index < count; index++)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
uint32_t fg = source0[sample_index];
|
||||||
|
if (fg == 0)
|
||||||
|
{
|
||||||
|
uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
|
||||||
|
fg = source1[sample_index2];
|
||||||
|
}
|
||||||
|
|
||||||
|
*dest = fg;
|
||||||
|
dest += pitch;
|
||||||
|
frac += fracstep;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec8us solid_top_fill = unpack(solid_top, 0);
|
||||||
|
vec8us solid_bottom_fill = unpack(solid_bottom, 0);
|
||||||
|
|
||||||
|
int index = skipped;
|
||||||
|
|
||||||
|
// Top solid color:
|
||||||
|
while (index < start_fadetop_y)
|
||||||
|
{
|
||||||
|
*dest = solid_top;
|
||||||
|
dest += pitch;
|
||||||
|
frac += fracstep;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Top fade:
|
||||||
|
while (index < end_fadetop_y)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
uint32_t fg = source0[sample_index];
|
||||||
|
if (fg == 0)
|
||||||
|
{
|
||||||
|
uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
|
||||||
|
fg = source1[sample_index2];
|
||||||
|
}
|
||||||
|
|
||||||
|
vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
|
||||||
|
vec8us inv_alpha = vec8us(256) - alpha;
|
||||||
|
|
||||||
|
vec8us c = unpack(fg, 0);
|
||||||
|
c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
|
||||||
|
*dest = packlo(c);
|
||||||
|
|
||||||
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Textured center:
|
||||||
|
while (index < start_fadebottom_y)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
uint32_t fg = source0[sample_index];
|
||||||
|
if (fg == 0)
|
||||||
|
{
|
||||||
|
uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
|
||||||
|
fg = source1[sample_index2];
|
||||||
|
}
|
||||||
|
*dest = fg;
|
||||||
|
|
||||||
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fade bottom:
|
||||||
|
while (index < end_fadebottom_y)
|
||||||
|
{
|
||||||
|
uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
|
||||||
|
uint32_t fg = source0[sample_index];
|
||||||
|
if (fg == 0)
|
||||||
|
{
|
||||||
|
uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
|
||||||
|
fg = source1[sample_index2];
|
||||||
|
}
|
||||||
|
|
||||||
|
vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
|
||||||
|
vec8us inv_alpha = vec8us(256) - alpha;
|
||||||
|
|
||||||
|
vec8us c = unpack(fg, 0);
|
||||||
|
c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
|
||||||
|
*dest = packlo(c);
|
||||||
|
|
||||||
|
frac += fracstep;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bottom solid color:
|
||||||
|
while (index < count)
|
||||||
|
{
|
||||||
|
*dest = solid_bottom;
|
||||||
|
dest += pitch;
|
||||||
|
index += num_cores;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FString DebugInfo() override { return "DrawSkyDouble32Command"; }
|
||||||
|
};
|
||||||
|
}
|
Loading…
Reference in a new issue