Split softpoly into more files

This commit is contained in:
Magnus Norddahl 2019-12-15 17:14:23 +01:00
parent e0fb9a45e2
commit 5632c80ab2
16 changed files with 2949 additions and 2691 deletions

View file

@ -708,7 +708,11 @@ set ( SWRENDER_SOURCES
# Source files that make up the polygon renderer (drawers and math helpers).
set( POLYRENDER_SOURCES
rendering/polyrenderer/drawers/poly_triangle.cpp
rendering/polyrenderer/drawers/poly_thread.cpp
rendering/polyrenderer/drawers/screen_triangle.cpp
rendering/polyrenderer/drawers/screen_scanline_setup.cpp
rendering/polyrenderer/drawers/screen_shader.cpp
rendering/polyrenderer/drawers/screen_blend.cpp
rendering/polyrenderer/math/gpu_types.cpp
)

View file

@ -2,6 +2,7 @@
#include "poly_buffers.h"
#include "poly_framebuffer.h"
#include "poly_renderstate.h"
#include "rendering/polyrenderer/drawers/poly_thread.h"
#include "doomerrors.h"
// Out-of-class definition of the static PolyBuffer::First pointer; it starts out null.
PolyBuffer *PolyBuffer::First = nullptr;

View file

@ -0,0 +1,821 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include <stddef.h>
#include "templates.h"
#include "doomdef.h"
#include "w_wad.h"
#include "v_video.h"
#include "doomstat.h"
#include "st_stuff.h"
#include "g_game.h"
#include "g_level.h"
#include "r_data/r_translate.h"
#include "r_data/models/models.h"
#include "v_palette.h"
#include "r_data/colormaps.h"
#include "poly_thread.h"
#include "swrenderer/drawers/r_draw_rgba.h"
#include "screen_triangle.h"
#include "x86.h"
// Records which slice of the frame this worker owns: its core slot out of
// num_cores, its NUMA node out of num_numa_nodes, and the row range
// [numa_start_y, numa_end_y). Nothing else is set up here.
PolyTriangleThreadData::PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes, int numa_start_y, int numa_end_y)
	: core(core),
	  num_cores(num_cores),
	  numa_node(numa_node),
	  num_numa_nodes(num_numa_nodes),
	  numa_start_y(numa_start_y),
	  numa_end_y(numa_end_y)
{
}
// Fills this thread's share of the depth buffer with `value`.
// Only rows owned by this thread are written: the first owned row is located
// with skipped_by_thread(0) and each following one lies num_cores rows further.
void PolyTriangleThreadData::ClearDepth(float value)
{
	const int rowLength = depthstencil->Width();
	const int rowTotal = depthstencil->Height();
	float *row = depthstencil->DepthValues();

	const int firstOwnedRow = skipped_by_thread(0);
	int rowsLeft = count_for_thread(0, rowTotal);

	row += firstOwnedRow * rowLength;
	while (rowsLeft-- > 0)
	{
		for (int col = 0; col < rowLength; col++)
			row[col] = value;

		// Jump over the rows that belong to the other threads.
		row += num_cores * rowLength;
	}
}
// Fills this thread's share of the stencil buffer with `value`.
// Row ownership follows the same interleaving as ClearDepth: start at
// skipped_by_thread(0) and advance num_cores rows at a time.
void PolyTriangleThreadData::ClearStencil(uint8_t value)
{
	const int rowLength = depthstencil->Width();
	const int rowTotal = depthstencil->Height();
	uint8_t *row = depthstencil->StencilValues();

	const int firstOwnedRow = skipped_by_thread(0);
	int rowsLeft = count_for_thread(0, rowTotal);

	row += firstOwnedRow * rowLength;
	while (rowsLeft-- > 0)
	{
		// One byte per stencil entry, so a whole row can be set at once.
		memset(row, value, rowLength);
		row += num_cores * rowLength;
	}
}
// Installs a new viewport rectangle together with the render target and
// depth/stencil buffer it maps onto, then recomputes the clip rectangle.
void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, bool new_dest_bgra, PolyDepthStencil *new_depthstencil, bool new_topdown)
{
	// Render target description.
	dest = new_dest;
	dest_width = new_dest_width;
	dest_height = new_dest_height;
	dest_pitch = new_dest_pitch;
	dest_bgra = new_dest_bgra;
	depthstencil = new_depthstencil;
	topdown = new_topdown;

	// Viewport rectangle inside that target.
	viewport_x = x;
	viewport_y = y;
	viewport_width = width;
	viewport_height = height;

	// clip depends on both the viewport and the target size.
	UpdateClip();
}
// Stores the scissor rectangle, converting position + size into
// left/top/right/bottom edges, then recomputes the clip rectangle.
void PolyTriangleThreadData::SetScissor(int x, int y, int w, int h)
{
	scissor.left = x;
	scissor.top = y;
	scissor.right = x + w;
	scissor.bottom = y + h;

	UpdateClip();
}
void PolyTriangleThreadData::UpdateClip()
{
clip.left = MAX(MAX(viewport_x, scissor.left), 0);
clip.top = MAX(MAX(viewport_y, scissor.top), 0);
clip.right = MIN(MIN(viewport_x + viewport_width, scissor.right), dest_width);
clip.bottom = MIN(MIN(viewport_y + viewport_height, scissor.bottom), dest_height);
}
void PolyTriangleThreadData::PushStreamData(const StreamData &data, const PolyPushConstants &constants)
{
mainVertexShader.Data = data;
mainVertexShader.uClipSplit = constants.uClipSplit;
PushConstants = &constants;
AlphaThreshold = clamp((int)(PushConstants->uAlphaThreshold * 255.0f + 0.5f), 0, 255) << 24;
numPolyLights = 0;
if (constants.uLightIndex >= 0)
{
const FVector4 &lightRange = lights[constants.uLightIndex];
static_assert(sizeof(FVector4) == 16, "sizeof(FVector4) is not 16 bytes");
if (lightRange.Y > lightRange.X)
{
int start = constants.uLightIndex + 1;
int modulatedStart = static_cast<int>(lightRange.X) + start;
int modulatedEnd = static_cast<int>(lightRange.Y) + start;
for (int i = modulatedStart; i < modulatedEnd; i += 4)
{
if (numPolyLights == maxPolyLights)
break;
auto &lightpos = lights[i];
auto &lightcolor = lights[i + 1];
//auto &lightspot1 = lights[i + 2];
//auto &lightspot2 = lights[i + 3];
uint32_t r = (int)clamp(lightcolor.X * 255.0f, 0.0f, 255.0f);
uint32_t g = (int)clamp(lightcolor.Y * 255.0f, 0.0f, 255.0f);
uint32_t b = (int)clamp(lightcolor.Z * 255.0f, 0.0f, 255.0f);
auto& polylight = polyLights[numPolyLights++];
polylight.x = lightpos.X;
polylight.y = lightpos.Y;
polylight.z = lightpos.Z;
polylight.radius = 256.0f / lightpos.W;
polylight.color = (r << 16) | (g << 8) | b;
if (lightcolor.W < 0.0f)
polylight.radius = -polylight.radius;
}
}
}
}
// Copies the three matrices into the vertex shader used by ShadeVertex.
void PolyTriangleThreadData::PushMatrices(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix)
{
	auto &shader = mainVertexShader;
	shader.ModelMatrix = modelMatrix;
	shader.NormalModelMatrix = normalModelMatrix;
	shader.TextureMatrix = textureMatrix;
}
// Points the vertex shader at the viewpoint uniforms to use.
// Only the pointer is stored; the uniforms themselves are not copied.
void PolyTriangleThreadData::SetViewpointUniforms(const HWViewpointUniforms *uniforms)
{
	mainVertexShader.Viewpoint = uniforms;
}
// Currently a no-op: the requested depth clamp state is ignored.
void PolyTriangleThreadData::SetDepthClamp(bool on)
{
}
// Enables or disables depth writes (stored in WriteDepth).
void PolyTriangleThreadData::SetDepthMask(bool on)
{
	WriteDepth = on;
}
// Selects whether depth testing is performed.
// DF_LEqual and DF_Less both switch the test on; every other value
// (e.g. DF_Always) switches it off. The two "less" variants are not
// distinguished from each other here.
void PolyTriangleThreadData::SetDepthFunc(int func)
{
	DepthTest = (func == DF_LEqual) || (func == DF_Less);
}
// Currently a no-op. Per the original note, the hwrenderer layer only ever
// asks for (0, 1) or (1, 1); both cases are recognised but neither changes
// any state yet.
void PolyTriangleThreadData::SetDepthRange(float min, float max)
{
	const bool zeroToOne = (min == 0.0f && max == 1.0f);
	const bool oneToOne = !zeroToOne && (min == 1.0f && max == 1.0f);

	// Placeholders for the two variants; nothing to do for either.
	(void)zeroToOne;
	(void)oneToOne;
}
// Sets the constant depth bias. The constant factor is scaled down by 2500
// (computed in double precision, then narrowed to float).
// depthBiasSlopeFactor is accepted but not used.
void PolyTriangleThreadData::SetDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor)
{
	const double scaled = depthBiasConstantFactor / 2500.0;
	depthbias = (float)scaled;
}
// Enables or disables colour writes as a whole.
// Only the red flag is consulted; g, b and a are ignored, so per-channel
// masking is not represented.
void PolyTriangleThreadData::SetColorMask(bool r, bool g, bool b, bool a)
{
	WriteColor = r;
}
void PolyTriangleThreadData::SetStencil(int stencilRef, int op)
{
StencilTestValue = stencilRef;
if (op == SOP_Increment)
{
WriteStencil = StencilTest;
StencilWriteValue = MIN(stencilRef + 1, (int)255);
}
else if (op == SOP_Decrement)
{
WriteStencil = StencilTest;
StencilWriteValue = MAX(stencilRef - 1, (int)0);
}
else // SOP_Keep
{
WriteStencil = false;
StencilWriteValue = stencilRef;
}
}
// Translates a cull mode into the two-sided and winding flags:
// Cull_None makes drawing two-sided, Cull_CCW selects counter-clockwise culling.
void PolyTriangleThreadData::SetCulling(int mode)
{
	const bool drawBothSides = (mode == Cull_None);
	const bool cullCounterClockwise = (mode == Cull_CCW);

	SetTwoSided(drawBothSides);
	SetCullCCW(cullCounterClockwise);
}
// Turns stencil testing on or off.
// Stencil writes are enabled only when testing is on and the value to write
// differs from the value being tested against.
void PolyTriangleThreadData::EnableStencil(bool on)
{
	StencilTest = on;

	const bool valueChanges = (StencilTestValue != StencilWriteValue);
	WriteStencil = on ? valueChanges : false;
}
// Stores the render style to use for subsequent draws.
void PolyTriangleThreadData::SetRenderStyle(FRenderStyle style)
{
	RenderStyle = style;
}
// Stores the shader selection: the special effect id, the effect state and
// whether alpha testing is enabled. The values are only recorded here.
void PolyTriangleThreadData::SetShader(int specialEffect, int effectState, bool alphaTest)
{
	AlphaTest = alphaTest;
	EffectState = effectState;
	SpecialEffect = specialEffect;
}
// Binds a texture description to texture unit `unit`.
// Only the pixel pointer is stored (no copy). `unit` is used to index
// `textures` directly and is not range-checked here.
void PolyTriangleThreadData::SetTexture(int unit, const void *pixels, int width, int height, bool bgra)
{
	auto &slot = textures[unit];
	slot.pixels = pixels;
	slot.width = width;
	slot.height = height;
	slot.bgra = bgra;
}
// Draws `vcount` vertices fetched through the index buffer, starting at
// element `index`, assembling them into primitives according to `drawmode`.
//
//   Triangles     - every 3 indices form a triangle (leftovers are ignored)
//   TriangleFan   - first vertex is shared by all triangles
//   TriangleStrip - each new vertex forms a triangle with the previous two,
//                   alternating the winding
//   Lines         - every 2 indices form a line segment
//   Points        - one point per index
//
// Changes from the previous version:
//  * The index buffer is walked through a local cursor. Previously the
//    `elements` member itself was advanced, so the base pointer drifted after
//    every call unless SetIndexBuffer was called again.
//  * The minimum vertex count is checked per primitive type. A blanket
//    `vcount < 3` test used to discard valid 2-vertex line draws and
//    1- or 2-vertex point draws.
void PolyTriangleThreadData::DrawIndexed(int index, int vcount, PolyDrawMode drawmode)
{
	if (vcount <= 0)
		return;

	const unsigned int *cursor = elements + index;

	// Three vertex slots that get rotated via the pointer array, so shaded
	// vertices never have to be copied between slots.
	ShadedTriVertex vertbuffer[3];
	ShadedTriVertex *vert[3] = { &vertbuffer[0], &vertbuffer[1], &vertbuffer[2] };

	if (drawmode == PolyDrawMode::Triangles)
	{
		for (int i = 0; i < vcount / 3; i++)
		{
			for (int j = 0; j < 3; j++)
				*vert[j] = ShadeVertex(*(cursor++));
			DrawShadedTriangle(vert, ccw);
		}
	}
	else if (drawmode == PolyDrawMode::TriangleFan)
	{
		// Fewer than three vertices cannot form a triangle; do not read any.
		if (vcount < 3)
			return;

		*vert[0] = ShadeVertex(*(cursor++));
		*vert[1] = ShadeVertex(*(cursor++));
		for (int i = 2; i < vcount; i++)
		{
			*vert[2] = ShadeVertex(*(cursor++));
			DrawShadedTriangle(vert, ccw);
			// The newest vertex becomes the "previous" one; vert[0] stays fixed.
			std::swap(vert[1], vert[2]);
		}
	}
	else if (drawmode == PolyDrawMode::TriangleStrip)
	{
		if (vcount < 3)
			return;

		bool toggleccw = ccw;
		*vert[0] = ShadeVertex(*(cursor++));
		*vert[1] = ShadeVertex(*(cursor++));
		for (int i = 2; i < vcount; i++)
		{
			*vert[2] = ShadeVertex(*(cursor++));
			DrawShadedTriangle(vert, toggleccw);

			// Slide the window forward by one vertex and flip the winding.
			ShadedTriVertex *vtmp = vert[0];
			vert[0] = vert[1];
			vert[1] = vert[2];
			vert[2] = vtmp;
			toggleccw = !toggleccw;
		}
	}
	else if (drawmode == PolyDrawMode::Lines)
	{
		for (int i = 0; i < vcount / 2; i++)
		{
			*vert[0] = ShadeVertex(*(cursor++));
			*vert[1] = ShadeVertex(*(cursor++));
			DrawShadedLine(vert);
		}
	}
	else if (drawmode == PolyDrawMode::Points)
	{
		for (int i = 0; i < vcount; i++)
		{
			*vert[0] = ShadeVertex(*(cursor++));
			DrawShadedPoint(vert);
		}
	}
}
// Draws `vcount` consecutive vertices starting at vertex `index`, assembling
// them into primitives according to `drawmode`.
//
//   Triangles     - every 3 vertices form a triangle (leftovers are ignored)
//   TriangleFan   - first vertex is shared by all triangles
//   TriangleStrip - each new vertex forms a triangle with the previous two,
//                   alternating the winding
//   Lines         - every 2 vertices form a line segment
//   Points        - one point per vertex
//
// Change from the previous version: the minimum vertex count is checked per
// primitive type. A blanket `vcount < 3` test used to discard valid 2-vertex
// line draws and 1- or 2-vertex point draws.
void PolyTriangleThreadData::Draw(int index, int vcount, PolyDrawMode drawmode)
{
	if (vcount <= 0)
		return;

	int vinput = index;

	// Three vertex slots that get rotated via the pointer array, so shaded
	// vertices never have to be copied between slots.
	ShadedTriVertex vertbuffer[3];
	ShadedTriVertex *vert[3] = { &vertbuffer[0], &vertbuffer[1], &vertbuffer[2] };

	if (drawmode == PolyDrawMode::Triangles)
	{
		for (int i = 0; i < vcount / 3; i++)
		{
			for (int j = 0; j < 3; j++)
				*vert[j] = ShadeVertex(vinput++);
			DrawShadedTriangle(vert, ccw);
		}
	}
	else if (drawmode == PolyDrawMode::TriangleFan)
	{
		// Fewer than three vertices cannot form a triangle; do not shade any.
		if (vcount < 3)
			return;

		*vert[0] = ShadeVertex(vinput++);
		*vert[1] = ShadeVertex(vinput++);
		for (int i = 2; i < vcount; i++)
		{
			*vert[2] = ShadeVertex(vinput++);
			DrawShadedTriangle(vert, ccw);
			// The newest vertex becomes the "previous" one; vert[0] stays fixed.
			std::swap(vert[1], vert[2]);
		}
	}
	else if (drawmode == PolyDrawMode::TriangleStrip)
	{
		if (vcount < 3)
			return;

		bool toggleccw = ccw;
		*vert[0] = ShadeVertex(vinput++);
		*vert[1] = ShadeVertex(vinput++);
		for (int i = 2; i < vcount; i++)
		{
			*vert[2] = ShadeVertex(vinput++);
			DrawShadedTriangle(vert, toggleccw);

			// Slide the window forward by one vertex and flip the winding.
			ShadedTriVertex *vtmp = vert[0];
			vert[0] = vert[1];
			vert[1] = vert[2];
			vert[2] = vtmp;
			toggleccw = !toggleccw;
		}
	}
	else if (drawmode == PolyDrawMode::Lines)
	{
		for (int i = 0; i < vcount / 2; i++)
		{
			*vert[0] = ShadeVertex(vinput++);
			*vert[1] = ShadeVertex(vinput++);
			DrawShadedLine(vert);
		}
	}
	else if (drawmode == PolyDrawMode::Points)
	{
		for (int i = 0; i < vcount; i++)
		{
			*vert[0] = ShadeVertex(vinput++);
			DrawShadedPoint(vert);
		}
	}
}
// Runs the vertex shader for vertex `index` and returns the shaded result.
// The input assembly loads the vertex first; the shader's SIMPLE and SPHEREMAP
// switches are then derived from the current special effect before main() runs.
ShadedTriVertex PolyTriangleThreadData::ShadeVertex(int index)
{
	inputAssembly->Load(this, vertices, index);

	const bool burnOrStencil = (SpecialEffect == EFF_BURN) || (SpecialEffect == EFF_STENCIL);
	const bool sphereMapped = (SpecialEffect == EFF_SPHEREMAP);

	mainVertexShader.SIMPLE = burnOrStencil;
	mainVertexShader.SPHEREMAP = sphereMapped;
	mainVertexShader.main();

	return mainVertexShader;
}
// Returns true when the triangle has (almost) no area.
// Two edge vectors leaving vertex 0 are built from the X, Y and W components
// of gl_Position (Z is not used); the triangle counts as degenerate when the
// squared length of their cross product is at most 1e-8.
bool PolyTriangleThreadData::IsDegenerate(const ShadedTriVertex *const* vert)
{
	const auto &p0 = vert[0]->gl_Position;
	const auto &p1 = vert[1]->gl_Position;
	const auto &p2 = vert[2]->gl_Position;

	// Edge p0 -> p1.
	float e1x = p1.X - p0.X;
	float e1y = p1.Y - p0.Y;
	float e1w = p1.W - p0.W;

	// Edge p0 -> p2.
	float e2x = p2.X - p0.X;
	float e2y = p2.Y - p0.Y;
	float e2w = p2.W - p0.W;

	float nx = e1y * e2w - e1w * e2y;
	float ny = e1w * e2x - e1x * e2w;
	float nz = e1x * e2y - e1y * e2x;

	float lengthSquared = nx * nx + ny * ny + nz * nz;
	return lengthSquared <= 1.e-8f;
}
// Returns true when the screen-space triangle (v1, v2, v3) is front facing,
// i.e. when the winding sum computed from its x/y coordinates is <= 0.
bool PolyTriangleThreadData::IsFrontfacing(TriDrawTriangleArgs *args)
{
	const auto &p1 = *args->v1;
	const auto &p2 = *args->v2;
	const auto &p3 = *args->v3;

	// Sum of the 2D cross products of consecutive vertices (twice the signed area).
	float winding =
		p1.x * p2.y - p2.x * p1.y +
		p2.x * p3.y - p3.x * p2.y +
		p3.x * p1.y - p1.x * p3.y;

	return winding <= 0.0f;
}
// Point primitives are not implemented: this is a no-op and the vertex is ignored.
void PolyTriangleThreadData::DrawShadedPoint(const ShadedTriVertex *const* vertex)
{
}
// Draws the line segment vert[0] -> vert[1].
// The segment is clipped against nine clip distances, projected to screen
// coordinates and then plotted one pixel at a time in a single flat colour
// taken from vert[0]. No depth or stencil handling is done here.
void PolyTriangleThreadData::DrawShadedLine(const ShadedTriVertex *const* vert)
{
// Per endpoint: six distances derived from the homogeneous position
// (+/-X, +/-Y, +/-Z against W) followed by the three gl_ClipDistance values.
static const int numclipdistances = 9;
float clipdistance[numclipdistances * 2];
float *clipd = clipdistance;
for (int i = 0; i < 2; i++)
{
const auto &v = *vert[i];
clipd[0] = v.gl_Position.X + v.gl_Position.W;
clipd[1] = v.gl_Position.W - v.gl_Position.X;
clipd[2] = v.gl_Position.Y + v.gl_Position.W;
clipd[3] = v.gl_Position.W - v.gl_Position.Y;
clipd[4] = v.gl_Position.Z + v.gl_Position.W;
clipd[5] = v.gl_Position.W - v.gl_Position.Z;
clipd[6] = v.gl_ClipDistance[0];
clipd[7] = v.gl_ClipDistance[1];
clipd[8] = v.gl_ClipDistance[2];
clipd += numclipdistances;
}
// [t1, t2] is the part of the segment (0 = vert[0], 1 = vert[1]) that is
// still visible. Each distance that is negative at an endpoint pulls the
// matching end inwards; the line is dropped once the interval is empty.
float t1 = 0.0f;
float t2 = 1.0f;
for (int p = 0; p < numclipdistances; p++)
{
float clipdistance1 = clipdistance[0 * numclipdistances + p];
float clipdistance2 = clipdistance[1 * numclipdistances + p];
if (clipdistance1 < 0.0f) t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1);
if (clipdistance2 < 0.0f) t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2);
if (t1 >= t2)
return;
}
// Interpolation weights {vert[0], vert[1]} for the clipped start point,
// followed by the pair for the clipped end point.
float weights[] = { 1.0f - t1, t1, 1.0f - t2, t2 };
ScreenTriVertex clippedvert[2];
for (int i = 0; i < 2; i++)
{
auto &v = clippedvert[i];
// Only x, y, z and w are filled in below; every other field stays zero.
memset(&v, 0, sizeof(ScreenTriVertex));
for (int w = 0; w < 2; w++)
{
float weight = weights[i * 2 + w];
v.x += vert[w]->gl_Position.X * weight;
v.y += vert[w]->gl_Position.Y * weight;
v.z += vert[w]->gl_Position.Z * weight;
v.w += vert[w]->gl_Position.W * weight;
}
// Calculate normalized device coordinates:
v.w = 1.0f / v.w;
v.x *= v.w;
v.y *= v.w;
v.z *= v.w;
// Apply viewport scale to get screen coordinates:
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
if (topdown)
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
else
v.y = viewport_y + viewport_height * (1.0f + v.y) * 0.5f;
}
// Flat colour from the first vertex; each channel is scaled to 0..255 and rounded.
uint32_t vColorA = (int)(vert[0]->vColor.W * 255.0f + 0.5f);
uint32_t vColorR = (int)(vert[0]->vColor.X * 255.0f + 0.5f);
uint32_t vColorG = (int)(vert[0]->vColor.Y * 255.0f + 0.5f);
uint32_t vColorB = (int)(vert[0]->vColor.Z * 255.0f + 0.5f);
uint32_t color = MAKEARGB(vColorA, vColorR, vColorG, vColorB);
// Slow and naive implementation. Hopefully fast enough..
float x1 = clippedvert[0].x;
float y1 = clippedvert[0].y;
float x2 = clippedvert[1].x;
float y2 = clippedvert[1].y;
float dx = x2 - x1;
float dy = y2 - y1;
// Advance one pixel per iteration along the axis with the larger extent.
// NOTE(review): when both endpoints coincide, step is 0 and dx/dy become 0/0;
// the loop then plots only the i == 0 pixel - confirm this is intended.
// NOTE(review): abs() is applied to floats here - confirm the floating-point
// overload is the one in scope for every toolchain.
float step = (abs(dx) >= abs(dy)) ? abs(dx) : abs(dy);
dx /= step;
dy /= step;
float x = x1;
float y = y1;
int istep = (int)step;
int pixelsize = dest_bgra ? 4 : 1;
for (int i = 0; i <= istep; i++)
{
int scrx = (int)x;
int scry = (int)y;
// Skip pixels outside the clip rectangle and rows owned by other threads.
if (scrx >= clip.left && scrx < clip.right && scry >= clip.top && scry < clip.bottom && !line_skipped_by_thread(scry))
{
// NOTE(review): rows are addressed with dest_width rather than dest_pitch -
// confirm the two are always equal for the targets used here.
uint8_t *destpixel = dest + (scrx + scry * dest_width) * pixelsize;
if (pixelsize == 4)
{
*reinterpret_cast<uint32_t*>(destpixel) = color;
}
else
{
// One byte per pixel: only the low 8 bits of color are stored.
*destpixel = color;
}
}
x += dx;
y += dy;
}
}
// Draws one shaded triangle.
// Steps: reject degenerate input, clip (ClipEdge), rebuild the clipped
// polygon's vertices from barycentric weights, project them to screen
// coordinates, then rasterise the polygon as a fan of screen triangles.
// `ccw` is the winding to use; it is a by-value copy and is adjusted locally.
void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *const* vert, bool ccw)
{
// Reject triangle if degenerate
if (IsDegenerate(vert))
return;
// Cull, clip and generate additional vertices as needed
ScreenTriVertex clippedvert[max_additional_vertices];
// ClipEdge returns the vertex count of the clipped polygon and leaves the
// matching barycentric weights (three per vertex) in `weights`.
int numclipvert = ClipEdge(vert);
// Convert barycentric weights to actual vertices
for (int i = 0; i < numclipvert; i++)
{
auto &v = clippedvert[i];
memset(&v, 0, sizeof(ScreenTriVertex));
for (int w = 0; w < 3; w++)
{
float weight = weights[i * 3 + w];
v.x += vert[w]->gl_Position.X * weight;
v.y += vert[w]->gl_Position.Y * weight;
v.z += vert[w]->gl_Position.Z * weight;
v.w += vert[w]->gl_Position.W * weight;
v.u += vert[w]->vTexCoord.X * weight;
v.v += vert[w]->vTexCoord.Y * weight;
v.worldX += vert[w]->pixelpos.X * weight;
v.worldY += vert[w]->pixelpos.Y * weight;
v.worldZ += vert[w]->pixelpos.Z * weight;
v.a += vert[w]->vColor.W * weight;
v.r += vert[w]->vColor.X * weight;
v.g += vert[w]->vColor.Y * weight;
v.b += vert[w]->vColor.Z * weight;
v.gradientdistZ += vert[w]->gradientdist.Z * weight;
}
}
#ifdef NO_SSE
// Map to 2D viewport:
for (int j = 0; j < numclipvert; j++)
{
auto &v = clippedvert[j];
// Calculate normalized device coordinates:
v.w = 1.0f / v.w;
v.x *= v.w;
v.y *= v.w;
v.z *= v.w;
// Apply viewport scale to get screen coordinates:
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
if (topdown)
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
else
v.y = viewport_y + viewport_height * (1.0f + v.y) * 0.5f;
}
#else
// Map to 2D viewport:
__m128 mviewport_x = _mm_set1_ps((float)viewport_x);
__m128 mviewport_y = _mm_set1_ps((float)viewport_y);
__m128 mviewport_halfwidth = _mm_set1_ps(viewport_width * 0.5f);
__m128 mviewport_halfheight = _mm_set1_ps(viewport_height * 0.5f);
__m128 mone = _mm_set1_ps(1.0f);
// Four vertices are processed per iteration, so the count is rounded up to a
// multiple of four. Entries past numclipvert were not initialised above; they
// are read and rewritten here but never used by the drawing loops below.
int sse_length = (numclipvert + 3) / 4 * 4;
for (int j = 0; j < sse_length; j += 4)
{
// Each load fetches four floats starting at .x, which this code treats as
// x, y, z, w (assumes they are the first four consecutive members of
// ScreenTriVertex). The transpose regroups them into per-component lanes.
__m128 vx = _mm_loadu_ps(&clippedvert[j].x);
__m128 vy = _mm_loadu_ps(&clippedvert[j + 1].x);
__m128 vz = _mm_loadu_ps(&clippedvert[j + 2].x);
__m128 vw = _mm_loadu_ps(&clippedvert[j + 3].x);
_MM_TRANSPOSE4_PS(vx, vy, vz, vw);
// Calculate normalized device coordinates:
vw = _mm_div_ps(mone, vw);
vx = _mm_mul_ps(vx, vw);
vy = _mm_mul_ps(vy, vw);
vz = _mm_mul_ps(vz, vw);
// Apply viewport scale to get screen coordinates:
vx = _mm_add_ps(mviewport_x, _mm_mul_ps(mviewport_halfwidth, _mm_add_ps(mone, vx)));
if (topdown)
vy = _mm_add_ps(mviewport_y, _mm_mul_ps(mviewport_halfheight, _mm_sub_ps(mone, vy)));
else
vy = _mm_add_ps(mviewport_y, _mm_mul_ps(mviewport_halfheight, _mm_add_ps(mone, vy)));
// Transpose back to one vertex per register before storing.
_MM_TRANSPOSE4_PS(vx, vy, vz, vw);
_mm_storeu_ps(&clippedvert[j].x, vx);
_mm_storeu_ps(&clippedvert[j + 1].x, vy);
_mm_storeu_ps(&clippedvert[j + 2].x, vz);
_mm_storeu_ps(&clippedvert[j + 3].x, vw);
}
#endif
// The bottom-up mapping mirrors the Y axis, which reverses the winding.
if (!topdown) ccw = !ccw;
TriDrawTriangleArgs args;
// Two-sided drawing: take the winding from the first three clipped vertices
// instead of the requested one.
if (twosided && numclipvert > 2)
{
args.v1 = &clippedvert[0];
args.v2 = &clippedvert[1];
args.v3 = &clippedvert[2];
ccw = !IsFrontfacing(&args);
}
// Draw screen triangles
if (ccw)
{
// Fan anchored at the last vertex, walking the polygon backwards.
for (int i = numclipvert - 1; i > 1; i--)
{
args.v1 = &clippedvert[numclipvert - 1];
args.v2 = &clippedvert[i - 1];
args.v3 = &clippedvert[i - 2];
if (IsFrontfacing(&args) == ccw && args.CalculateGradients())
{
ScreenTriangle::Draw(&args, this);
}
}
}
else
{
// Fan anchored at the first vertex, walking the polygon forwards.
for (int i = 2; i < numclipvert; i++)
{
args.v1 = &clippedvert[0];
args.v2 = &clippedvert[i - 1];
args.v3 = &clippedvert[i];
if (IsFrontfacing(&args) != ccw && args.CalculateGradients())
{
ScreenTriangle::Draw(&args, this);
}
}
}
}
// Clips a triangle against nine half-spaces and returns the vertex count of
// the resulting convex polygon (3 when nothing had to be clipped, 0 when
// nothing is left).
//
// The polygon is not returned as positions but as barycentric weights:
// after the call, `weights` points at 3 floats per output vertex giving that
// vertex as a blend of verts[0..2]. Callers interpolate whatever attributes
// they need from those weights.
//
// The half-spaces are the six derived from the homogeneous position
// (-w <= x, y, z <= w) plus the three gl_ClipDistance values.
//
// Change from the previous version (NO_SSE path only): the "does anything
// need clipping?" test now checks every clip distance for being negative.
// It used to test `clipd[i]` (indexed by the vertex counter) for being
// non-zero, which disagreed with the SSE path and made the early-out
// unreliable.
int PolyTriangleThreadData::ClipEdge(const ShadedTriVertex *const* verts)
{
	// use barycentric weights for clipped vertices
	weights = weightsbuffer;
	for (int i = 0; i < 3; i++)
	{
		weights[i * 3 + 0] = 0.0f;
		weights[i * 3 + 1] = 0.0f;
		weights[i * 3 + 2] = 0.0f;
		weights[i * 3 + i] = 1.0f;
	}

	// Clip and cull so that the following is true for all vertices:
	// -v.w <= v.x <= v.w
	// -v.w <= v.y <= v.w
	// -v.w <= v.z <= v.w

	// halfspace clip distances
	static const int numclipdistances = 9;
#ifdef NO_SSE
	// Layout: clipdistance[vertex * numclipdistances + plane]
	float clipdistance[numclipdistances * 3];
	bool needsclipping = false;
	float *clipd = clipdistance;
	for (int i = 0; i < 3; i++)
	{
		const auto &v = *verts[i];
		clipd[0] = v.gl_Position.X + v.gl_Position.W;
		clipd[1] = v.gl_Position.W - v.gl_Position.X;
		clipd[2] = v.gl_Position.Y + v.gl_Position.W;
		clipd[3] = v.gl_Position.W - v.gl_Position.Y;
		clipd[4] = v.gl_Position.Z + v.gl_Position.W;
		clipd[5] = v.gl_Position.W - v.gl_Position.Z;
		clipd[6] = v.gl_ClipDistance[0];
		clipd[7] = v.gl_ClipDistance[1];
		clipd[8] = v.gl_ClipDistance[2];

		// A vertex behind any plane (negative distance) forces the clip step.
		for (int j = 0; j < numclipdistances; j++)
			needsclipping = needsclipping || clipd[j] < 0.0f;

		clipd += numclipdistances;
	}

	// If all halfspace clip distances are positive then the entire triangle is visible. Skip the expensive clipping step.
	if (!needsclipping)
	{
		return 3;
	}
#else
	// Load the three positions and transpose so that each register holds one
	// component for all vertices; the fourth lane is zero padding.
	__m128 mx = _mm_loadu_ps(&verts[0]->gl_Position.X);
	__m128 my = _mm_loadu_ps(&verts[1]->gl_Position.X);
	__m128 mz = _mm_loadu_ps(&verts[2]->gl_Position.X);
	__m128 mw = _mm_setzero_ps();
	_MM_TRANSPOSE4_PS(mx, my, mz, mw);

	__m128 clipd0 = _mm_add_ps(mx, mw);
	__m128 clipd1 = _mm_sub_ps(mw, mx);
	__m128 clipd2 = _mm_add_ps(my, mw);
	__m128 clipd3 = _mm_sub_ps(mw, my);
	__m128 clipd4 = _mm_add_ps(mz, mw);
	__m128 clipd5 = _mm_sub_ps(mw, mz);
	__m128 clipd6 = _mm_setr_ps(verts[0]->gl_ClipDistance[0], verts[1]->gl_ClipDistance[0], verts[2]->gl_ClipDistance[0], 0.0f);
	__m128 clipd7 = _mm_setr_ps(verts[0]->gl_ClipDistance[1], verts[1]->gl_ClipDistance[1], verts[2]->gl_ClipDistance[1], 0.0f);
	__m128 clipd8 = _mm_setr_ps(verts[0]->gl_ClipDistance[2], verts[1]->gl_ClipDistance[2], verts[2]->gl_ClipDistance[2], 0.0f);

	// Any negative distance in any lane means clipping is required.
	__m128 mneedsclipping = _mm_cmplt_ps(clipd0, _mm_setzero_ps());
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd1, _mm_setzero_ps()));
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd2, _mm_setzero_ps()));
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd3, _mm_setzero_ps()));
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd4, _mm_setzero_ps()));
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd5, _mm_setzero_ps()));
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd6, _mm_setzero_ps()));
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd7, _mm_setzero_ps()));
	mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd8, _mm_setzero_ps()));
	if (_mm_movemask_ps(mneedsclipping) == 0)
	{
		return 3;
	}

	// Layout: clipdistance[plane * 4 + vertex] (fourth slot per plane unused)
	float clipdistance[numclipdistances * 4];
	_mm_storeu_ps(clipdistance, clipd0);
	_mm_storeu_ps(clipdistance + 4, clipd1);
	_mm_storeu_ps(clipdistance + 8, clipd2);
	_mm_storeu_ps(clipdistance + 12, clipd3);
	_mm_storeu_ps(clipdistance + 16, clipd4);
	_mm_storeu_ps(clipdistance + 20, clipd5);
	_mm_storeu_ps(clipdistance + 24, clipd6);
	_mm_storeu_ps(clipdistance + 28, clipd7);
	_mm_storeu_ps(clipdistance + 32, clipd8);
#endif

	// Clip against each halfspace
	// The two halves of weightsbuffer are used as ping-pong buffers: each
	// plane reads the polygon from `input` and writes the clipped one to `output`.
	float *input = weights;
	float *output = weights + max_additional_vertices * 3;
	int inputverts = 3;
	for (int p = 0; p < numclipdistances; p++)
	{
		// Clip each edge
		int outputverts = 0;
		for (int i = 0; i < inputverts; i++)
		{
			int j = (i + 1) % inputverts;
			// Distance of both edge endpoints to plane p, obtained by blending
			// the original vertices' distances with the barycentric weights.
#ifdef NO_SSE
			float clipdistance1 =
				clipdistance[0 * numclipdistances + p] * input[i * 3 + 0] +
				clipdistance[1 * numclipdistances + p] * input[i * 3 + 1] +
				clipdistance[2 * numclipdistances + p] * input[i * 3 + 2];

			float clipdistance2 =
				clipdistance[0 * numclipdistances + p] * input[j * 3 + 0] +
				clipdistance[1 * numclipdistances + p] * input[j * 3 + 1] +
				clipdistance[2 * numclipdistances + p] * input[j * 3 + 2];
#else
			float clipdistance1 =
				clipdistance[0 + p * 4] * input[i * 3 + 0] +
				clipdistance[1 + p * 4] * input[i * 3 + 1] +
				clipdistance[2 + p * 4] * input[i * 3 + 2];

			float clipdistance2 =
				clipdistance[0 + p * 4] * input[j * 3 + 0] +
				clipdistance[1 + p * 4] * input[j * 3 + 1] +
				clipdistance[2 + p * 4] * input[j * 3 + 2];
#endif

			// Clip halfspace
			// Edges entirely behind the plane contribute nothing. The size test
			// keeps room for the (up to) two vertices an edge can emit.
			if ((clipdistance1 >= 0.0f || clipdistance2 >= 0.0f) && outputverts + 1 < max_additional_vertices)
			{
				float t1 = (clipdistance1 < 0.0f) ? MAX(-clipdistance1 / (clipdistance2 - clipdistance1), 0.0f) : 0.0f;
				float t2 = (clipdistance2 < 0.0f) ? MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), 1.0f) : 1.0f;

				// add t1 vertex
				for (int k = 0; k < 3; k++)
					output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t1) + input[j * 3 + k] * t1;
				outputverts++;

				// The edge's end point is only emitted when it was clipped;
				// otherwise it is produced as the start of the next edge.
				if (t2 != 1.0f && t2 > t1)
				{
					// add t2 vertex
					for (int k = 0; k < 3; k++)
						output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t2) + input[j * 3 + k] * t2;
					outputverts++;
				}
			}
		}
		std::swap(input, output);
		inputverts = outputverts;
		if (inputverts == 0)
			break;
	}

	weights = input;
	return inputverts;
}
// Returns the PolyTriangleThreadData attached to `thread`, creating it on
// first use from the thread's core and NUMA settings. The object is owned by
// thread->poly; the returned pointer is non-owning.
PolyTriangleThreadData *PolyTriangleThreadData::Get(DrawerThread *thread)
{
	auto &instance = thread->poly;
	if (instance)
		return instance.get();

	instance = std::make_shared<PolyTriangleThreadData>(thread->core, thread->num_cores, thread->numa_node, thread->num_numa_nodes, thread->numa_start_y, thread->numa_end_y);
	return instance.get();
}

View file

@ -0,0 +1,197 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "poly_triangle.h"
// One dynamic light as consumed by the polygon drawers.
struct PolyLight
{
	// Packed as (r << 16) | (g << 8) | b.
	uint32_t color;

	// Light position.
	float x;
	float y;
	float z;

	// Light radius; PushStreamData stores it negated when the source colour's W is negative.
	float radius;
};
// Drawing state and entry points of the polygon renderer for one DrawerThread
// (see Get()). Holds the current render target, pipeline switches and bound
// buffers, plus scratch storage used while drawing.
class PolyTriangleThreadData
{
public:
// Stores the thread's core slot, NUMA node and owned row range.
PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes, int numa_start_y, int numa_end_y);
// Clear this thread's rows of the depth / stencil buffer.
void ClearDepth(float value);
void ClearStencil(uint8_t value);
// Sets the viewport rectangle and the render target it applies to.
void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, PolyDepthStencil *depthstencil, bool topdown);
void SetCullCCW(bool value) { ccw = value; }
void SetTwoSided(bool value) { twosided = value; }
// The setters below store the given pointer only; nothing is copied.
void SetInputAssembly(PolyInputAssembly *input) { inputAssembly = input; }
void SetVertexBuffer(const void *data) { vertices = data; }
void SetIndexBuffer(const void *data) { elements = (const unsigned int *)data; }
void SetLightBuffer(const void *data) { lights = (const FVector4 *)data; }
void SetViewpointUniforms(const HWViewpointUniforms *uniforms);
void SetDepthClamp(bool on);
void SetDepthMask(bool on);
void SetDepthFunc(int func);
void SetDepthRange(float min, float max);
void SetDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor);
void SetColorMask(bool r, bool g, bool b, bool a);
void SetStencil(int stencilRef, int op);
void SetCulling(int mode);
void EnableStencil(bool on);
void SetScissor(int x, int y, int w, int h);
void SetRenderStyle(FRenderStyle style);
void SetTexture(int unit, const void *pixels, int width, int height, bool bgra);
void SetShader(int specialEffect, int effectState, bool alphaTest);
// Recomputes `clip` from the viewport, the scissor and the target size.
void UpdateClip();
void PushStreamData(const StreamData &data, const PolyPushConstants &constants);
void PushMatrices(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix);
// Draw `count` vertices starting at `index`, through the index buffer
// (DrawIndexed) or straight from the vertex buffer (Draw).
void DrawIndexed(int index, int count, PolyDrawMode mode);
void Draw(int index, int vcount, PolyDrawMode mode);
// Work split: this thread handles rows in [numa_start_y, numa_end_y) whose
// index modulo num_cores equals core.
int32_t core;
int32_t num_cores;
int32_t numa_node;
int32_t num_numa_nodes;
int numa_start_y;
int numa_end_y;
// True if `line` is not one of the rows this thread handles.
bool line_skipped_by_thread(int line)
{
return line < numa_start_y || line >= numa_end_y || line % num_cores != core;
}
// Number of rows to skip, counted from first_line, to reach the first row
// at or after max(first_line, numa_start_y) that this thread handles.
int skipped_by_thread(int first_line)
{
int clip_first_line = MAX(first_line, numa_start_y);
int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores;
return clip_first_line + core_skip - first_line;
}
// Number of rows this thread handles among `count` rows starting at
// first_line (limited by numa_end_y); never negative.
int count_for_thread(int first_line, int count)
{
count = MIN(count, numa_end_y - first_line);
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
return MAX(c, 0);
}
// Per-pixel arrays, one entry per column up to MAXWIDTH.
// NOTE(review): presumably scratch data for the scanline being shaded -
// the code that fills it is outside this file.
struct Scanline
{
float W[MAXWIDTH];
uint16_t U[MAXWIDTH];
uint16_t V[MAXWIDTH];
float WorldX[MAXWIDTH];
float WorldY[MAXWIDTH];
float WorldZ[MAXWIDTH];
uint8_t vColorA[MAXWIDTH];
uint8_t vColorR[MAXWIDTH];
uint8_t vColorG[MAXWIDTH];
uint8_t vColorB[MAXWIDTH];
float GradientdistZ[MAXWIDTH];
uint32_t FragColor[MAXWIDTH];
uint16_t lightarray[MAXWIDTH];
uint32_t dynlights[MAXWIDTH];
uint8_t discard[MAXWIDTH];
} scanline;
// Returns the instance attached to `thread`, creating it on first use.
static PolyTriangleThreadData *Get(DrawerThread *thread);
// Render target, as passed to SetViewport.
int dest_pitch = 0;
int dest_width = 0;
int dest_height = 0;
bool dest_bgra = false;
uint8_t *dest = nullptr;
PolyDepthStencil *depthstencil = nullptr;
// Selects which way screen Y grows when projecting vertices.
bool topdown = true;
float depthbias = 0.0f;
int viewport_y = 0;
// Rectangle stored as edges. `scissor` is set by SetScissor, `clip` is
// derived by UpdateClip.
struct ClipRect
{
int left = 0;
int top = 0;
int right = 0;
int bottom = 0;
} clip, scissor;
FRenderStyle RenderStyle;
int SpecialEffect = EFF_NONE;
int EffectState = 0;
bool AlphaTest = false;
// Alpha threshold kept in the top byte (bits 24..31).
uint32_t AlphaThreshold = 0x7f000000;
// Points at the constants last passed to PushStreamData (not owned).
const PolyPushConstants* PushConstants = nullptr;
// Bound buffers (not owned).
const void *vertices = nullptr;
const unsigned int *elements = nullptr;
const FVector4 *lights = nullptr;
// Lights gathered by PushStreamData; at most maxPolyLights are kept.
enum { maxPolyLights = 16 };
PolyLight polyLights[maxPolyLights];
int numPolyLights = 0;
PolyMainVertexShader mainVertexShader;
// Texture bindings, indexed by texture unit.
struct TextureUnit
{
const void* pixels = nullptr;
int width = 0;
int height = 0;
bool bgra = true;
} textures[16];
// Pipeline switches set through the Set*/Enable* functions above.
bool DepthTest = false;
bool StencilTest = true;
bool WriteStencil = true;
bool WriteColor = true;
bool WriteDepth = true;
uint8_t StencilTestValue = 0;
uint8_t StencilWriteValue = 0;
// Function pointers for the shading and colour-write stages; null by
// default and not assigned anywhere in this class.
void (*FragmentShader)(int x0, int x1, PolyTriangleThreadData* thread) = nullptr;
void (*WriteColorFunc)(int y, int x0, int x1, PolyTriangleThreadData* thread) = nullptr;
private:
// Runs the vertex shader for one vertex.
ShadedTriVertex ShadeVertex(int index);
void DrawShadedPoint(const ShadedTriVertex *const* vertex);
void DrawShadedLine(const ShadedTriVertex *const* vertices);
void DrawShadedTriangle(const ShadedTriVertex *const* vertices, bool ccw);
static bool IsDegenerate(const ShadedTriVertex *const* vertices);
static bool IsFrontfacing(TriDrawTriangleArgs *args);
// Clips a triangle; the result is left in `weights` as barycentric weights.
int ClipEdge(const ShadedTriVertex *const* verts);
int viewport_x = 0;
int viewport_width = 0;
int viewport_height = 0;
bool ccw = true;
bool twosided = true;
PolyInputAssembly *inputAssembly = nullptr;
// Clipping scratch: two buffers of max_additional_vertices * 3 weights each;
// `weights` points at whichever holds the latest result.
enum { max_additional_vertices = 16 };
float weightsbuffer[max_additional_vertices * 3 * 2];
float *weights = nullptr;
};

File diff suppressed because it is too large Load diff

View file

@ -119,467 +119,3 @@ class PolyInputAssembly
public:
virtual void Load(PolyTriangleThreadData *thread, const void *vertices, int index) = 0;
};
// One dynamic light as consumed by the polygon drawers.
struct PolyLight
{
	// Packed light colour.
	uint32_t color;

	// Light position.
	float x;
	float y;
	float z;

	// Light radius.
	float radius;
};
// Per-thread state of the softpoly triangle drawer: render target description,
// pipeline state, bound buffers and a one-row scratch buffer (scanline).
//
// Work is split by framebuffer line: this thread handles a line only when it
// lies in [numa_start_y, numa_end_y) and (line % num_cores) == core.
class PolyTriangleThreadData
{
public:
	PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes, int numa_start_y, int numa_end_y)
		: core(core), num_cores(num_cores), numa_node(numa_node), num_numa_nodes(num_numa_nodes), numa_start_y(numa_start_y), numa_end_y(numa_end_y)
	{
	}

	// ---- State setters (bodies live in the .cpp unless trivial) ----
	void ClearDepth(float value);
	void ClearStencil(uint8_t value);
	void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, PolyDepthStencil *depthstencil, bool topdown);

	void SetCullCCW(bool value)
	{
		ccw = value;
	}

	void SetTwoSided(bool value)
	{
		twosided = value;
	}

	void SetInputAssembly(PolyInputAssembly *input)
	{
		inputAssembly = input;
	}

	// The buffer setters below only store the pointer; nothing is copied.
	void SetVertexBuffer(const void *data)
	{
		vertices = data;
	}

	void SetIndexBuffer(const void *data)
	{
		elements = static_cast<const unsigned int *>(data);
	}

	void SetLightBuffer(const void *data)
	{
		lights = static_cast<const FVector4 *>(data);
	}

	void SetViewpointUniforms(const HWViewpointUniforms *uniforms);
	void SetDepthClamp(bool on);
	void SetDepthMask(bool on);
	void SetDepthFunc(int func);
	void SetDepthRange(float min, float max);
	void SetDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor);
	void SetColorMask(bool r, bool g, bool b, bool a);
	void SetStencil(int stencilRef, int op);
	void SetCulling(int mode);
	void EnableStencil(bool on);
	void SetScissor(int x, int y, int w, int h);
	void SetRenderStyle(FRenderStyle style);
	void SetTexture(int unit, const void *pixels, int width, int height, bool bgra);
	void SetShader(int specialEffect, int effectState, bool alphaTest);
	void UpdateClip();

	void PushStreamData(const StreamData &data, const PolyPushConstants &constants);
	void PushMatrices(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix);

	void DrawIndexed(int index, int count, PolyDrawMode mode);
	void Draw(int index, int vcount, PolyDrawMode mode);

	// ---- Line partitioning parameters (set once by the constructor) ----
	int32_t core;
	int32_t num_cores;
	int32_t numa_node;
	int32_t num_numa_nodes;

	int numa_start_y;
	int numa_end_y;

	// True when `line` is NOT handled by this thread.
	bool line_skipped_by_thread(int line)
	{
		if (line < numa_start_y)
			return true;
		if (line >= numa_end_y)
			return true;
		return line % num_cores != core;
	}

	// Number of lines to step forward from `first_line` to reach the first line
	// at or after max(first_line, numa_start_y) whose index modulo num_cores is core.
	int skipped_by_thread(int first_line)
	{
		const int firstCandidate = MAX(first_line, numa_start_y);
		// The outer modulo folds the result back into [0, num_cores), which also
		// covers a negative remainder when firstCandidate < core.
		const int stepsToOwnLine = (num_cores - (firstCandidate - core) % num_cores) % num_cores;
		return firstCandidate + stepsToOwnLine - first_line;
	}

	// How many of the `count` lines starting at `first_line` belong to this
	// thread, after clipping to numa_end_y. Never negative.
	int count_for_thread(int first_line, int count)
	{
		const int clippedCount = MIN(count, numa_end_y - first_line);
		const int ownLines = (clippedCount - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
		return MAX(ownLines, 0);
	}

	// Scratch arrays for one row, indexed by x (capacity MAXWIDTH each).
	struct Scanline
	{
		float W[MAXWIDTH];
		uint16_t U[MAXWIDTH];
		uint16_t V[MAXWIDTH];
		float WorldX[MAXWIDTH];
		float WorldY[MAXWIDTH];
		float WorldZ[MAXWIDTH];
		uint8_t vColorA[MAXWIDTH];
		uint8_t vColorR[MAXWIDTH];
		uint8_t vColorG[MAXWIDTH];
		uint8_t vColorB[MAXWIDTH];
		float GradientdistZ[MAXWIDTH];
		uint32_t FragColor[MAXWIDTH];
		uint16_t lightarray[MAXWIDTH];
		uint32_t dynlights[MAXWIDTH];
		uint8_t discard[MAXWIDTH];
	} scanline;

	// Returns the PolyTriangleThreadData associated with a drawer thread.
	static PolyTriangleThreadData *Get(DrawerThread *thread);

	// ---- Render target ----
	int dest_pitch = 0;
	int dest_width = 0;
	int dest_height = 0;
	bool dest_bgra = false;
	uint8_t *dest = nullptr;
	PolyDepthStencil *depthstencil = nullptr;
	bool topdown = true;

	float depthbias = 0.0f;

	int viewport_y = 0;

	struct ClipRect
	{
		int left = 0;
		int top = 0;
		int right = 0;
		int bottom = 0;
	} clip, scissor;

	// ---- Shading state ----
	FRenderStyle RenderStyle;
	int SpecialEffect = EFF_NONE;
	int EffectState = 0;
	bool AlphaTest = false;
	uint32_t AlphaThreshold = 0x7f000000;
	const PolyPushConstants* PushConstants = nullptr;

	// ---- Bound buffers (not owned) ----
	const void *vertices = nullptr;
	const unsigned int *elements = nullptr;
	const FVector4 *lights = nullptr;

	enum { maxPolyLights = 16 };
	PolyLight polyLights[maxPolyLights];
	int numPolyLights = 0;

	PolyMainVertexShader mainVertexShader;

	struct TextureUnit
	{
		const void* pixels = nullptr;
		int width = 0;
		int height = 0;
		bool bgra = true;
	} textures[16];

	// ---- Depth / stencil / color write state ----
	bool DepthTest = false;
	bool StencilTest = true;
	bool WriteStencil = true;
	bool WriteColor = true;
	bool WriteDepth = true;
	uint8_t StencilTestValue = 0;
	uint8_t StencilWriteValue = 0;

	// Currently selected per-span callbacks.
	void (*FragmentShader)(int x0, int x1, PolyTriangleThreadData* thread) = nullptr;
	void (*WriteColorFunc)(int y, int x0, int x1, PolyTriangleThreadData* thread) = nullptr;

private:
	ShadedTriVertex ShadeVertex(int index);
	void DrawShadedPoint(const ShadedTriVertex *const* vertex);
	void DrawShadedLine(const ShadedTriVertex *const* vertices);
	void DrawShadedTriangle(const ShadedTriVertex *const* vertices, bool ccw);
	static bool IsDegenerate(const ShadedTriVertex *const* vertices);
	static bool IsFrontfacing(TriDrawTriangleArgs *args);

	int ClipEdge(const ShadedTriVertex *const* verts);

	int viewport_x = 0;
	int viewport_width = 0;
	int viewport_height = 0;
	bool ccw = true;
	bool twosided = true;
	PolyInputAssembly *inputAssembly = nullptr;

	enum { max_additional_vertices = 16 };
	float weightsbuffer[max_additional_vertices * 3 * 2];
	float *weights = nullptr;
};
// Common base class of the softpoly drawer commands; adds nothing to DrawerCommand.
class PolyDrawerCommand : public DrawerCommand
{
};
class PolySetDepthClampCommand : public PolyDrawerCommand
{
public:
PolySetDepthClampCommand(bool on) : on(on) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthClamp(on); }
private:
bool on;
};
class PolySetDepthMaskCommand : public PolyDrawerCommand
{
public:
PolySetDepthMaskCommand(bool on) : on(on) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthMask(on); }
private:
bool on;
};
class PolySetDepthFuncCommand : public PolyDrawerCommand
{
public:
PolySetDepthFuncCommand(int func) : func(func) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthFunc(func); }
private:
int func;
};
class PolySetDepthRangeCommand : public PolyDrawerCommand
{
public:
PolySetDepthRangeCommand(float min, float max) : min(min), max(max) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthRange(min, max); }
private:
float min;
float max;
};
class PolySetDepthBiasCommand : public PolyDrawerCommand
{
public:
PolySetDepthBiasCommand(float depthBiasConstantFactor, float depthBiasSlopeFactor) : depthBiasConstantFactor(depthBiasConstantFactor), depthBiasSlopeFactor(depthBiasSlopeFactor) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthBias(depthBiasConstantFactor, depthBiasSlopeFactor); }
private:
float depthBiasConstantFactor;
float depthBiasSlopeFactor;
};
class PolySetColorMaskCommand : public PolyDrawerCommand
{
public:
PolySetColorMaskCommand(bool r, bool g, bool b, bool a) : r(r), g(g), b(b), a(a) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetColorMask(r, g, b, a); }
private:
bool r;
bool g;
bool b;
bool a;
};
class PolySetStencilCommand : public PolyDrawerCommand
{
public:
PolySetStencilCommand(int stencilRef, int op) : stencilRef(stencilRef), op(op) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetStencil(stencilRef, op); }
private:
int stencilRef;
int op;
};
class PolySetCullingCommand : public PolyDrawerCommand
{
public:
PolySetCullingCommand(int mode) : mode(mode) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetCulling(mode); }
private:
int mode;
};
class PolyEnableStencilCommand : public PolyDrawerCommand
{
public:
PolyEnableStencilCommand(bool on) : on(on) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->EnableStencil(on); }
private:
bool on;
};
class PolySetScissorCommand : public PolyDrawerCommand
{
public:
PolySetScissorCommand(int x, int y, int w, int h) : x(x), y(y), w(w), h(h) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetScissor(x, y, w, h); }
private:
int x;
int y;
int w;
int h;
};
class PolySetRenderStyleCommand : public PolyDrawerCommand
{
public:
PolySetRenderStyleCommand(FRenderStyle style) : style(style) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetRenderStyle(style); }
private:
FRenderStyle style;
};
class PolySetTextureCommand : public PolyDrawerCommand
{
public:
PolySetTextureCommand(int unit, void *pixels, int width, int height, bool bgra) : unit(unit), pixels(pixels), width(width), height(height), bgra(bgra) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetTexture(unit, pixels, width, height, bgra); }
private:
int unit;
void *pixels;
int width;
int height;
bool bgra;
};
class PolySetShaderCommand : public PolyDrawerCommand
{
public:
PolySetShaderCommand(int specialEffect, int effectState, bool alphaTest) : specialEffect(specialEffect), effectState(effectState), alphaTest(alphaTest) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetShader(specialEffect, effectState, alphaTest); }
private:
int specialEffect;
int effectState;
bool alphaTest;
};
class PolySetVertexBufferCommand : public PolyDrawerCommand
{
public:
PolySetVertexBufferCommand(const void *vertices) : vertices(vertices) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetVertexBuffer(vertices); }
private:
const void *vertices;
};
class PolySetIndexBufferCommand : public PolyDrawerCommand
{
public:
PolySetIndexBufferCommand(const void *indices) : indices(indices) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetIndexBuffer(indices); }
private:
const void *indices;
};
class PolySetLightBufferCommand : public PolyDrawerCommand
{
public:
PolySetLightBufferCommand(const void *lights) : lights(lights) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetLightBuffer(lights); }
private:
const void *lights;
};
class PolySetInputAssemblyCommand : public PolyDrawerCommand
{
public:
PolySetInputAssemblyCommand(PolyInputAssembly *input) : input(input) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetInputAssembly(input); }
private:
PolyInputAssembly *input;
};
class PolyClearDepthCommand : public PolyDrawerCommand
{
public:
PolyClearDepthCommand(float value) : value(value) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->ClearDepth(value); }
private:
float value;
};
class PolyClearStencilCommand : public PolyDrawerCommand
{
public:
PolyClearStencilCommand(uint8_t value) : value(value) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->ClearStencil(value); }
private:
uint8_t value;
};
class PolySetViewportCommand : public PolyDrawerCommand
{
public:
PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, PolyDepthStencil *depthstencil, bool topdown)
: x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra), depthstencil(depthstencil), topdown(topdown) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra, depthstencil, topdown); }
private:
int x;
int y;
int width;
int height;
uint8_t *dest;
int dest_width;
int dest_height;
int dest_pitch;
bool dest_bgra;
PolyDepthStencil *depthstencil;
bool topdown;
};
class PolySetViewpointUniformsCommand : public PolyDrawerCommand
{
public:
PolySetViewpointUniformsCommand(const HWViewpointUniforms *uniforms) : uniforms(uniforms) {}
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetViewpointUniforms(uniforms); }
private:
const HWViewpointUniforms *uniforms;
};
class PolyPushMatricesCommand : public PolyDrawerCommand
{
public:
PolyPushMatricesCommand(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix)
: modelMatrix(modelMatrix), normalModelMatrix(normalModelMatrix), textureMatrix(textureMatrix) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->PushMatrices(modelMatrix, normalModelMatrix, textureMatrix); }
private:
VSMatrix modelMatrix;
VSMatrix normalModelMatrix;
VSMatrix textureMatrix;
};
class PolyPushStreamDataCommand : public PolyDrawerCommand
{
public:
PolyPushStreamDataCommand(const StreamData &data, const PolyPushConstants &constants) : data(data), constants(constants) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->PushStreamData(data, constants); }
private:
StreamData data;
PolyPushConstants constants;
};
class PolyDrawCommand : public PolyDrawerCommand
{
public:
PolyDrawCommand(int index, int count, PolyDrawMode mode) : index(index), count(count), mode(mode) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->Draw(index, count, mode); }
private:
int index;
int count;
PolyDrawMode mode;
};
class PolyDrawIndexedCommand : public PolyDrawerCommand
{
public:
PolyDrawIndexedCommand(int index, int count, PolyDrawMode mode) : index(index), count(count), mode(mode) { }
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->DrawIndexed(index, count, mode); }
private:
int index;
int count;
PolyDrawMode mode;
};

View file

@ -0,0 +1,575 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include "screen_blend.h"
// Right-shift amounts used by BlendColor to extract a blend factor from a
// packed 32-bit pixel. One row per blend factor (row index = STYLEALPHA value,
// looked up as shiftTable + (factor << 2)), one column per output component
// in the order the components are passed to MAKEARGB.
static const int shiftTable[8 * 4] =
{
	// STYLEALPHA_Zero
	0, 0, 0, 0,
	// STYLEALPHA_One
	0, 0, 0, 0,
	// STYLEALPHA_Src
	24, 24, 24, 24,
	// STYLEALPHA_InvSrc
	24, 24, 24, 24,
	// STYLEALPHA_SrcCol
	24, 16, 8, 0,
	// STYLEALPHA_InvSrcCol
	24, 16, 8, 0,
	// STYLEALPHA_DstCol
	24, 16, 8, 0,
	// STYLEALPHA_InvDstCol
	24, 16, 8, 0
};
#if 1 //#ifndef USE_AVX2
// Generic blend of thread->scanline.FragColor[x0, x1) onto row `y` of the
// destination, which is addressed as 32-bit pixels with a pitch of
// thread->dest_pitch pixels.
//
// For each of the four components:
//   out = clamp(((src * srcFactor) OP (dst * dstFactor) + 127) >> 8, 0, 255)
// where the factors come from thread->RenderStyle.SrcAlpha / DestAlpha and OP
// is chosen at compile time from OptT::Flags:
//   SWBLEND_Sub    -> src - dst
//   SWBLEND_RevSub -> dst - src
//   otherwise      -> src + dst
//
// Not `static`: screen_blend.h declares this template with external linkage,
// and the linkage of the definition has to agree with that declaration.
template<typename OptT>
void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	FRenderStyle style = thread->RenderStyle;

	// Odd STYLEALPHA values are the inverted form of the preceding even one.
	bool invsrc = style.SrcAlpha & 1;
	bool invdst = style.DestAlpha & 1;

	// Per-component shift amounts for extracting the factor from its input.
	const int* shiftsrc = shiftTable + (style.SrcAlpha << 2);
	const int* shiftdst = shiftTable + (style.DestAlpha << 2);

	uint32_t* dest = (uint32_t*)thread->dest;
	uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	// Which entry of inputs[] the factor is read from:
	// 0 = constant zero (Zero/One), 1 = destination pixel, 2 = fragment color.
	int srcSelect = style.SrcAlpha <= STYLEALPHA_One ? 0 : (style.SrcAlpha >= STYLEALPHA_DstCol ? 1 : 2);
	int dstSelect = style.DestAlpha <= STYLEALPHA_One ? 0 : (style.DestAlpha >= STYLEALPHA_DstCol ? 1 : 2);

	uint32_t inputs[3];
	inputs[0] = 0;

	for (int x = x0; x < x1; x++)
	{
		inputs[1] = line[x];
		inputs[2] = fragcolor[x];

		uint32_t srcinput = inputs[srcSelect];
		uint32_t dstinput = inputs[dstSelect];

		uint32_t out[4];
		for (int i = 0; i < 4; i++)
		{
			// Grab component for scale factors
			int32_t src = (srcinput >> shiftsrc[i]) & 0xff;
			int32_t dst = (dstinput >> shiftdst[i]) & 0xff;

			// Inverse if needed
			if (invsrc) src = 0xff - src;
			if (invdst) dst = 0xff - dst;

			// Rescale 0-255 to 0-256
			src = src + (src >> 7);
			dst = dst + (dst >> 7);

			// Multiply with input (component i, counted from the top byte)
			src = src * ((inputs[2] >> (24 - (i << 3))) & 0xff);
			dst = dst * ((inputs[1] >> (24 - (i << 3))) & 0xff);

			// Apply blend operator
			int32_t val;
			if (OptT::Flags & SWBLEND_Sub)
			{
				val = src - dst;
			}
			else if (OptT::Flags & SWBLEND_RevSub)
			{
				val = dst - src;
			}
			else
			{
				val = src + dst;
			}

			// Round, drop the 8 fractional bits and saturate to a byte.
			out[i] = clamp((val + 127) >> 8, 0, 255);
		}

		line[x] = MAKEARGB(out[0], out[1], out[2], out[3]);
	}
}
#else
// SIMD variant of the generic blend. It is currently compiled out: the
// enclosing preprocessor condition is "#if 1", so this #else branch is never
// built. It mirrors the scalar version: per pixel it extracts the source and
// destination factors, optionally inverts them, rescales 0-255 to 0-256,
// multiplies with the fragment / destination components, applies the blend
// operator selected by OptT::Flags and writes the packed result to line[x].
//
// NOTE(review): before enabling this path, confirm the following:
//  * shiftmul is built with _mm_set_epi32(24, 16, 8, 0) while shiftsrc/shiftdst
//    are loaded from shiftTable rows stored as {24, 16, 8, 0}; the lane order
//    of the two looks reversed relative to each other.
//  * the factors (up to 256) and the products are packed with
//    _mm_packs_epi32, which presumably saturates to the signed 16-bit range.
//  * unlike the scalar version there is no explicit clamp for negative
//    Sub/RevSub results before the logical right shift.
//  * the definition is `static` although screen_blend.h declares the template
//    without `static`.
template<typename OptT>
static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
FRenderStyle style = thread->RenderStyle;
// Odd STYLEALPHA values select the inverted factor.
bool invsrc = style.SrcAlpha & 1;
bool invdst = style.DestAlpha & 1;
// Four per-component shift amounts for each factor, loaded as one vector.
__m128i shiftsrc = _mm_loadu_si128((const __m128i*)(shiftTable + (style.SrcAlpha << 2)));
__m128i shiftdst = _mm_loadu_si128((const __m128i*)(shiftTable + (style.DestAlpha << 2)));
uint32_t* dest = (uint32_t*)thread->dest;
uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;
// Index into inputs[]: 0 = constant zero, 1 = destination pixel, 2 = fragment color.
int srcSelect = style.SrcAlpha <= STYLEALPHA_One ? 0 : (style.SrcAlpha >= STYLEALPHA_DstCol ? 1 : 2);
int dstSelect = style.DestAlpha <= STYLEALPHA_One ? 0 : (style.DestAlpha >= STYLEALPHA_DstCol ? 1 : 2);
uint32_t inputs[3];
inputs[0] = 0;
// Shift amounts used to split a packed pixel into its four components.
__m128i shiftmul = _mm_set_epi32(24, 16, 8, 0);
for (int x = x0; x < x1; x++)
{
inputs[1] = line[x];
inputs[2] = fragcolor[x];
__m128i srcinput = _mm_set1_epi32(inputs[srcSelect]);
__m128i dstinput = _mm_set1_epi32(inputs[dstSelect]);
// Grab component for scale factors
__m128i src = _mm_and_si128(_mm_srlv_epi32(srcinput, shiftsrc), _mm_set1_epi32(0xff));
__m128i dst = _mm_and_si128(_mm_srlv_epi32(dstinput, shiftdst), _mm_set1_epi32(0xff));
// Inverse if needed
if (invsrc) src = _mm_sub_epi32(_mm_set1_epi32(0xff), src);
if (invdst) dst = _mm_sub_epi32(_mm_set1_epi32(0xff), dst);
// Rescale 0-255 to 0-256
src = _mm_add_epi32(src, _mm_srli_epi32(src, 7));
dst = _mm_add_epi32(dst, _mm_srli_epi32(dst, 7));
// Multiply with input
__m128i mulsrc = _mm_and_si128(_mm_srlv_epi32(_mm_set1_epi32(inputs[2]), shiftmul), _mm_set1_epi32(0xff));
__m128i muldst = _mm_and_si128(_mm_srlv_epi32(_mm_set1_epi32(inputs[1]), shiftmul), _mm_set1_epi32(0xff));
// Both products are computed with one 16-bit multiply: src in the low half, dst in the high half.
__m128i mulresult = _mm_mullo_epi16(_mm_packs_epi32(src, dst), _mm_packs_epi32(mulsrc, muldst));
src = _mm_unpacklo_epi16(mulresult, _mm_setzero_si128());
dst = _mm_unpackhi_epi16(mulresult, _mm_setzero_si128());
// Apply blend operator
__m128i val;
if (OptT::Flags & SWBLEND_Sub)
{
val = _mm_sub_epi32(src, dst);
}
else if (OptT::Flags & SWBLEND_RevSub)
{
val = _mm_sub_epi32(dst, src);
}
else
{
val = _mm_add_epi32(src, dst);
}
// Round, drop the 8 fractional bits, then narrow 32 -> 16 -> 8 bits and store one pixel.
__m128i out = _mm_srli_epi32(_mm_add_epi32(val, _mm_set1_epi32(127)), 8);
out = _mm_packs_epi32(out, out);
out = _mm_packus_epi16(out, out);
line[x] = _mm_cvtsi128_si32(out);
}
}
#endif
#ifdef NO_SSE
// Opaque write (no blending), NO_SSE build: copies
// thread->scanline.FragColor[x0, x1) over row `y` of the destination, which is
// addressed as 32-bit pixels with a pitch of thread->dest_pitch pixels.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* dest = (uint32_t*)thread->dest;
	uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	memcpy(line + x0, fragcolor + x0, (x1 - x0) * sizeof(uint32_t));
}
#else
// Opaque write (no blending), SSE build: copies
// thread->scanline.FragColor[x0, x1) over row `y` of the destination, which is
// addressed as 32-bit pixels with a pitch of thread->dest_pitch pixels.
// Four pixels are moved per iteration with unaligned 128-bit loads/stores; the
// remaining 0-3 pixels are copied one at a time.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* dest = (uint32_t*)thread->dest;
	uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	// Largest multiple of four pixels that fits in the span.
	int ssecount = ((x1 - x0) & ~3);
	int sseend = x0 + ssecount;

	for (int x = x0; x < sseend; x += 4)
	{
		__m128i v = _mm_loadu_si128((__m128i*) & fragcolor[x]);
		_mm_storeu_si128((__m128i*) & line[x], v);
	}

	// Tail pixels.
	for (int x = sseend; x < x1; x++)
	{
		line[x] = fragcolor[x];
	}
}
#endif
// Blend for STYLEOP_Add with SrcAlpha = STYLEALPHA_Src and
// DestAlpha = STYLEALPHA_InvSrc (selected by SelectWriteColorFunc).
// For every pixel of row `y` in [x0, x1) and every component:
//   out = (src * a + dst * (256 - a) + 127) >> 8
// where a is the fragment's alpha rescaled from 0-255 to 0-256.
// Unless NO_SSE is defined, pixels are processed in pairs with SSE2 and the
// scalar loop only handles the odd tail pixel.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorAdd_Src_InvSrc(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	int sseend = x0;
#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~1);
	sseend = x0 + ssecount;
	for (int x = x0; x < sseend; x += 2)
	{
		// Widen two pixels to 16 bits per component.
		__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)&line[x]), _mm_setzero_si128());
		__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)&fragcolor[x]), _mm_setzero_si128());

		// Broadcast each pixel's alpha (lane 3 of its half) to all of its components.
		__m128i srcscale = _mm_shufflehi_epi16(_mm_shufflelo_epi16(src, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
		srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
		__m128i dstscale = _mm_sub_epi16(_mm_set1_epi16(256), srcscale);

		__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_mullo_epi16(dst, dstscale)), _mm_set1_epi16(127)), 8);
		_mm_storel_epi64((__m128i*)&line[x], _mm_packus_epi16(out, out));
	}
#endif

	for (int x = sseend; x < x1; x++)
	{
		uint32_t dst = line[x];
		uint32_t src = fragcolor[x];

		// Rescale 0-255 to 0-256
		uint32_t srcscale = APART(src);
		srcscale += srcscale >> 7;
		uint32_t dstscale = 256 - srcscale;

		uint32_t a = ((APART(src) * srcscale + APART(dst) * dstscale) + 127) >> 8;
		uint32_t r = ((RPART(src) * srcscale + RPART(dst) * dstscale) + 127) >> 8;
		uint32_t g = ((GPART(src) * srcscale + GPART(dst) * dstscale) + 127) >> 8;
		uint32_t b = ((BPART(src) * srcscale + BPART(dst) * dstscale) + 127) >> 8;

		line[x] = MAKEARGB(a, r, g, b);
	}
}
// Blend for STYLEOP_Add with SrcAlpha = STYLEALPHA_SrcCol and
// DestAlpha = STYLEALPHA_InvSrcCol (selected by SelectWriteColorFunc).
// For every pixel of row `y` in [x0, x1) and every component c:
//   out_c = (src_c * s_c + dst_c * (256 - s_c) + 127) >> 8
// where s_c is the fragment's own component c rescaled from 0-255 to 0-256.
// Unless NO_SSE is defined, pixels are processed in pairs with SSE2 and the
// scalar loop only handles the odd tail pixel.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorAdd_SrcCol_InvSrcCol(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	int sseend = x0;
#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~1);
	sseend = x0 + ssecount;
	for (int x = x0; x < sseend; x += 2)
	{
		// Widen two pixels to 16 bits per component.
		__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
		__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());

		// Each component is scaled by itself.
		__m128i srcscale = src;
		srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
		__m128i dstscale = _mm_sub_epi16(_mm_set1_epi16(256), srcscale);

		__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_mullo_epi16(dst, dstscale)), _mm_set1_epi16(127)), 8);
		_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
	}
#endif

	for (int x = sseend; x < x1; x++)
	{
		uint32_t dst = line[x];
		uint32_t src = fragcolor[x];

		uint32_t srcscale_a = APART(src);
		uint32_t srcscale_r = RPART(src);
		uint32_t srcscale_g = GPART(src);
		uint32_t srcscale_b = BPART(src);

		// Rescale 0-255 to 0-256
		srcscale_a += srcscale_a >> 7;
		srcscale_r += srcscale_r >> 7;
		srcscale_g += srcscale_g >> 7;
		srcscale_b += srcscale_b >> 7;

		uint32_t dstscale_a = 256 - srcscale_a;
		uint32_t dstscale_r = 256 - srcscale_r;
		uint32_t dstscale_g = 256 - srcscale_g;
		uint32_t dstscale_b = 256 - srcscale_b;

		uint32_t a = ((APART(src) * srcscale_a + APART(dst) * dstscale_a) + 127) >> 8;
		uint32_t r = ((RPART(src) * srcscale_r + RPART(dst) * dstscale_r) + 127) >> 8;
		uint32_t g = ((GPART(src) * srcscale_g + GPART(dst) * dstscale_g) + 127) >> 8;
		uint32_t b = ((BPART(src) * srcscale_b + BPART(dst) * dstscale_b) + 127) >> 8;

		line[x] = MAKEARGB(a, r, g, b);
	}
}
// Blend for STYLEOP_Add with SrcAlpha = STYLEALPHA_Src and
// DestAlpha = STYLEALPHA_One (selected by SelectWriteColorFunc).
// For every pixel of row `y` in [x0, x1) and every component:
//   out = min(((src * a + 127) >> 8) + dst, 255)
// where a is the fragment's alpha rescaled from 0-255 to 0-256.
// Unless NO_SSE is defined, pixels are processed in pairs with SSE2 (the
// final pack saturates to 255) and the scalar loop only handles the odd tail.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorAdd_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	int sseend = x0;
#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~1);
	sseend = x0 + ssecount;
	for (int x = x0; x < sseend; x += 2)
	{
		// Widen two pixels to 16 bits per component.
		__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
		__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());

		// Broadcast each pixel's alpha (lane 3 of its half) to all of its components.
		__m128i srcscale = _mm_shufflehi_epi16(_mm_shufflelo_epi16(src, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
		srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));

		__m128i out = _mm_add_epi16(_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8), dst);
		_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
	}
#endif

	for (int x = sseend; x < x1; x++)
	{
		uint32_t dst = line[x];
		uint32_t src = fragcolor[x];

		// Rescale 0-255 to 0-256
		uint32_t srcscale = APART(src);
		srcscale += srcscale >> 7;

		uint32_t a = MIN<int32_t>((((APART(src) * srcscale) + 127) >> 8) + APART(dst), 255);
		uint32_t r = MIN<int32_t>((((RPART(src) * srcscale) + 127) >> 8) + RPART(dst), 255);
		uint32_t g = MIN<int32_t>((((GPART(src) * srcscale) + 127) >> 8) + GPART(dst), 255);
		uint32_t b = MIN<int32_t>((((BPART(src) * srcscale) + 127) >> 8) + BPART(dst), 255);

		line[x] = MAKEARGB(a, r, g, b);
	}
}
// Blend for STYLEOP_Add with SrcAlpha = STYLEALPHA_SrcCol and
// DestAlpha = STYLEALPHA_One (selected by SelectWriteColorFunc).
// For every pixel of row `y` in [x0, x1) and every component c:
//   out_c = min(((src_c * s_c + 127) >> 8) + dst_c, 255)
// where s_c is the fragment's own component c rescaled from 0-255 to 0-256.
// Unless NO_SSE is defined, pixels are processed in pairs with SSE2 (the
// final pack saturates to 255) and the scalar loop only handles the odd tail.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorAdd_SrcCol_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	int sseend = x0;
#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~1);
	sseend = x0 + ssecount;
	for (int x = x0; x < sseend; x += 2)
	{
		// Widen two pixels to 16 bits per component.
		__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
		__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());

		// Each component is scaled by itself.
		__m128i srcscale = src;
		srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));

		__m128i out = _mm_add_epi16(_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8), dst);
		_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
	}
#endif

	for (int x = sseend; x < x1; x++)
	{
		uint32_t dst = line[x];
		uint32_t src = fragcolor[x];

		uint32_t srcscale_a = APART(src);
		uint32_t srcscale_r = RPART(src);
		uint32_t srcscale_g = GPART(src);
		uint32_t srcscale_b = BPART(src);

		// Rescale 0-255 to 0-256
		srcscale_a += srcscale_a >> 7;
		srcscale_r += srcscale_r >> 7;
		srcscale_g += srcscale_g >> 7;
		srcscale_b += srcscale_b >> 7;

		uint32_t a = MIN<int32_t>((((APART(src) * srcscale_a) + 127) >> 8) + APART(dst), 255);
		uint32_t r = MIN<int32_t>((((RPART(src) * srcscale_r) + 127) >> 8) + RPART(dst), 255);
		uint32_t g = MIN<int32_t>((((GPART(src) * srcscale_g) + 127) >> 8) + GPART(dst), 255);
		uint32_t b = MIN<int32_t>((((BPART(src) * srcscale_b) + 127) >> 8) + BPART(dst), 255);

		line[x] = MAKEARGB(a, r, g, b);
	}
}
// Blend for STYLEOP_Add with SrcAlpha = STYLEALPHA_DstCol and
// DestAlpha = STYLEALPHA_Zero (selected by SelectWriteColorFunc).
// For every pixel of row `y` in [x0, x1) and every component c:
//   out_c = (src_c * d_c + 127) >> 8
// where d_c is the destination's component c rescaled from 0-255 to 0-256,
// i.e. the fragment is multiplied with what is already in the destination.
// Unless NO_SSE is defined, pixels are processed in pairs with SSE2 and the
// scalar loop only handles the odd tail pixel.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorAdd_DstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	int sseend = x0;
#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~1);
	sseend = x0 + ssecount;
	for (int x = x0; x < sseend; x += 2)
	{
		// Widen two pixels to 16 bits per component.
		__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
		__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());

		// The scale factor comes from the destination pixel.
		__m128i srcscale = dst;
		srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));

		__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8);
		_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
	}
#endif

	for (int x = sseend; x < x1; x++)
	{
		uint32_t dst = line[x];
		uint32_t src = fragcolor[x];

		uint32_t srcscale_a = APART(dst);
		uint32_t srcscale_r = RPART(dst);
		uint32_t srcscale_g = GPART(dst);
		uint32_t srcscale_b = BPART(dst);

		// Rescale 0-255 to 0-256
		srcscale_a += srcscale_a >> 7;
		srcscale_r += srcscale_r >> 7;
		srcscale_g += srcscale_g >> 7;
		srcscale_b += srcscale_b >> 7;

		uint32_t a = (((APART(src) * srcscale_a) + 127) >> 8);
		uint32_t r = (((RPART(src) * srcscale_r) + 127) >> 8);
		uint32_t g = (((GPART(src) * srcscale_g) + 127) >> 8);
		uint32_t b = (((BPART(src) * srcscale_b) + 127) >> 8);

		line[x] = MAKEARGB(a, r, g, b);
	}
}
// Blend for STYLEOP_Add with SrcAlpha = STYLEALPHA_InvDstCol and
// DestAlpha = STYLEALPHA_Zero (selected by SelectWriteColorFunc).
// For every pixel of row `y` in [x0, x1) and every component c:
//   out_c = (src_c * d_c + 127) >> 8
// where d_c is (255 - destination component c) rescaled from 0-255 to 0-256.
// Unless NO_SSE is defined, pixels are processed in pairs with SSE2 and the
// scalar loop only handles the odd tail pixel.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorAdd_InvDstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	int sseend = x0;
#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~1);
	sseend = x0 + ssecount;
	for (int x = x0; x < sseend; x += 2)
	{
		// Widen two pixels to 16 bits per component.
		__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
		__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());

		// The scale factor is the inverted destination pixel.
		__m128i srcscale = _mm_sub_epi16(_mm_set1_epi16(255), dst);
		srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));

		__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8);
		_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
	}
#endif

	for (int x = sseend; x < x1; x++)
	{
		uint32_t dst = line[x];
		uint32_t src = fragcolor[x];

		uint32_t srcscale_a = 255 - APART(dst);
		uint32_t srcscale_r = 255 - RPART(dst);
		uint32_t srcscale_g = 255 - GPART(dst);
		uint32_t srcscale_b = 255 - BPART(dst);

		// Rescale 0-255 to 0-256
		srcscale_a += srcscale_a >> 7;
		srcscale_r += srcscale_r >> 7;
		srcscale_g += srcscale_g >> 7;
		srcscale_b += srcscale_b >> 7;

		uint32_t a = (((APART(src) * srcscale_a) + 127) >> 8);
		uint32_t r = (((RPART(src) * srcscale_r) + 127) >> 8);
		uint32_t g = (((GPART(src) * srcscale_g) + 127) >> 8);
		uint32_t b = (((BPART(src) * srcscale_b) + 127) >> 8);

		line[x] = MAKEARGB(a, r, g, b);
	}
}
// Blend for the reverse-subtract operator with SrcAlpha = STYLEALPHA_Src and
// DestAlpha = STYLEALPHA_One (selected by SelectWriteColorFunc).
// For every pixel of row `y` in [x0, x1) and every component:
//   out = max(dst - ((src * a + 127) >> 8), 0)
// where a is the fragment's alpha rescaled from 0-255 to 0-256.
// Unless NO_SSE is defined, pixels are processed in pairs with SSE2 (the
// final pack clamps negative results to 0) and the scalar loop only handles
// the odd tail pixel.
//
// Not `static`: screen_blend.h declares this function with external linkage,
// and the linkage of the definition has to agree with that declaration.
void BlendColorRevSub_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
	uint32_t* fragcolor = thread->scanline.FragColor;

	int sseend = x0;
#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~1);
	sseend = x0 + ssecount;
	for (int x = x0; x < sseend; x += 2)
	{
		// Widen two pixels to 16 bits per component.
		__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
		__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());

		// Broadcast each pixel's alpha (lane 3 of its half) to all of its components.
		__m128i srcscale = _mm_shufflehi_epi16(_mm_shufflelo_epi16(src, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
		srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));

		__m128i out = _mm_sub_epi16(dst, _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8));
		_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
	}
#endif

	for (int x = sseend; x < x1; x++)
	{
		uint32_t dst = line[x];
		uint32_t src = fragcolor[x];

		// Rescale 0-255 to 0-256
		uint32_t srcscale = APART(src);
		srcscale += srcscale >> 7;

		uint32_t a = MAX<int32_t>(APART(dst) - (((APART(src) * srcscale) + 127) >> 8), 0);
		uint32_t r = MAX<int32_t>(RPART(dst) - (((RPART(src) * srcscale) + 127) >> 8), 0);
		uint32_t g = MAX<int32_t>(GPART(dst) - (((GPART(src) * srcscale) + 127) >> 8), 0);
		uint32_t b = MAX<int32_t>(BPART(dst) - (((BPART(src) * srcscale) + 127) >> 8), 0);

		line[x] = MAKEARGB(a, r, g, b);
	}
}
// Picks the span blend routine for thread->RenderStyle and stores it in
// thread->WriteColorFunc. Factor combinations with a dedicated routine use
// it; every other combination falls back to the generic BlendColor<> template
// instantiated for the blend operator.
void SelectWriteColorFunc(PolyTriangleThreadData* thread)
{
	typedef void (*WriteFunc)(int y, int x0, int x1, PolyTriangleThreadData* thread);

	struct Specialization
	{
		int srcFactor;
		int dstFactor;
		WriteFunc func;
	};

	const FRenderStyle style = thread->RenderStyle;

	// Subtract has no specialized routines.
	if (style.BlendOp == STYLEOP_Sub)
	{
		thread->WriteColorFunc = &BlendColor<BlendColorOpt_Sub>;
		return;
	}

	// Anything that is neither Add nor Sub is treated as reverse subtract.
	if (style.BlendOp != STYLEOP_Add)
	{
		const bool srcOne = (style.SrcAlpha == STYLEALPHA_Src) && (style.DestAlpha == STYLEALPHA_One);
		thread->WriteColorFunc = srcOne ? &BlendColorRevSub_Src_One : &BlendColor<BlendColorOpt_RevSub>;
		return;
	}

	// Additive blending: look for a specialized routine first.
	static const Specialization addRoutines[] =
	{
		{ STYLEALPHA_One,       STYLEALPHA_Zero,      &BlendColorOpaque },
		{ STYLEALPHA_Src,       STYLEALPHA_InvSrc,    &BlendColorAdd_Src_InvSrc },
		{ STYLEALPHA_SrcCol,    STYLEALPHA_InvSrcCol, &BlendColorAdd_SrcCol_InvSrcCol },
		{ STYLEALPHA_Src,       STYLEALPHA_One,       &BlendColorAdd_Src_One },
		{ STYLEALPHA_SrcCol,    STYLEALPHA_One,       &BlendColorAdd_SrcCol_One },
		{ STYLEALPHA_DstCol,    STYLEALPHA_Zero,      &BlendColorAdd_DstCol_Zero },
		{ STYLEALPHA_InvDstCol, STYLEALPHA_Zero,      &BlendColorAdd_InvDstCol_Zero },
	};

	for (const Specialization& entry : addRoutines)
	{
		if (style.SrcAlpha == entry.srcFactor && style.DestAlpha == entry.dstFactor)
		{
			thread->WriteColorFunc = entry.func;
			return;
		}
	}

	thread->WriteColorFunc = &BlendColor<BlendColorOpt_Add>;
}

View file

@ -0,0 +1,49 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
class PolyTriangleThreadData;

// Bit flags tested against OptT::Flags by BlendColor<OptT> to choose the blend
// operator: Sub computes src - dst, RevSub computes dst - src, and with
// neither flag set the result is src + dst.
enum SWBlendColor
{
	SWBLEND_Sub = 1,
	SWBLEND_RevSub = 2
};

// Compile-time option tags for BlendColor<OptT>. Flags is a combination of
// SWBlendColor values (0 selects the additive operator).
struct BlendColorOpt_Add { static const int Flags = 0; };
struct BlendColorOpt_Sub { static const int Flags = SWBLEND_Sub; };
struct BlendColorOpt_RevSub { static const int Flags = SWBLEND_RevSub; };

// Span writers. Each one combines thread->scanline.FragColor[x0, x1) with row
// `y` of the thread's destination buffer and stores the result there.

// Generic routine handling any source/destination factor combination.
template<typename OptT>
void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread);

// Specialized routines; the name encodes operator, source factor and
// destination factor.
void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_Src_InvSrc(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_SrcCol_InvSrcCol(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_SrcCol_One(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_DstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_InvDstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorRevSub_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread);

// Stores the routine matching thread->RenderStyle in thread->WriteColorFunc.
void SelectWriteColorFunc(PolyTriangleThreadData* thread);

View file

@ -0,0 +1,420 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include <stddef.h>
#include "templates.h"
#include "doomdef.h"
#include "poly_thread.h"
#include "screen_scanline_setup.h"
#include "x86.h"
#include <cmath>
#include <cstring>
#ifdef NO_SSE
// Scalar build: fills thread->scanline.W[x0, x1) with the reciprocal of the W
// value interpolated along scanline y.
void WriteW(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
    // Distance from vertex v1 to the centre of the first pixel.
    const float offsetX = x0 + (0.5f - args->v1->x);
    const float offsetY = y + (0.5f - args->v1->y);

    const float deltaW = args->gradientX.W;
    float curW = args->v1->w + args->gradientX.W * offsetX + args->gradientY.W * offsetY;

    float* out = thread->scanline.W;
    int x = x0;
    while (x < x1)
    {
        out[x] = 1.0f / curW;
        curW += deltaW;
        ++x;
    }
}
#else
void WriteW(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
float startX = x0 + (0.5f - args->v1->x);
float startY = y + (0.5f - args->v1->y);
float posW = args->v1->w + args->gradientX.W * startX + args->gradientY.W * startY;
float stepW = args->gradientX.W;
float* w = thread->scanline.W;
int ssecount = ((x1 - x0) & ~3);
int sseend = x0 + ssecount;
__m128 mstepW = _mm_set1_ps(stepW * 4.0f);
__m128 mposW = _mm_setr_ps(posW, posW + stepW, posW + stepW + stepW, posW + stepW + stepW + stepW);
for (int x = x0; x < sseend; x += 4)
{
// One Newton-Raphson iteration for 1/posW
__m128 res = _mm_rcp_ps(mposW);
__m128 muls = _mm_mul_ps(mposW, _mm_mul_ps(res, res));
_mm_storeu_ps(w + x, _mm_sub_ps(_mm_add_ps(res, res), muls));
mposW = _mm_add_ps(mposW, mstepW);
}
posW += ssecount * stepW;
for (int x = sseend; x < x1; x++)
{
w[x] = 1.0f / posW;
posW += stepW;
}
}
#endif
static void WriteDynLightArray(int x0, int x1, PolyTriangleThreadData* thread)
{
int num_lights = thread->numPolyLights;
PolyLight* lights = thread->polyLights;
float worldnormalX = thread->mainVertexShader.vWorldNormal.X;
float worldnormalY = thread->mainVertexShader.vWorldNormal.Y;
float worldnormalZ = thread->mainVertexShader.vWorldNormal.Z;
uint32_t* dynlights = thread->scanline.dynlights;
float* worldposX = thread->scanline.WorldX;
float* worldposY = thread->scanline.WorldY;
float* worldposZ = thread->scanline.WorldZ;
int sseend = x0;
#ifndef NO_SSE
int ssecount = ((x1 - x0) & ~3);
sseend = x0 + ssecount;
__m128 mworldnormalX = _mm_set1_ps(worldnormalX);
__m128 mworldnormalY = _mm_set1_ps(worldnormalY);
__m128 mworldnormalZ = _mm_set1_ps(worldnormalZ);
for (int x = x0; x < sseend; x += 4)
{
__m128i litlo = _mm_setzero_si128();
//__m128i litlo = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
__m128i lithi = litlo;
for (int i = 0; i < num_lights; i++)
{
__m128 lightposX = _mm_set1_ps(lights[i].x);
__m128 lightposY = _mm_set1_ps(lights[i].y);
__m128 lightposZ = _mm_set1_ps(lights[i].z);
__m128 light_radius = _mm_set1_ps(lights[i].radius);
__m128i light_color = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(lights[i].color), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
__m128 is_attenuated = _mm_cmplt_ps(light_radius, _mm_setzero_ps());
light_radius = _mm_andnot_ps(_mm_set1_ps(-0.0f), light_radius); // clear sign bit
// L = light-pos
// dist = sqrt(dot(L, L))
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
__m128 Lx = _mm_sub_ps(lightposX, _mm_loadu_ps(&worldposX[x]));
__m128 Ly = _mm_sub_ps(lightposY, _mm_loadu_ps(&worldposY[x]));
__m128 Lz = _mm_sub_ps(lightposZ, _mm_loadu_ps(&worldposZ[x]));
__m128 dist2 = _mm_add_ps(_mm_mul_ps(Lx, Lx), _mm_add_ps(_mm_mul_ps(Ly, Ly), _mm_mul_ps(Lz, Lz)));
__m128 rcp_dist = _mm_rsqrt_ps(dist2);
__m128 dist = _mm_mul_ps(dist2, rcp_dist);
__m128 distance_attenuation = _mm_sub_ps(_mm_set1_ps(256.0f), _mm_min_ps(_mm_mul_ps(dist, light_radius), _mm_set1_ps(256.0f)));
// The simple light type
__m128 simple_attenuation = distance_attenuation;
// The point light type
// diffuse = max(dot(N,normalize(L)),0) * attenuation
Lx = _mm_mul_ps(Lx, rcp_dist);
Ly = _mm_mul_ps(Ly, rcp_dist);
Lz = _mm_mul_ps(Lz, rcp_dist);
__m128 dotNL = _mm_add_ps(_mm_add_ps(_mm_mul_ps(mworldnormalX, Lx), _mm_mul_ps(mworldnormalY, Ly)), _mm_mul_ps(mworldnormalZ, Lz));
__m128 point_attenuation = _mm_mul_ps(_mm_max_ps(dotNL, _mm_setzero_ps()), distance_attenuation);
__m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, point_attenuation), _mm_andnot_ps(is_attenuated, simple_attenuation)));
attenuation = _mm_shufflehi_epi16(_mm_shufflelo_epi16(attenuation, _MM_SHUFFLE(2, 2, 0, 0)), _MM_SHUFFLE(2, 2, 0, 0));
__m128i attenlo = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 0, 0));
__m128i attenhi = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(3, 3, 2, 2));
litlo = _mm_add_epi16(litlo, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenlo), 8));
lithi = _mm_add_epi16(lithi, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenhi), 8));
}
_mm_storeu_si128((__m128i*)&dynlights[x], _mm_packus_epi16(litlo, lithi));
}
#endif
for (int x = x0; x < x1; x++)
{
uint32_t lit_r = 0;
uint32_t lit_g = 0;
uint32_t lit_b = 0;
for (int i = 0; i < num_lights; i++)
{
float lightposX = lights[i].x;
float lightposY = lights[i].y;
float lightposZ = lights[i].z;
float light_radius = lights[i].radius;
uint32_t light_color = lights[i].color;
bool is_attenuated = light_radius < 0.0f;
if (is_attenuated)
light_radius = -light_radius;
// L = light-pos
// dist = sqrt(dot(L, L))
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
float Lx = lightposX - worldposX[x];
float Ly = lightposY - worldposY[x];
float Lz = lightposZ - worldposZ[x];
float dist2 = Lx * Lx + Ly * Ly + Lz * Lz;
#ifdef NO_SSE
//float rcp_dist = 1.0f / sqrt(dist2);
float rcp_dist = 1.0f / (dist2 * 0.01f);
#else
float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2)));
#endif
float dist = dist2 * rcp_dist;
float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f);
// The simple light type
float simple_attenuation = distance_attenuation;
// The point light type
// diffuse = max(dot(N,normalize(L)),0) * attenuation
Lx *= rcp_dist;
Ly *= rcp_dist;
Lz *= rcp_dist;
float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz;
float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation;
uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation);
lit_r += (RPART(light_color) * attenuation) >> 8;
lit_g += (GPART(light_color) * attenuation) >> 8;
lit_b += (BPART(light_color) * attenuation) >> 8;
}
lit_r = MIN<uint32_t>(lit_r, 255);
lit_g = MIN<uint32_t>(lit_g, 255);
lit_b = MIN<uint32_t>(lit_b, 255);
dynlights[x] = MAKEARGB(255, lit_r, lit_g, lit_b);
// Palette version:
// dynlights[x] = RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)];
}
}
static void WriteLightArray(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
float startX = x0 + (0.5f - args->v1->x);
float startY = y + (0.5f - args->v1->y);
float posW = args->v1->w + args->gradientX.W * startX + args->gradientY.W * startY;
float stepW = args->gradientX.W;
float globVis = thread->mainVertexShader.Viewpoint->mGlobVis;
uint32_t light = (int)(thread->PushConstants->uLightLevel * 255.0f);
fixed_t shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
fixed_t lightpos = (fixed_t)(globVis * posW * (float)FRACUNIT);
fixed_t lightstep = (fixed_t)(globVis * stepW * (float)FRACUNIT);
fixed_t maxvis = 24 * FRACUNIT / 32;
fixed_t maxlight = 31 * FRACUNIT / 32;
uint16_t *lightarray = thread->scanline.lightarray;
fixed_t lightend = lightpos + lightstep * (x1 - x0);
if (lightpos < maxvis && shade >= lightpos && shade - lightpos <= maxlight &&
lightend < maxvis && shade >= lightend && shade - lightend <= maxlight)
{
//if (BitsPerPixel == 32)
{
lightpos += FRACUNIT - shade;
for (int x = x0; x < x1; x++)
{
lightarray[x] = lightpos >> 8;
lightpos += lightstep;
}
}
/*else
{
lightpos = shade - lightpos;
for (int x = x0; x < x1; x++)
{
lightarray[x] = (lightpos >> 3) & 0xffffff00;
lightpos -= lightstep;
}
}*/
}
else
{
//if (BitsPerPixel == 32)
{
for (int x = x0; x < x1; x++)
{
lightarray[x] = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
lightpos += lightstep;
}
}
/*else
{
for (int x = x0; x < x1; x++)
{
lightarray[x] = (clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight) >> 3) & 0xffffff00;
lightpos += lightstep;
}
}*/
}
}
#ifdef NO_SSE
// Scalar build: writes varying[x] = value * w[x] for x in [x0, x1), where
// value starts at pos and grows by step for each pixel.
static void WriteVarying(float pos, float step, int x0, int x1, const float* w, float* varying)
{
    const float* src = w + x0;
    float* dst = varying + x0;
    for (int remaining = x1 - x0; remaining > 0; --remaining)
    {
        *dst++ = pos * *src++;
        pos += step;
    }
}
#else
static void WriteVarying(float pos, float step, int x0, int x1, const float* w, float* varying)
{
int ssecount = ((x1 - x0) & ~3);
int sseend = x0 + ssecount;
__m128 mstep = _mm_set1_ps(step * 4.0f);
__m128 mpos = _mm_setr_ps(pos, pos + step, pos + step + step, pos + step + step + step);
for (int x = x0; x < sseend; x += 4)
{
_mm_storeu_ps(varying + x, _mm_mul_ps(mpos, _mm_loadu_ps(w + x)));
mpos = _mm_add_ps(mpos, mstep);
}
pos += ssecount * step;
for (int x = sseend; x < x1; x++)
{
varying[x] = pos * w[x];
pos += step;
}
}
#endif
#ifdef NO_SSE
// Scalar build: for x in [x0, x1) computes value * w[x] (value starts at pos
// and grows by step per pixel), keeps only the fractional part and stores it
// as a 16-bit fixed-point fraction (0x0000..0xffff covers [0, 1)).
//
// The fraction is first scaled to 28 bits; shifting left by 4 and right by 16
// then drops anything at or above 1.0. That matters because
// value - floor(value) can round up to exactly 1.0 for tiny negative values.
// The left shift is done on an unsigned value so that this wrap is well
// defined (a signed shift overflows in that case).
static void WriteVaryingWrap(float pos, float step, int x0, int x1, const float* w, uint16_t* varying)
{
    for (int x = x0; x < x1; x++)
    {
        float value = pos * w[x];
        value = value - std::floor(value);

        const uint32_t fixed28 = static_cast<uint32_t>(static_cast<int32_t>(value * static_cast<float>(0x1000'0000)));
        varying[x] = static_cast<uint16_t>((fixed28 << 4) >> 16);

        pos += step;
    }
}
#else
// SSE build: for x in [x0, x1) computes value * w[x] (value starts at pos and
// grows by step per pixel), keeps only the fractional part and stores it as a
// 16-bit fixed-point fraction (0x0000..0xffff covers [0, 1)).
//
// Four pixels are handled per iteration; the last 0-3 pixels use the scalar
// tail. The vector path shifts down to 15 bits before the signed pack and back
// up afterwards, so its results always have bit 0 clear.
static void WriteVaryingWrap(float pos, float step, int x0, int x1, const float* w, uint16_t* varying)
{
    int ssecount = ((x1 - x0) & ~3);
    int sseend = x0 + ssecount;

    __m128 mstep = _mm_set1_ps(step * 4.0f);
    __m128 mpos = _mm_setr_ps(pos, pos + step, pos + step + step, pos + step + step + step);

    for (int x = x0; x < sseend; x += 4)
    {
        __m128 value = _mm_mul_ps(mpos, _mm_loadu_ps(w + x));

        // floor(): truncate, then subtract 1 where truncation rounded up
        // (negative inputs).
        __m128 f = value;
        __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
        __m128 r = _mm_sub_ps(t, _mm_and_ps(_mm_cmplt_ps(f, t), _mm_set1_ps(1.0f)));
        value = _mm_sub_ps(f, r);

        // Scale to 28 bits; << 4 discards anything at or above 1.0, >> 17
        // leaves 15 bits so the signed 32->16 pack cannot saturate.
        __m128i ivalue = _mm_srli_epi32(_mm_slli_epi32(_mm_cvttps_epi32(_mm_mul_ps(value, _mm_set1_ps(static_cast<float>(0x1000'0000)))), 4), 17);
        _mm_storel_epi64((__m128i*)(varying + x), _mm_slli_epi16(_mm_packs_epi32(ivalue, ivalue), 1));
        mpos = _mm_add_ps(mpos, mstep);
    }

    pos += ssecount * step;
    for (int x = sseend; x < x1; x++)
    {
        float value = pos * w[x];

        // Same floor() construction as above, on a single lane.
        __m128 f = _mm_set_ss(value);
        __m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
        __m128 r = _mm_sub_ss(t, _mm_and_ps(_mm_cmplt_ps(f, t), _mm_set_ss(1.0f)));
        value = _mm_cvtss_f32(_mm_sub_ss(f, r));

        // Shift as unsigned: the fraction can round up to exactly 1.0, and a
        // signed << 4 would overflow in that case.
        const uint32_t fixed28 = static_cast<uint32_t>(static_cast<int32_t>(value * static_cast<float>(0x1000'0000)));
        varying[x] = static_cast<uint16_t>((fixed28 << 4) >> 16);

        pos += step;
    }
}
#endif
#ifdef NO_SSE
// Scalar build: for x in [x0, x1) converts value * w[x] (value starts at pos
// and grows by step per pixel) from the 0..1 range to an 8-bit channel,
// truncating and clamping to 0..255.
static void WriteVaryingColor(float pos, float step, int x0, int x1, const float* w, uint8_t* varying)
{
    int x = x0;
    while (x < x1)
    {
        const int scaled = static_cast<int>(pos * w[x] * 255.0f);
        varying[x] = clamp(scaled, 0, 255);
        pos += step;
        ++x;
    }
}
#else
// SSE build: for x in [x0, x1) converts value * w[x] (value starts at pos and
// grows by step per pixel) from the 0..1 range to an 8-bit channel, truncating
// and clamping to 0..255. Four pixels are handled per iteration; the last 0-3
// pixels use the scalar tail.
static void WriteVaryingColor(float pos, float step, int x0, int x1, const float* w, uint8_t* varying)
{
    int ssecount = ((x1 - x0) & ~3);
    int sseend = x0 + ssecount;

    __m128 mstep = _mm_set1_ps(step * 4.0f);
    __m128 mpos = _mm_setr_ps(pos, pos + step, pos + step + step, pos + step + step + step);

    for (int x = x0; x < sseend; x += 4)
    {
        __m128i value = _mm_cvttps_epi32(_mm_mul_ps(_mm_mul_ps(mpos, _mm_loadu_ps(w + x)), _mm_set1_ps(255.0f)));

        // The two saturating packs (signed 32->16, then unsigned 16->8)
        // perform the clamp to 0..255.
        value = _mm_packs_epi32(value, value);
        value = _mm_packus_epi16(value, value);

        // Store the four packed bytes with memcpy: varying + x has no
        // particular alignment and the buffer holds uint8_t, so writing
        // through a uint32_t pointer is not valid.
        const int packed = _mm_cvtsi128_si32(value);
        std::memcpy(varying + x, &packed, sizeof(packed));

        mpos = _mm_add_ps(mpos, mstep);
    }

    pos += ssecount * step;
    for (int x = sseend; x < x1; x++)
    {
        varying[x] = clamp(static_cast<int>(pos * w[x] * 255.0f), 0, 255);
        pos += step;
    }
}
#endif
void WriteVaryings(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
float startX = x0 + (0.5f - args->v1->x);
float startY = y + (0.5f - args->v1->y);
WriteVaryingWrap(args->v1->u * args->v1->w + args->gradientX.U * startX + args->gradientY.U * startY, args->gradientX.U, x0, x1, thread->scanline.W, thread->scanline.U);
WriteVaryingWrap(args->v1->v * args->v1->w + args->gradientX.V * startX + args->gradientY.V * startY, args->gradientX.V, x0, x1, thread->scanline.W, thread->scanline.V);
WriteVarying(args->v1->worldX * args->v1->w + args->gradientX.WorldX * startX + args->gradientY.WorldX * startY, args->gradientX.WorldX, x0, x1, thread->scanline.W, thread->scanline.WorldX);
WriteVarying(args->v1->worldY * args->v1->w + args->gradientX.WorldY * startX + args->gradientY.WorldY * startY, args->gradientX.WorldY, x0, x1, thread->scanline.W, thread->scanline.WorldY);
WriteVarying(args->v1->worldZ * args->v1->w + args->gradientX.WorldZ * startX + args->gradientY.WorldZ * startY, args->gradientX.WorldZ, x0, x1, thread->scanline.W, thread->scanline.WorldZ);
WriteVarying(args->v1->gradientdistZ * args->v1->w + args->gradientX.GradientdistZ * startX + args->gradientY.GradientdistZ * startY, args->gradientX.GradientdistZ, x0, x1, thread->scanline.W, thread->scanline.GradientdistZ);
WriteVaryingColor(args->v1->a * args->v1->w + args->gradientX.A * startX + args->gradientY.A * startY, args->gradientX.A, x0, x1, thread->scanline.W, thread->scanline.vColorA);
WriteVaryingColor(args->v1->r * args->v1->w + args->gradientX.R * startX + args->gradientY.R * startY, args->gradientX.R, x0, x1, thread->scanline.W, thread->scanline.vColorR);
WriteVaryingColor(args->v1->g * args->v1->w + args->gradientX.G * startX + args->gradientY.G * startY, args->gradientX.G, x0, x1, thread->scanline.W, thread->scanline.vColorG);
WriteVaryingColor(args->v1->b * args->v1->w + args->gradientX.B * startX + args->gradientY.B * startY, args->gradientX.B, x0, x1, thread->scanline.W, thread->scanline.vColorB);
if (thread->PushConstants->uLightLevel >= 0.0f)
WriteLightArray(y, x0, x1, args, thread);
if (thread->numPolyLights > 0)
WriteDynLightArray(x0, x1, thread);
}

View file

@ -0,0 +1,29 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
struct TriDrawTriangleArgs;
class PolyTriangleThreadData;
// Fills thread->scanline.W for pixels [x0, x1) of scanline y with the
// reciprocal of the interpolated W of the triangle described by args.
void WriteW(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread);
// Fills the remaining per-pixel scanline arrays (texture coordinates, world
// position, vertex colour and, when enabled, the light arrays) for pixels
// [x0, x1) of scanline y. Reads thread->scanline.W.
void WriteVaryings(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread);

View file

@ -0,0 +1,524 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include <stddef.h>
#include "templates.h"
#include "doomdef.h"
#include "poly_thread.h"
#include "screen_scanline_setup.h"
#include "x86.h"
#include <cmath>
// Nearest-texel lookup. u and v are 16-bit fixed-point fractions of the
// texture width and height. For a BGRA texture the stored 32-bit texel is
// returned as is; for an 8-bit texture the byte is placed in bits 16-23 and
// alpha is set to 0xff.
static uint32_t SampleTexture(uint32_t u, uint32_t v, const void* texPixels, int texWidth, int texHeight, bool texBgra)
{
    const int column = (u * texWidth) >> 16;
    const int row = (v * texHeight) >> 16;
    const int index = column + row * texWidth;

    if (!texBgra)
    {
        const uint32_t value = static_cast<const uint8_t*>(texPixels)[index];
        return (value << 16) | 0xff000000;
    }

    return static_cast<const uint32_t*>(texPixels)[index];
}
static void EffectFogBoundary(int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* fragcolor = thread->scanline.FragColor;
for (int x = x0; x < x1; x++)
{
/*float fogdist = pixelpos.w;
float fogfactor = exp2(uFogDensity * fogdist);
FragColor = vec4(uFogColor.rgb, 1.0 - fogfactor);*/
fragcolor[x] = 0;
}
}
static void EffectBurn(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
int tex2Width = thread->textures[1].width;
int tex2Height = thread->textures[1].height;
const void* tex2Pixels = thread->textures[1].pixels;
bool tex2Bgra = thread->textures[1].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
uint32_t frag_r = thread->scanline.vColorR[x];
uint32_t frag_g = thread->scanline.vColorG[x];
uint32_t frag_b = thread->scanline.vColorB[x];
uint32_t frag_a = thread->scanline.vColorA[x];
frag_r += frag_r >> 7; // 255 -> 256
frag_g += frag_g >> 7; // 255 -> 256
frag_b += frag_b >> 7; // 255 -> 256
frag_a += frag_a >> 7; // 255 -> 256
uint32_t t1 = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
uint32_t t2 = SampleTexture(u[x], 0xffff - v[x], tex2Pixels, tex2Width, tex2Height, tex2Bgra);
uint32_t r = (frag_r * RPART(t1)) >> 8;
uint32_t g = (frag_g * GPART(t1)) >> 8;
uint32_t b = (frag_b * BPART(t1)) >> 8;
uint32_t a = (frag_a * APART(t2)) >> 8;
fragcolor[x] = MAKEARGB(a, r, g, b);
}
}
// Stencil effect. Intentionally a no-op at the moment: FragColor is left
// untouched. The disabled loop below would write 0x00ffffff to every pixel.
static void EffectStencil(int x0, int x1, PolyTriangleThreadData* thread)
{
/*for (int x = x0; x < x1; x++)
{
fragcolor[x] = 0x00ffffff;
}*/
}
static void FuncPaletted(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
const uint32_t* lut = (const uint32_t*)thread->textures[1].pixels;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
fragcolor[x] = lut[RPART(SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra))] | 0xff000000;
}
}
static void FuncNoTexture(int x0, int x1, PolyTriangleThreadData* thread)
{
auto& streamdata = thread->mainVertexShader.Data;
uint32_t a = (int)(streamdata.uObjectColor.a * 255.0f);
uint32_t r = (int)(streamdata.uObjectColor.r * 255.0f);
uint32_t g = (int)(streamdata.uObjectColor.g * 255.0f);
uint32_t b = (int)(streamdata.uObjectColor.b * 255.0f);
uint32_t texel = MAKEARGB(a, r, g, b);
if (streamdata.uDesaturationFactor > 0.0f)
{
uint32_t t = (int)(streamdata.uDesaturationFactor * 256.0f);
uint32_t inv_t = 256 - t;
uint32_t gray = (RPART(texel) * 77 + GPART(texel) * 143 + BPART(texel) * 37) >> 8;
texel = MAKEARGB(
APART(texel),
(RPART(texel) * inv_t + gray * t + 127) >> 8,
(GPART(texel) * inv_t + gray * t + 127) >> 8,
(BPART(texel) * inv_t + gray * t + 127) >> 8);
}
uint32_t* fragcolor = thread->scanline.FragColor;
for (int x = x0; x < x1; x++)
{
fragcolor[x] = texel;
}
}
static void FuncNormal(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
fragcolor[x] = texel;
}
}
static void FuncNormal_Stencil(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
fragcolor[x] = texel | 0x00ffffff;
}
}
static void FuncNormal_Opaque(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
fragcolor[x] = texel | 0xff000000;
}
}
static void FuncNormal_Inverse(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
fragcolor[x] = MAKEARGB(APART(texel), 0xff - RPART(texel), 0xff - BPART(texel), 0xff - GPART(texel));
}
}
static void FuncNormal_AlphaTexture(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
uint32_t gray = (RPART(texel) * 77 + GPART(texel) * 143 + BPART(texel) * 37) >> 8;
uint32_t alpha = APART(texel);
alpha += alpha >> 7;
alpha = (alpha * gray + 127) >> 8;
texel = (alpha << 24) | 0x00ffffff;
fragcolor[x] = texel;
}
}
// ClampY texture mode: samples texture 0 and clears the alpha of pixels whose
// V coordinate is considered out of range.
//
// NOTE(review): v[] holds uint16_t values, so "v[x] < 0.0" is never true and
// "v[x] > 1.0" is true for every v >= 2. As written, alpha is cleared for
// nearly every pixel. The range test was presumably meant for an unwrapped
// floating-point V, which is not available in this function - confirm the
// intended behaviour before changing it.
static void FuncNormal_ClampY(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
fragcolor[x] = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
// Clear alpha, keep colour (see the review note above about this test).
if (v[x] < 0.0 || v[x] > 1.0)
fragcolor[x] &= 0x00ffffff;
}
}
static void FuncNormal_InvertOpaque(int x0, int x1, PolyTriangleThreadData* thread)
{
int texWidth = thread->textures[0].width;
int texHeight = thread->textures[0].height;
const void* texPixels = thread->textures[0].pixels;
bool texBgra = thread->textures[0].bgra;
uint32_t* fragcolor = thread->scanline.FragColor;
uint16_t* u = thread->scanline.U;
uint16_t* v = thread->scanline.V;
for (int x = x0; x < x1; x++)
{
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
fragcolor[x] = MAKEARGB(0xff, 0xff - RPART(texel), 0xff - BPART(texel), 0xff - GPART(texel));
}
}
static void FuncNormal_AddColor(int x0, int x1, PolyTriangleThreadData* thread)
{
auto& streamdata = thread->mainVertexShader.Data;
uint32_t r = (int)(streamdata.uAddColor.r * 255.0f);
uint32_t g = (int)(streamdata.uAddColor.g * 255.0f);
uint32_t b = (int)(streamdata.uAddColor.b * 255.0f);
uint32_t* fragcolor = thread->scanline.FragColor;
for (int x = x0; x < x1; x++)
{
uint32_t texel = fragcolor[x];
fragcolor[x] = MAKEARGB(
APART(texel),
MIN(r + RPART(texel), (uint32_t)255),
MIN(g + GPART(texel), (uint32_t)255),
MIN(b + BPART(texel), (uint32_t)255));
}
}
static void FuncNormal_AddObjectColor(int x0, int x1, PolyTriangleThreadData* thread)
{
auto& streamdata = thread->mainVertexShader.Data;
uint32_t r = (int)(streamdata.uObjectColor.r * 256.0f);
uint32_t g = (int)(streamdata.uObjectColor.g * 256.0f);
uint32_t b = (int)(streamdata.uObjectColor.b * 256.0f);
uint32_t* fragcolor = thread->scanline.FragColor;
for (int x = x0; x < x1; x++)
{
uint32_t texel = fragcolor[x];
fragcolor[x] = MAKEARGB(
APART(texel),
MIN((r * RPART(texel)) >> 8, (uint32_t)255),
MIN((g * GPART(texel)) >> 8, (uint32_t)255),
MIN((b * BPART(texel)) >> 8, (uint32_t)255));
}
}
static void FuncNormal_AddObjectColor2(int x0, int x1, PolyTriangleThreadData* thread)
{
auto& streamdata = thread->mainVertexShader.Data;
float* gradientdistZ = thread->scanline.GradientdistZ;
uint32_t* fragcolor = thread->scanline.FragColor;
for (int x = x0; x < x1; x++)
{
float t = gradientdistZ[x];
float inv_t = 1.0f - t;
uint32_t r = (int)((streamdata.uObjectColor.r * inv_t + streamdata.uObjectColor2.r * t) * 256.0f);
uint32_t g = (int)((streamdata.uObjectColor.g * inv_t + streamdata.uObjectColor2.g * t) * 256.0f);
uint32_t b = (int)((streamdata.uObjectColor.b * inv_t + streamdata.uObjectColor2.b * t) * 256.0f);
uint32_t texel = fragcolor[x];
fragcolor[x] = MAKEARGB(
APART(texel),
MIN((r * RPART(texel)) >> 8, (uint32_t)255),
MIN((g * GPART(texel)) >> 8, (uint32_t)255),
MIN((b * BPART(texel)) >> 8, (uint32_t)255));
}
}
static void FuncNormal_DesaturationFactor(int x0, int x1, PolyTriangleThreadData* thread)
{
auto& streamdata = thread->mainVertexShader.Data;
uint32_t* fragcolor = thread->scanline.FragColor;
uint32_t t = (int)(streamdata.uDesaturationFactor * 256.0f);
uint32_t inv_t = 256 - t;
for (int x = x0; x < x1; x++)
{
uint32_t texel = fragcolor[x];
uint32_t gray = (RPART(texel) * 77 + GPART(texel) * 143 + BPART(texel) * 37) >> 8;
fragcolor[x] = MAKEARGB(
APART(texel),
(RPART(texel) * inv_t + gray * t + 127) >> 8,
(GPART(texel) * inv_t + gray * t + 127) >> 8,
(BPART(texel) * inv_t + gray * t + 127) >> 8);
}
}
static void RunAlphaTest(int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t alphaThreshold = thread->AlphaThreshold;
uint32_t* fragcolor = thread->scanline.FragColor;
uint8_t* discard = thread->scanline.discard;
for (int x = x0; x < x1; x++)
{
discard[x] = fragcolor[x] <= alphaThreshold;
}
}
static void ApplyVertexColor(int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* fragcolor = thread->scanline.FragColor;
for (int x = x0; x < x1; x++)
{
uint32_t r = thread->scanline.vColorR[x];
uint32_t g = thread->scanline.vColorG[x];
uint32_t b = thread->scanline.vColorB[x];
uint32_t a = thread->scanline.vColorA[x];
a += a >> 7;
r += r >> 7;
g += g >> 7;
b += b >> 7;
uint32_t texel = fragcolor[x];
fragcolor[x] = MAKEARGB(
(APART(texel) * a + 127) >> 8,
(RPART(texel) * r + 127) >> 8,
(GPART(texel) * g + 127) >> 8,
(BPART(texel) * b + 127) >> 8);
}
}
static void MainFP(int x0, int x1, PolyTriangleThreadData* thread)
{
if (thread->EffectState == SHADER_Paletted) // func_paletted
{
FuncPaletted(x0, x1, thread);
}
else if (thread->EffectState == SHADER_NoTexture) // func_notexture
{
FuncNoTexture(x0, x1, thread);
}
else // func_normal
{
auto constants = thread->PushConstants;
switch (constants->uTextureMode)
{
default:
case TM_NORMAL:
case TM_FOGLAYER: FuncNormal(x0, x1, thread); break;
case TM_STENCIL: FuncNormal_Stencil(x0, x1, thread); break;
case TM_OPAQUE: FuncNormal_Opaque(x0, x1, thread); break;
case TM_INVERSE: FuncNormal_Inverse(x0, x1, thread); break;
case TM_ALPHATEXTURE: FuncNormal_AlphaTexture(x0, x1, thread); break;
case TM_CLAMPY: FuncNormal_ClampY(x0, x1, thread); break;
case TM_INVERTOPAQUE: FuncNormal_InvertOpaque(x0, x1, thread); break;
}
if (constants->uTextureMode != TM_FOGLAYER)
{
auto& streamdata = thread->mainVertexShader.Data;
if (streamdata.uAddColor.r != 0.0f || streamdata.uAddColor.g != 0.0f || streamdata.uAddColor.b != 0.0f)
{
FuncNormal_AddColor(x0, x1, thread);
}
if (streamdata.uObjectColor2.a == 0.0f)
{
if (streamdata.uObjectColor.r != 1.0f || streamdata.uObjectColor.g != 1.0f || streamdata.uObjectColor.b != 1.0f)
{
FuncNormal_AddObjectColor(x0, x1, thread);
}
}
else
{
FuncNormal_AddObjectColor2(x0, x1, thread);
}
if (streamdata.uDesaturationFactor > 0.0f)
{
FuncNormal_DesaturationFactor(x0, x1, thread);
}
}
}
if (thread->AlphaTest)
RunAlphaTest(x0, x1, thread);
ApplyVertexColor(x0, x1, thread);
auto constants = thread->PushConstants;
uint32_t* fragcolor = thread->scanline.FragColor;
if (constants->uLightLevel >= 0.0f && thread->numPolyLights > 0)
{
uint16_t* lightarray = thread->scanline.lightarray;
uint32_t* dynlights = thread->scanline.dynlights;
for (int x = x0; x < x1; x++)
{
uint32_t fg = fragcolor[x];
int lightshade = lightarray[x];
uint32_t dynlight = dynlights[x];
uint32_t a = APART(fg);
uint32_t r = MIN((RPART(fg) * (lightshade + RPART(dynlight))) >> 8, (uint32_t)255);
uint32_t g = MIN((GPART(fg) * (lightshade + GPART(dynlight))) >> 8, (uint32_t)255);
uint32_t b = MIN((BPART(fg) * (lightshade + BPART(dynlight))) >> 8, (uint32_t)255);
fragcolor[x] = MAKEARGB(a, r, g, b);
}
}
else if (constants->uLightLevel >= 0.0f)
{
uint16_t* lightarray = thread->scanline.lightarray;
for (int x = x0; x < x1; x++)
{
uint32_t fg = fragcolor[x];
int lightshade = lightarray[x];
uint32_t a = APART(fg);
uint32_t r = (RPART(fg) * lightshade) >> 8;
uint32_t g = (GPART(fg) * lightshade) >> 8;
uint32_t b = (BPART(fg) * lightshade) >> 8;
fragcolor[x] = MAKEARGB(a, r, g, b);
}
// To do: apply fog
}
else if (thread->numPolyLights > 0)
{
uint32_t* dynlights = thread->scanline.dynlights;
for (int x = x0; x < x1; x++)
{
uint32_t fg = fragcolor[x];
uint32_t dynlight = dynlights[x];
uint32_t a = APART(fg);
uint32_t r = MIN((RPART(fg) * RPART(dynlight)) >> 8, (uint32_t)255);
uint32_t g = MIN((GPART(fg) * GPART(dynlight)) >> 8, (uint32_t)255);
uint32_t b = MIN((BPART(fg) * BPART(dynlight)) >> 8, (uint32_t)255);
fragcolor[x] = MAKEARGB(a, r, g, b);
}
}
}
// Stores in thread->FragmentShader the routine matching thread->SpecialEffect:
// fog boundary, burn or stencil for the corresponding effects, and the main
// fragment shader for everything else.
void SelectFragmentShader(PolyTriangleThreadData* thread)
{
    if (thread->SpecialEffect == EFF_FOGBOUNDARY) // fogboundary.fp
    {
        thread->FragmentShader = &EffectFogBoundary;
        return;
    }

    if (thread->SpecialEffect == EFF_BURN) // burn.fp
    {
        thread->FragmentShader = &EffectBurn;
        return;
    }

    if (thread->SpecialEffect == EFF_STENCIL) // stencil.fp
    {
        thread->FragmentShader = &EffectStencil;
        return;
    }

    thread->FragmentShader = &MainFP;
}

View file

@ -0,0 +1,27 @@
/*
** Polygon Doom software renderer
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
// Forward declaration: this header only needs a pointer to the thread data.
class PolyTriangleThreadData;
// Chooses the fragment shader routine for the given thread based on
// thread->SpecialEffect (fog boundary, burn, stencil, otherwise the main
// shader) and stores the function pointer in thread->FragmentShader.
void SelectFragmentShader(PolyTriangleThreadData* thread);

File diff suppressed because it is too large Load diff

View file

@ -126,13 +126,3 @@ struct TestSpanOpt0 { static const int Flags = 0; };
// Tag types that each carry a distinct compile-time integer in Flags
// (1, 2 and 3 here).
// NOTE(review): presumably passed as template arguments to select a
// span-test variant — confirm against the code that uses these types.
struct TestSpanOpt1 { static const int Flags = 1; };
struct TestSpanOpt2 { static const int Flags = 2; };
struct TestSpanOpt3 { static const int Flags = 3; };
// Named constants 1 and 2. The values match BlendColorOpt_Sub::Flags and
// BlendColorOpt_RevSub::Flags respectively.
// NOTE(review): presumably selectors for subtractive / reverse-subtractive
// colour blending — confirm against the blend code that reads them.
enum SWBlendColor
{
SWBLEND_Sub = 1,
SWBLEND_RevSub = 2
};
// Tag types exposing a compile-time Flags constant: 0 for Add, 1 for Sub and
// 2 for RevSub. The Sub and RevSub values equal SWBLEND_Sub and
// SWBLEND_RevSub from SWBlendColor.
// NOTE(review): presumably used as template arguments to pick the blend
// operation at compile time — confirm against the users of these types.
struct BlendColorOpt_Add { static const int Flags = 0; };
struct BlendColorOpt_Sub { static const int Flags = 1; };
struct BlendColorOpt_RevSub { static const int Flags = 2; };

View file

@ -1,4 +1,8 @@
#include "../swrenderer/textures/r_swtexture.h"
#include "drawers/poly_triangle.cpp"
#include "drawers/poly_thread.cpp"
#include "drawers/screen_triangle.cpp"
#include "drawers/screen_scanline_setup.cpp"
#include "drawers/screen_shader.cpp"
#include "drawers/screen_blend.cpp"
#include "math/gpu_types.cpp"

View file

@ -52,7 +52,7 @@
#include "r_thread.h"
#include "swrenderer/scene/r_light.h"
#include "playsim/a_dynlight.h"
#include "polyrenderer/drawers/poly_triangle.h"
#include "polyrenderer/drawers/poly_thread.h"
// Boolean console variables r_dynlights and r_fuzzscale, both with an initial
// value of 1 and both declared with the CVAR_ARCHIVE | CVAR_GLOBALCONFIG flags.
// NOTE(review): presumably toggles for dynamic lights and fuzz-effect scaling
// in the software renderer — confirm against the code that reads them.
CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
CVAR(Bool, r_fuzzscale, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);