mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-01-18 15:11:46 +00:00
Split softpoly into more files
This commit is contained in:
parent
e0fb9a45e2
commit
5632c80ab2
16 changed files with 2949 additions and 2691 deletions
|
@ -708,7 +708,11 @@ set ( SWRENDER_SOURCES
|
|||
|
||||
set( POLYRENDER_SOURCES
|
||||
rendering/polyrenderer/drawers/poly_triangle.cpp
|
||||
rendering/polyrenderer/drawers/poly_thread.cpp
|
||||
rendering/polyrenderer/drawers/screen_triangle.cpp
|
||||
rendering/polyrenderer/drawers/screen_scanline_setup.cpp
|
||||
rendering/polyrenderer/drawers/screen_shader.cpp
|
||||
rendering/polyrenderer/drawers/screen_blend.cpp
|
||||
rendering/polyrenderer/math/gpu_types.cpp
|
||||
)
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include "poly_buffers.h"
|
||||
#include "poly_framebuffer.h"
|
||||
#include "poly_renderstate.h"
|
||||
#include "rendering/polyrenderer/drawers/poly_thread.h"
|
||||
#include "doomerrors.h"
|
||||
|
||||
PolyBuffer *PolyBuffer::First = nullptr;
|
||||
|
|
821
src/rendering/polyrenderer/drawers/poly_thread.cpp
Normal file
821
src/rendering/polyrenderer/drawers/poly_thread.cpp
Normal file
|
@ -0,0 +1,821 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include "templates.h"
|
||||
#include "doomdef.h"
|
||||
|
||||
#include "w_wad.h"
|
||||
#include "v_video.h"
|
||||
#include "doomstat.h"
|
||||
#include "st_stuff.h"
|
||||
#include "g_game.h"
|
||||
#include "g_level.h"
|
||||
#include "r_data/r_translate.h"
|
||||
#include "r_data/models/models.h"
|
||||
#include "v_palette.h"
|
||||
#include "r_data/colormaps.h"
|
||||
#include "poly_thread.h"
|
||||
#include "swrenderer/drawers/r_draw_rgba.h"
|
||||
#include "screen_triangle.h"
|
||||
#include "x86.h"
|
||||
|
||||
// Stores the worker's position in the thread pool (core / NUMA node) and the
// half-open scanline range [numa_start_y, numa_end_y) it is allowed to touch.
PolyTriangleThreadData::PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes, int numa_start_y, int numa_end_y)
	: core(core),
	  num_cores(num_cores),
	  numa_node(numa_node),
	  num_numa_nodes(num_numa_nodes),
	  numa_start_y(numa_start_y),
	  numa_end_y(numa_end_y)
{
}
|
||||
|
||||
void PolyTriangleThreadData::ClearDepth(float value)
|
||||
{
|
||||
int width = depthstencil->Width();
|
||||
int height = depthstencil->Height();
|
||||
float *data = depthstencil->DepthValues();
|
||||
|
||||
int skip = skipped_by_thread(0);
|
||||
int count = count_for_thread(0, height);
|
||||
|
||||
data += skip * width;
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
data[x] = value;
|
||||
data += num_cores * width;
|
||||
}
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::ClearStencil(uint8_t value)
|
||||
{
|
||||
int width = depthstencil->Width();
|
||||
int height = depthstencil->Height();
|
||||
uint8_t *data = depthstencil->StencilValues();
|
||||
|
||||
int skip = skipped_by_thread(0);
|
||||
int count = count_for_thread(0, height);
|
||||
|
||||
data += skip * width;
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
memset(data, value, width);
|
||||
data += num_cores * width;
|
||||
}
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, bool new_dest_bgra, PolyDepthStencil *new_depthstencil, bool new_topdown)
|
||||
{
|
||||
viewport_x = x;
|
||||
viewport_y = y;
|
||||
viewport_width = width;
|
||||
viewport_height = height;
|
||||
dest = new_dest;
|
||||
dest_width = new_dest_width;
|
||||
dest_height = new_dest_height;
|
||||
dest_pitch = new_dest_pitch;
|
||||
dest_bgra = new_dest_bgra;
|
||||
depthstencil = new_depthstencil;
|
||||
topdown = new_topdown;
|
||||
UpdateClip();
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetScissor(int x, int y, int w, int h)
|
||||
{
|
||||
scissor.left = x;
|
||||
scissor.right = x + w;
|
||||
scissor.top = y;
|
||||
scissor.bottom = y + h;
|
||||
UpdateClip();
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::UpdateClip()
|
||||
{
|
||||
clip.left = MAX(MAX(viewport_x, scissor.left), 0);
|
||||
clip.top = MAX(MAX(viewport_y, scissor.top), 0);
|
||||
clip.right = MIN(MIN(viewport_x + viewport_width, scissor.right), dest_width);
|
||||
clip.bottom = MIN(MIN(viewport_y + viewport_height, scissor.bottom), dest_height);
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::PushStreamData(const StreamData &data, const PolyPushConstants &constants)
|
||||
{
|
||||
mainVertexShader.Data = data;
|
||||
mainVertexShader.uClipSplit = constants.uClipSplit;
|
||||
|
||||
PushConstants = &constants;
|
||||
|
||||
AlphaThreshold = clamp((int)(PushConstants->uAlphaThreshold * 255.0f + 0.5f), 0, 255) << 24;
|
||||
|
||||
numPolyLights = 0;
|
||||
if (constants.uLightIndex >= 0)
|
||||
{
|
||||
const FVector4 &lightRange = lights[constants.uLightIndex];
|
||||
static_assert(sizeof(FVector4) == 16, "sizeof(FVector4) is not 16 bytes");
|
||||
if (lightRange.Y > lightRange.X)
|
||||
{
|
||||
int start = constants.uLightIndex + 1;
|
||||
int modulatedStart = static_cast<int>(lightRange.X) + start;
|
||||
int modulatedEnd = static_cast<int>(lightRange.Y) + start;
|
||||
for (int i = modulatedStart; i < modulatedEnd; i += 4)
|
||||
{
|
||||
if (numPolyLights == maxPolyLights)
|
||||
break;
|
||||
|
||||
auto &lightpos = lights[i];
|
||||
auto &lightcolor = lights[i + 1];
|
||||
//auto &lightspot1 = lights[i + 2];
|
||||
//auto &lightspot2 = lights[i + 3];
|
||||
uint32_t r = (int)clamp(lightcolor.X * 255.0f, 0.0f, 255.0f);
|
||||
uint32_t g = (int)clamp(lightcolor.Y * 255.0f, 0.0f, 255.0f);
|
||||
uint32_t b = (int)clamp(lightcolor.Z * 255.0f, 0.0f, 255.0f);
|
||||
|
||||
auto& polylight = polyLights[numPolyLights++];
|
||||
polylight.x = lightpos.X;
|
||||
polylight.y = lightpos.Y;
|
||||
polylight.z = lightpos.Z;
|
||||
polylight.radius = 256.0f / lightpos.W;
|
||||
polylight.color = (r << 16) | (g << 8) | b;
|
||||
if (lightcolor.W < 0.0f)
|
||||
polylight.radius = -polylight.radius;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::PushMatrices(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix)
|
||||
{
|
||||
mainVertexShader.ModelMatrix = modelMatrix;
|
||||
mainVertexShader.NormalModelMatrix = normalModelMatrix;
|
||||
mainVertexShader.TextureMatrix = textureMatrix;
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetViewpointUniforms(const HWViewpointUniforms *uniforms)
|
||||
{
|
||||
mainVertexShader.Viewpoint = uniforms;
|
||||
}
|
||||
|
||||
// Depth clamping is not implemented by this renderer; the request is ignored.
void PolyTriangleThreadData::SetDepthClamp(bool on)
{
	// Intentionally empty.
}
|
||||
|
||||
void PolyTriangleThreadData::SetDepthMask(bool on)
|
||||
{
|
||||
WriteDepth = on;
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetDepthFunc(int func)
|
||||
{
|
||||
if (func == DF_LEqual || func == DF_Less)
|
||||
{
|
||||
DepthTest = true;
|
||||
}
|
||||
else // if (func == DF_Always)
|
||||
{
|
||||
DepthTest = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Depth range selection is currently a no-op.
// The hwrenderer layer only uses two variants, (min 0, max 1) and
// (min 1, max 1); neither changes any state here.
void PolyTriangleThreadData::SetDepthRange(float min, float max)
{
	(void)min;
	(void)max;
}
|
||||
|
||||
void PolyTriangleThreadData::SetDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor)
|
||||
{
|
||||
depthbias = (float)(depthBiasConstantFactor / 2500.0);
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetColorMask(bool r, bool g, bool b, bool a)
|
||||
{
|
||||
WriteColor = r;
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetStencil(int stencilRef, int op)
|
||||
{
|
||||
StencilTestValue = stencilRef;
|
||||
if (op == SOP_Increment)
|
||||
{
|
||||
WriteStencil = StencilTest;
|
||||
StencilWriteValue = MIN(stencilRef + 1, (int)255);
|
||||
}
|
||||
else if (op == SOP_Decrement)
|
||||
{
|
||||
WriteStencil = StencilTest;
|
||||
StencilWriteValue = MAX(stencilRef - 1, (int)0);
|
||||
}
|
||||
else // SOP_Keep
|
||||
{
|
||||
WriteStencil = false;
|
||||
StencilWriteValue = stencilRef;
|
||||
}
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetCulling(int mode)
|
||||
{
|
||||
SetTwoSided(mode == Cull_None);
|
||||
SetCullCCW(mode == Cull_CCW);
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::EnableStencil(bool on)
|
||||
{
|
||||
StencilTest = on;
|
||||
WriteStencil = on && (StencilTestValue != StencilWriteValue);
|
||||
}
|
||||
|
||||
// Stores the render style used by subsequent draws.
void PolyTriangleThreadData::SetRenderStyle(FRenderStyle style)
{
	this->RenderStyle = style;
}
|
||||
|
||||
void PolyTriangleThreadData::SetShader(int specialEffect, int effectState, bool alphaTest)
|
||||
{
|
||||
SpecialEffect = specialEffect;
|
||||
EffectState = effectState;
|
||||
AlphaTest = alphaTest;
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::SetTexture(int unit, const void *pixels, int width, int height, bool bgra)
|
||||
{
|
||||
textures[unit].pixels = pixels;
|
||||
textures[unit].width = width;
|
||||
textures[unit].height = height;
|
||||
textures[unit].bgra = bgra;
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::DrawIndexed(int index, int vcount, PolyDrawMode drawmode)
|
||||
{
|
||||
if (vcount < 3)
|
||||
return;
|
||||
|
||||
elements += index;
|
||||
|
||||
ShadedTriVertex vertbuffer[3];
|
||||
ShadedTriVertex *vert[3] = { &vertbuffer[0], &vertbuffer[1], &vertbuffer[2] };
|
||||
if (drawmode == PolyDrawMode::Triangles)
|
||||
{
|
||||
for (int i = 0; i < vcount / 3; i++)
|
||||
{
|
||||
for (int j = 0; j < 3; j++)
|
||||
*vert[j] = ShadeVertex(*(elements++));
|
||||
DrawShadedTriangle(vert, ccw);
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::TriangleFan)
|
||||
{
|
||||
*vert[0] = ShadeVertex(*(elements++));
|
||||
*vert[1] = ShadeVertex(*(elements++));
|
||||
for (int i = 2; i < vcount; i++)
|
||||
{
|
||||
*vert[2] = ShadeVertex(*(elements++));
|
||||
DrawShadedTriangle(vert, ccw);
|
||||
std::swap(vert[1], vert[2]);
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::TriangleStrip)
|
||||
{
|
||||
bool toggleccw = ccw;
|
||||
*vert[0] = ShadeVertex(*(elements++));
|
||||
*vert[1] = ShadeVertex(*(elements++));
|
||||
for (int i = 2; i < vcount; i++)
|
||||
{
|
||||
*vert[2] = ShadeVertex(*(elements++));
|
||||
DrawShadedTriangle(vert, toggleccw);
|
||||
ShadedTriVertex *vtmp = vert[0];
|
||||
vert[0] = vert[1];
|
||||
vert[1] = vert[2];
|
||||
vert[2] = vtmp;
|
||||
toggleccw = !toggleccw;
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::Lines)
|
||||
{
|
||||
for (int i = 0; i < vcount / 2; i++)
|
||||
{
|
||||
*vert[0] = ShadeVertex(*(elements++));
|
||||
*vert[1] = ShadeVertex(*(elements++));
|
||||
DrawShadedLine(vert);
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::Points)
|
||||
{
|
||||
for (int i = 0; i < vcount; i++)
|
||||
{
|
||||
*vert[0] = ShadeVertex(*(elements++));
|
||||
DrawShadedPoint(vert);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::Draw(int index, int vcount, PolyDrawMode drawmode)
|
||||
{
|
||||
if (vcount < 3)
|
||||
return;
|
||||
|
||||
int vinput = index;
|
||||
|
||||
ShadedTriVertex vertbuffer[3];
|
||||
ShadedTriVertex *vert[3] = { &vertbuffer[0], &vertbuffer[1], &vertbuffer[2] };
|
||||
if (drawmode == PolyDrawMode::Triangles)
|
||||
{
|
||||
for (int i = 0; i < vcount / 3; i++)
|
||||
{
|
||||
for (int j = 0; j < 3; j++)
|
||||
*vert[j] = ShadeVertex(vinput++);
|
||||
DrawShadedTriangle(vert, ccw);
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::TriangleFan)
|
||||
{
|
||||
*vert[0] = ShadeVertex(vinput++);
|
||||
*vert[1] = ShadeVertex(vinput++);
|
||||
for (int i = 2; i < vcount; i++)
|
||||
{
|
||||
*vert[2] = ShadeVertex(vinput++);
|
||||
DrawShadedTriangle(vert, ccw);
|
||||
std::swap(vert[1], vert[2]);
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::TriangleStrip)
|
||||
{
|
||||
bool toggleccw = ccw;
|
||||
*vert[0] = ShadeVertex(vinput++);
|
||||
*vert[1] = ShadeVertex(vinput++);
|
||||
for (int i = 2; i < vcount; i++)
|
||||
{
|
||||
*vert[2] = ShadeVertex(vinput++);
|
||||
DrawShadedTriangle(vert, toggleccw);
|
||||
ShadedTriVertex *vtmp = vert[0];
|
||||
vert[0] = vert[1];
|
||||
vert[1] = vert[2];
|
||||
vert[2] = vtmp;
|
||||
toggleccw = !toggleccw;
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::Lines)
|
||||
{
|
||||
for (int i = 0; i < vcount / 2; i++)
|
||||
{
|
||||
*vert[0] = ShadeVertex(vinput++);
|
||||
*vert[1] = ShadeVertex(vinput++);
|
||||
DrawShadedLine(vert);
|
||||
}
|
||||
}
|
||||
else if (drawmode == PolyDrawMode::Points)
|
||||
{
|
||||
for (int i = 0; i < vcount; i++)
|
||||
{
|
||||
*vert[0] = ShadeVertex(vinput++);
|
||||
DrawShadedPoint(vert);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the vertex shader for vertex `index` of the bound vertex buffer and
// returns the shaded result by value.
ShadedTriVertex PolyTriangleThreadData::ShadeVertex(int index)
{
	// Fetch the vertex attributes through the active input assembly.
	inputAssembly->Load(this, vertices, index);

	// Select the shader variant from the current special effect.
	const bool simpleEffect = (SpecialEffect == EFF_BURN) || (SpecialEffect == EFF_STENCIL);
	const bool sphereMapped = (SpecialEffect == EFF_SPHEREMAP);
	mainVertexShader.SIMPLE = simpleEffect;
	mainVertexShader.SPHEREMAP = sphereMapped;

	mainVertexShader.main();
	return mainVertexShader;
}
|
||||
|
||||
bool PolyTriangleThreadData::IsDegenerate(const ShadedTriVertex *const* vert)
|
||||
{
|
||||
// A degenerate triangle has a zero cross product for two of its sides.
|
||||
float ax = vert[1]->gl_Position.X - vert[0]->gl_Position.X;
|
||||
float ay = vert[1]->gl_Position.Y - vert[0]->gl_Position.Y;
|
||||
float az = vert[1]->gl_Position.W - vert[0]->gl_Position.W;
|
||||
float bx = vert[2]->gl_Position.X - vert[0]->gl_Position.X;
|
||||
float by = vert[2]->gl_Position.Y - vert[0]->gl_Position.Y;
|
||||
float bz = vert[2]->gl_Position.W - vert[0]->gl_Position.W;
|
||||
float crossx = ay * bz - az * by;
|
||||
float crossy = az * bx - ax * bz;
|
||||
float crossz = ax * by - ay * bx;
|
||||
float crosslengthsqr = crossx * crossx + crossy * crossy + crossz * crossz;
|
||||
return crosslengthsqr <= 1.e-8f;
|
||||
}
|
||||
|
||||
// Returns true when the sum of the 2D cross products of the triangle's
// screen-space vertices (v1, v2, v3 taken in order) is zero or negative.
bool PolyTriangleThreadData::IsFrontfacing(TriDrawTriangleArgs *args)
{
	const auto &p1 = *args->v1;
	const auto &p2 = *args->v2;
	const auto &p3 = *args->v3;

	const float signedArea =
		p1.x * p2.y - p2.x * p1.y +
		p2.x * p3.y - p3.x * p2.y +
		p3.x * p1.y - p1.x * p3.y;

	return signedArea <= 0.0f;
}
|
||||
|
||||
// Point primitives are not implemented; the vertex is discarded.
void PolyTriangleThreadData::DrawShadedPoint(const ShadedTriVertex *const* vertex)
{
	// Intentionally empty.
}
|
||||
|
||||
void PolyTriangleThreadData::DrawShadedLine(const ShadedTriVertex *const* vert)
|
||||
{
|
||||
static const int numclipdistances = 9;
|
||||
float clipdistance[numclipdistances * 2];
|
||||
float *clipd = clipdistance;
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
const auto &v = *vert[i];
|
||||
clipd[0] = v.gl_Position.X + v.gl_Position.W;
|
||||
clipd[1] = v.gl_Position.W - v.gl_Position.X;
|
||||
clipd[2] = v.gl_Position.Y + v.gl_Position.W;
|
||||
clipd[3] = v.gl_Position.W - v.gl_Position.Y;
|
||||
clipd[4] = v.gl_Position.Z + v.gl_Position.W;
|
||||
clipd[5] = v.gl_Position.W - v.gl_Position.Z;
|
||||
clipd[6] = v.gl_ClipDistance[0];
|
||||
clipd[7] = v.gl_ClipDistance[1];
|
||||
clipd[8] = v.gl_ClipDistance[2];
|
||||
clipd += numclipdistances;
|
||||
}
|
||||
|
||||
float t1 = 0.0f;
|
||||
float t2 = 1.0f;
|
||||
for (int p = 0; p < numclipdistances; p++)
|
||||
{
|
||||
float clipdistance1 = clipdistance[0 * numclipdistances + p];
|
||||
float clipdistance2 = clipdistance[1 * numclipdistances + p];
|
||||
if (clipdistance1 < 0.0f) t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1);
|
||||
if (clipdistance2 < 0.0f) t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2);
|
||||
if (t1 >= t2)
|
||||
return;
|
||||
}
|
||||
|
||||
float weights[] = { 1.0f - t1, t1, 1.0f - t2, t2 };
|
||||
|
||||
ScreenTriVertex clippedvert[2];
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
auto &v = clippedvert[i];
|
||||
memset(&v, 0, sizeof(ScreenTriVertex));
|
||||
for (int w = 0; w < 2; w++)
|
||||
{
|
||||
float weight = weights[i * 2 + w];
|
||||
v.x += vert[w]->gl_Position.X * weight;
|
||||
v.y += vert[w]->gl_Position.Y * weight;
|
||||
v.z += vert[w]->gl_Position.Z * weight;
|
||||
v.w += vert[w]->gl_Position.W * weight;
|
||||
}
|
||||
|
||||
// Calculate normalized device coordinates:
|
||||
v.w = 1.0f / v.w;
|
||||
v.x *= v.w;
|
||||
v.y *= v.w;
|
||||
v.z *= v.w;
|
||||
|
||||
// Apply viewport scale to get screen coordinates:
|
||||
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
|
||||
if (topdown)
|
||||
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
|
||||
else
|
||||
v.y = viewport_y + viewport_height * (1.0f + v.y) * 0.5f;
|
||||
}
|
||||
|
||||
uint32_t vColorA = (int)(vert[0]->vColor.W * 255.0f + 0.5f);
|
||||
uint32_t vColorR = (int)(vert[0]->vColor.X * 255.0f + 0.5f);
|
||||
uint32_t vColorG = (int)(vert[0]->vColor.Y * 255.0f + 0.5f);
|
||||
uint32_t vColorB = (int)(vert[0]->vColor.Z * 255.0f + 0.5f);
|
||||
uint32_t color = MAKEARGB(vColorA, vColorR, vColorG, vColorB);
|
||||
|
||||
// Slow and naive implementation. Hopefully fast enough..
|
||||
|
||||
float x1 = clippedvert[0].x;
|
||||
float y1 = clippedvert[0].y;
|
||||
float x2 = clippedvert[1].x;
|
||||
float y2 = clippedvert[1].y;
|
||||
float dx = x2 - x1;
|
||||
float dy = y2 - y1;
|
||||
float step = (abs(dx) >= abs(dy)) ? abs(dx) : abs(dy);
|
||||
dx /= step;
|
||||
dy /= step;
|
||||
float x = x1;
|
||||
float y = y1;
|
||||
int istep = (int)step;
|
||||
int pixelsize = dest_bgra ? 4 : 1;
|
||||
for (int i = 0; i <= istep; i++)
|
||||
{
|
||||
int scrx = (int)x;
|
||||
int scry = (int)y;
|
||||
if (scrx >= clip.left && scrx < clip.right && scry >= clip.top && scry < clip.bottom && !line_skipped_by_thread(scry))
|
||||
{
|
||||
uint8_t *destpixel = dest + (scrx + scry * dest_width) * pixelsize;
|
||||
if (pixelsize == 4)
|
||||
{
|
||||
*reinterpret_cast<uint32_t*>(destpixel) = color;
|
||||
}
|
||||
else
|
||||
{
|
||||
*destpixel = color;
|
||||
}
|
||||
}
|
||||
x += dx;
|
||||
y += dy;
|
||||
}
|
||||
}
|
||||
|
||||
void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *const* vert, bool ccw)
|
||||
{
|
||||
// Reject triangle if degenerate
|
||||
if (IsDegenerate(vert))
|
||||
return;
|
||||
|
||||
// Cull, clip and generate additional vertices as needed
|
||||
ScreenTriVertex clippedvert[max_additional_vertices];
|
||||
int numclipvert = ClipEdge(vert);
|
||||
|
||||
// Convert barycentric weights to actual vertices
|
||||
for (int i = 0; i < numclipvert; i++)
|
||||
{
|
||||
auto &v = clippedvert[i];
|
||||
memset(&v, 0, sizeof(ScreenTriVertex));
|
||||
for (int w = 0; w < 3; w++)
|
||||
{
|
||||
float weight = weights[i * 3 + w];
|
||||
v.x += vert[w]->gl_Position.X * weight;
|
||||
v.y += vert[w]->gl_Position.Y * weight;
|
||||
v.z += vert[w]->gl_Position.Z * weight;
|
||||
v.w += vert[w]->gl_Position.W * weight;
|
||||
v.u += vert[w]->vTexCoord.X * weight;
|
||||
v.v += vert[w]->vTexCoord.Y * weight;
|
||||
v.worldX += vert[w]->pixelpos.X * weight;
|
||||
v.worldY += vert[w]->pixelpos.Y * weight;
|
||||
v.worldZ += vert[w]->pixelpos.Z * weight;
|
||||
v.a += vert[w]->vColor.W * weight;
|
||||
v.r += vert[w]->vColor.X * weight;
|
||||
v.g += vert[w]->vColor.Y * weight;
|
||||
v.b += vert[w]->vColor.Z * weight;
|
||||
v.gradientdistZ += vert[w]->gradientdist.Z * weight;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef NO_SSE
|
||||
// Map to 2D viewport:
|
||||
for (int j = 0; j < numclipvert; j++)
|
||||
{
|
||||
auto &v = clippedvert[j];
|
||||
|
||||
// Calculate normalized device coordinates:
|
||||
v.w = 1.0f / v.w;
|
||||
v.x *= v.w;
|
||||
v.y *= v.w;
|
||||
v.z *= v.w;
|
||||
|
||||
// Apply viewport scale to get screen coordinates:
|
||||
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
|
||||
if (topdown)
|
||||
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
|
||||
else
|
||||
v.y = viewport_y + viewport_height * (1.0f + v.y) * 0.5f;
|
||||
}
|
||||
#else
|
||||
// Map to 2D viewport:
|
||||
__m128 mviewport_x = _mm_set1_ps((float)viewport_x);
|
||||
__m128 mviewport_y = _mm_set1_ps((float)viewport_y);
|
||||
__m128 mviewport_halfwidth = _mm_set1_ps(viewport_width * 0.5f);
|
||||
__m128 mviewport_halfheight = _mm_set1_ps(viewport_height * 0.5f);
|
||||
__m128 mone = _mm_set1_ps(1.0f);
|
||||
int sse_length = (numclipvert + 3) / 4 * 4;
|
||||
for (int j = 0; j < sse_length; j += 4)
|
||||
{
|
||||
__m128 vx = _mm_loadu_ps(&clippedvert[j].x);
|
||||
__m128 vy = _mm_loadu_ps(&clippedvert[j + 1].x);
|
||||
__m128 vz = _mm_loadu_ps(&clippedvert[j + 2].x);
|
||||
__m128 vw = _mm_loadu_ps(&clippedvert[j + 3].x);
|
||||
_MM_TRANSPOSE4_PS(vx, vy, vz, vw);
|
||||
|
||||
// Calculate normalized device coordinates:
|
||||
vw = _mm_div_ps(mone, vw);
|
||||
vx = _mm_mul_ps(vx, vw);
|
||||
vy = _mm_mul_ps(vy, vw);
|
||||
vz = _mm_mul_ps(vz, vw);
|
||||
|
||||
// Apply viewport scale to get screen coordinates:
|
||||
vx = _mm_add_ps(mviewport_x, _mm_mul_ps(mviewport_halfwidth, _mm_add_ps(mone, vx)));
|
||||
if (topdown)
|
||||
vy = _mm_add_ps(mviewport_y, _mm_mul_ps(mviewport_halfheight, _mm_sub_ps(mone, vy)));
|
||||
else
|
||||
vy = _mm_add_ps(mviewport_y, _mm_mul_ps(mviewport_halfheight, _mm_add_ps(mone, vy)));
|
||||
|
||||
_MM_TRANSPOSE4_PS(vx, vy, vz, vw);
|
||||
_mm_storeu_ps(&clippedvert[j].x, vx);
|
||||
_mm_storeu_ps(&clippedvert[j + 1].x, vy);
|
||||
_mm_storeu_ps(&clippedvert[j + 2].x, vz);
|
||||
_mm_storeu_ps(&clippedvert[j + 3].x, vw);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!topdown) ccw = !ccw;
|
||||
|
||||
TriDrawTriangleArgs args;
|
||||
|
||||
if (twosided && numclipvert > 2)
|
||||
{
|
||||
args.v1 = &clippedvert[0];
|
||||
args.v2 = &clippedvert[1];
|
||||
args.v3 = &clippedvert[2];
|
||||
ccw = !IsFrontfacing(&args);
|
||||
}
|
||||
|
||||
// Draw screen triangles
|
||||
if (ccw)
|
||||
{
|
||||
for (int i = numclipvert - 1; i > 1; i--)
|
||||
{
|
||||
args.v1 = &clippedvert[numclipvert - 1];
|
||||
args.v2 = &clippedvert[i - 1];
|
||||
args.v3 = &clippedvert[i - 2];
|
||||
if (IsFrontfacing(&args) == ccw && args.CalculateGradients())
|
||||
{
|
||||
ScreenTriangle::Draw(&args, this);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 2; i < numclipvert; i++)
|
||||
{
|
||||
args.v1 = &clippedvert[0];
|
||||
args.v2 = &clippedvert[i - 1];
|
||||
args.v3 = &clippedvert[i];
|
||||
if (IsFrontfacing(&args) != ccw && args.CalculateGradients())
|
||||
{
|
||||
ScreenTriangle::Draw(&args, this);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int PolyTriangleThreadData::ClipEdge(const ShadedTriVertex *const* verts)
|
||||
{
|
||||
// use barycentric weights for clipped vertices
|
||||
weights = weightsbuffer;
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
weights[i * 3 + 0] = 0.0f;
|
||||
weights[i * 3 + 1] = 0.0f;
|
||||
weights[i * 3 + 2] = 0.0f;
|
||||
weights[i * 3 + i] = 1.0f;
|
||||
}
|
||||
|
||||
// Clip and cull so that the following is true for all vertices:
|
||||
// -v.w <= v.x <= v.w
|
||||
// -v.w <= v.y <= v.w
|
||||
// -v.w <= v.z <= v.w
|
||||
|
||||
// halfspace clip distances
|
||||
static const int numclipdistances = 9;
|
||||
#ifdef NO_SSE
|
||||
float clipdistance[numclipdistances * 3];
|
||||
bool needsclipping = false;
|
||||
float *clipd = clipdistance;
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
const auto &v = *verts[i];
|
||||
clipd[0] = v.gl_Position.X + v.gl_Position.W;
|
||||
clipd[1] = v.gl_Position.W - v.gl_Position.X;
|
||||
clipd[2] = v.gl_Position.Y + v.gl_Position.W;
|
||||
clipd[3] = v.gl_Position.W - v.gl_Position.Y;
|
||||
clipd[4] = v.gl_Position.Z + v.gl_Position.W;
|
||||
clipd[5] = v.gl_Position.W - v.gl_Position.Z;
|
||||
clipd[6] = v.gl_ClipDistance[0];
|
||||
clipd[7] = v.gl_ClipDistance[1];
|
||||
clipd[8] = v.gl_ClipDistance[2];
|
||||
for (int j = 0; j < 9; j++)
|
||||
needsclipping = needsclipping || clipd[i];
|
||||
clipd += numclipdistances;
|
||||
}
|
||||
|
||||
// If all halfspace clip distances are positive then the entire triangle is visible. Skip the expensive clipping step.
|
||||
if (!needsclipping)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
#else
|
||||
__m128 mx = _mm_loadu_ps(&verts[0]->gl_Position.X);
|
||||
__m128 my = _mm_loadu_ps(&verts[1]->gl_Position.X);
|
||||
__m128 mz = _mm_loadu_ps(&verts[2]->gl_Position.X);
|
||||
__m128 mw = _mm_setzero_ps();
|
||||
_MM_TRANSPOSE4_PS(mx, my, mz, mw);
|
||||
__m128 clipd0 = _mm_add_ps(mx, mw);
|
||||
__m128 clipd1 = _mm_sub_ps(mw, mx);
|
||||
__m128 clipd2 = _mm_add_ps(my, mw);
|
||||
__m128 clipd3 = _mm_sub_ps(mw, my);
|
||||
__m128 clipd4 = _mm_add_ps(mz, mw);
|
||||
__m128 clipd5 = _mm_sub_ps(mw, mz);
|
||||
__m128 clipd6 = _mm_setr_ps(verts[0]->gl_ClipDistance[0], verts[1]->gl_ClipDistance[0], verts[2]->gl_ClipDistance[0], 0.0f);
|
||||
__m128 clipd7 = _mm_setr_ps(verts[0]->gl_ClipDistance[1], verts[1]->gl_ClipDistance[1], verts[2]->gl_ClipDistance[1], 0.0f);
|
||||
__m128 clipd8 = _mm_setr_ps(verts[0]->gl_ClipDistance[2], verts[1]->gl_ClipDistance[2], verts[2]->gl_ClipDistance[2], 0.0f);
|
||||
__m128 mneedsclipping = _mm_cmplt_ps(clipd0, _mm_setzero_ps());
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd1, _mm_setzero_ps()));
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd2, _mm_setzero_ps()));
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd3, _mm_setzero_ps()));
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd4, _mm_setzero_ps()));
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd5, _mm_setzero_ps()));
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd6, _mm_setzero_ps()));
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd7, _mm_setzero_ps()));
|
||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd8, _mm_setzero_ps()));
|
||||
if (_mm_movemask_ps(mneedsclipping) == 0)
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
float clipdistance[numclipdistances * 4];
|
||||
_mm_storeu_ps(clipdistance, clipd0);
|
||||
_mm_storeu_ps(clipdistance + 4, clipd1);
|
||||
_mm_storeu_ps(clipdistance + 8, clipd2);
|
||||
_mm_storeu_ps(clipdistance + 12, clipd3);
|
||||
_mm_storeu_ps(clipdistance + 16, clipd4);
|
||||
_mm_storeu_ps(clipdistance + 20, clipd5);
|
||||
_mm_storeu_ps(clipdistance + 24, clipd6);
|
||||
_mm_storeu_ps(clipdistance + 28, clipd7);
|
||||
_mm_storeu_ps(clipdistance + 32, clipd8);
|
||||
#endif
|
||||
|
||||
// Clip against each halfspace
|
||||
float *input = weights;
|
||||
float *output = weights + max_additional_vertices * 3;
|
||||
int inputverts = 3;
|
||||
for (int p = 0; p < numclipdistances; p++)
|
||||
{
|
||||
// Clip each edge
|
||||
int outputverts = 0;
|
||||
for (int i = 0; i < inputverts; i++)
|
||||
{
|
||||
int j = (i + 1) % inputverts;
|
||||
#ifdef NO_SSE
|
||||
float clipdistance1 =
|
||||
clipdistance[0 * numclipdistances + p] * input[i * 3 + 0] +
|
||||
clipdistance[1 * numclipdistances + p] * input[i * 3 + 1] +
|
||||
clipdistance[2 * numclipdistances + p] * input[i * 3 + 2];
|
||||
|
||||
float clipdistance2 =
|
||||
clipdistance[0 * numclipdistances + p] * input[j * 3 + 0] +
|
||||
clipdistance[1 * numclipdistances + p] * input[j * 3 + 1] +
|
||||
clipdistance[2 * numclipdistances + p] * input[j * 3 + 2];
|
||||
#else
|
||||
float clipdistance1 =
|
||||
clipdistance[0 + p * 4] * input[i * 3 + 0] +
|
||||
clipdistance[1 + p * 4] * input[i * 3 + 1] +
|
||||
clipdistance[2 + p * 4] * input[i * 3 + 2];
|
||||
|
||||
float clipdistance2 =
|
||||
clipdistance[0 + p * 4] * input[j * 3 + 0] +
|
||||
clipdistance[1 + p * 4] * input[j * 3 + 1] +
|
||||
clipdistance[2 + p * 4] * input[j * 3 + 2];
|
||||
#endif
|
||||
|
||||
// Clip halfspace
|
||||
if ((clipdistance1 >= 0.0f || clipdistance2 >= 0.0f) && outputverts + 1 < max_additional_vertices)
|
||||
{
|
||||
float t1 = (clipdistance1 < 0.0f) ? MAX(-clipdistance1 / (clipdistance2 - clipdistance1), 0.0f) : 0.0f;
|
||||
float t2 = (clipdistance2 < 0.0f) ? MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), 1.0f) : 1.0f;
|
||||
|
||||
// add t1 vertex
|
||||
for (int k = 0; k < 3; k++)
|
||||
output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t1) + input[j * 3 + k] * t1;
|
||||
outputverts++;
|
||||
|
||||
if (t2 != 1.0f && t2 > t1)
|
||||
{
|
||||
// add t2 vertex
|
||||
for (int k = 0; k < 3; k++)
|
||||
output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t2) + input[j * 3 + k] * t2;
|
||||
outputverts++;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::swap(input, output);
|
||||
inputverts = outputverts;
|
||||
if (inputverts == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
weights = input;
|
||||
return inputverts;
|
||||
}
|
||||
|
||||
// Returns the PolyTriangleThreadData attached to `thread`, creating it from
// the thread's core/NUMA settings on first use.
PolyTriangleThreadData *PolyTriangleThreadData::Get(DrawerThread *thread)
{
	if (thread->poly)
		return thread->poly.get();

	thread->poly = std::make_shared<PolyTriangleThreadData>(thread->core, thread->num_cores, thread->numa_node, thread->num_numa_nodes, thread->numa_start_y, thread->numa_end_y);
	return thread->poly.get();
}
|
197
src/rendering/polyrenderer/drawers/poly_thread.h
Normal file
197
src/rendering/polyrenderer/drawers/poly_thread.h
Normal file
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "poly_triangle.h"
|
||||
|
||||
// One dynamic light as consumed by the software shaders.
struct PolyLight
{
	// Packed color: red in bits 16-23, green in bits 8-15, blue in bits 0-7.
	uint32_t color;

	// Light position.
	float x;
	float y;
	float z;

	// Radius term; the sign is used as a flag by the code that fills it in.
	float radius;
};
|
||||
|
||||
class PolyTriangleThreadData
|
||||
{
|
||||
public:
|
||||
PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes, int numa_start_y, int numa_end_y);
|
||||
|
||||
void ClearDepth(float value);
|
||||
void ClearStencil(uint8_t value);
|
||||
void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, PolyDepthStencil *depthstencil, bool topdown);
|
||||
|
||||
void SetCullCCW(bool value) { ccw = value; }
|
||||
void SetTwoSided(bool value) { twosided = value; }
|
||||
|
||||
void SetInputAssembly(PolyInputAssembly *input) { inputAssembly = input; }
|
||||
void SetVertexBuffer(const void *data) { vertices = data; }
|
||||
void SetIndexBuffer(const void *data) { elements = (const unsigned int *)data; }
|
||||
void SetLightBuffer(const void *data) { lights = (const FVector4 *)data; }
|
||||
void SetViewpointUniforms(const HWViewpointUniforms *uniforms);
|
||||
void SetDepthClamp(bool on);
|
||||
void SetDepthMask(bool on);
|
||||
void SetDepthFunc(int func);
|
||||
void SetDepthRange(float min, float max);
|
||||
void SetDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor);
|
||||
void SetColorMask(bool r, bool g, bool b, bool a);
|
||||
void SetStencil(int stencilRef, int op);
|
||||
void SetCulling(int mode);
|
||||
void EnableStencil(bool on);
|
||||
void SetScissor(int x, int y, int w, int h);
|
||||
void SetRenderStyle(FRenderStyle style);
|
||||
void SetTexture(int unit, const void *pixels, int width, int height, bool bgra);
|
||||
void SetShader(int specialEffect, int effectState, bool alphaTest);
|
||||
|
||||
void UpdateClip();
|
||||
|
||||
void PushStreamData(const StreamData &data, const PolyPushConstants &constants);
|
||||
void PushMatrices(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix);
|
||||
|
||||
void DrawIndexed(int index, int count, PolyDrawMode mode);
|
||||
void Draw(int index, int vcount, PolyDrawMode mode);
|
||||
|
||||
int32_t core;
|
||||
int32_t num_cores;
|
||||
int32_t numa_node;
|
||||
int32_t num_numa_nodes;
|
||||
|
||||
int numa_start_y;
|
||||
int numa_end_y;
|
||||
|
||||
bool line_skipped_by_thread(int line)
|
||||
{
|
||||
return line < numa_start_y || line >= numa_end_y || line % num_cores != core;
|
||||
}
|
||||
|
||||
int skipped_by_thread(int first_line)
|
||||
{
|
||||
int clip_first_line = MAX(first_line, numa_start_y);
|
||||
int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores;
|
||||
return clip_first_line + core_skip - first_line;
|
||||
}
|
||||
|
||||
int count_for_thread(int first_line, int count)
|
||||
{
|
||||
count = MIN(count, numa_end_y - first_line);
|
||||
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
||||
return MAX(c, 0);
|
||||
}
|
||||
|
||||
struct Scanline
|
||||
{
|
||||
float W[MAXWIDTH];
|
||||
uint16_t U[MAXWIDTH];
|
||||
uint16_t V[MAXWIDTH];
|
||||
float WorldX[MAXWIDTH];
|
||||
float WorldY[MAXWIDTH];
|
||||
float WorldZ[MAXWIDTH];
|
||||
uint8_t vColorA[MAXWIDTH];
|
||||
uint8_t vColorR[MAXWIDTH];
|
||||
uint8_t vColorG[MAXWIDTH];
|
||||
uint8_t vColorB[MAXWIDTH];
|
||||
float GradientdistZ[MAXWIDTH];
|
||||
uint32_t FragColor[MAXWIDTH];
|
||||
uint16_t lightarray[MAXWIDTH];
|
||||
uint32_t dynlights[MAXWIDTH];
|
||||
uint8_t discard[MAXWIDTH];
|
||||
} scanline;
|
||||
|
||||
static PolyTriangleThreadData *Get(DrawerThread *thread);
|
||||
|
||||
int dest_pitch = 0;
|
||||
int dest_width = 0;
|
||||
int dest_height = 0;
|
||||
bool dest_bgra = false;
|
||||
uint8_t *dest = nullptr;
|
||||
PolyDepthStencil *depthstencil = nullptr;
|
||||
bool topdown = true;
|
||||
|
||||
float depthbias = 0.0f;
|
||||
|
||||
int viewport_y = 0;
|
||||
|
||||
struct ClipRect
|
||||
{
|
||||
int left = 0;
|
||||
int top = 0;
|
||||
int right = 0;
|
||||
int bottom = 0;
|
||||
} clip, scissor;
|
||||
|
||||
FRenderStyle RenderStyle;
|
||||
int SpecialEffect = EFF_NONE;
|
||||
int EffectState = 0;
|
||||
bool AlphaTest = false;
|
||||
uint32_t AlphaThreshold = 0x7f000000;
|
||||
const PolyPushConstants* PushConstants = nullptr;
|
||||
|
||||
const void *vertices = nullptr;
|
||||
const unsigned int *elements = nullptr;
|
||||
const FVector4 *lights = nullptr;
|
||||
|
||||
enum { maxPolyLights = 16 };
|
||||
PolyLight polyLights[maxPolyLights];
|
||||
int numPolyLights = 0;
|
||||
|
||||
PolyMainVertexShader mainVertexShader;
|
||||
|
||||
struct TextureUnit
|
||||
{
|
||||
const void* pixels = nullptr;
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
bool bgra = true;
|
||||
} textures[16];
|
||||
|
||||
bool DepthTest = false;
|
||||
bool StencilTest = true;
|
||||
bool WriteStencil = true;
|
||||
bool WriteColor = true;
|
||||
bool WriteDepth = true;
|
||||
uint8_t StencilTestValue = 0;
|
||||
uint8_t StencilWriteValue = 0;
|
||||
|
||||
void (*FragmentShader)(int x0, int x1, PolyTriangleThreadData* thread) = nullptr;
|
||||
void (*WriteColorFunc)(int y, int x0, int x1, PolyTriangleThreadData* thread) = nullptr;
|
||||
|
||||
private:
|
||||
ShadedTriVertex ShadeVertex(int index);
|
||||
void DrawShadedPoint(const ShadedTriVertex *const* vertex);
|
||||
void DrawShadedLine(const ShadedTriVertex *const* vertices);
|
||||
void DrawShadedTriangle(const ShadedTriVertex *const* vertices, bool ccw);
|
||||
static bool IsDegenerate(const ShadedTriVertex *const* vertices);
|
||||
static bool IsFrontfacing(TriDrawTriangleArgs *args);
|
||||
|
||||
int ClipEdge(const ShadedTriVertex *const* verts);
|
||||
|
||||
int viewport_x = 0;
|
||||
int viewport_width = 0;
|
||||
int viewport_height = 0;
|
||||
bool ccw = true;
|
||||
bool twosided = true;
|
||||
PolyInputAssembly *inputAssembly = nullptr;
|
||||
|
||||
enum { max_additional_vertices = 16 };
|
||||
float weightsbuffer[max_additional_vertices * 3 * 2];
|
||||
float *weights = nullptr;
|
||||
};
|
File diff suppressed because it is too large
Load diff
|
@ -119,467 +119,3 @@ class PolyInputAssembly
|
|||
public:
|
||||
virtual void Load(PolyTriangleThreadData *thread, const void *vertices, int index) = 0;
|
||||
};
|
||||
|
||||
// One light record: 32-bit color value followed by float x/y/z and a float radius.
// NOTE(review): units and color channel order are not shown here - verify at the use sites.
struct PolyLight
{
	uint32_t color;
	float x;
	float y;
	float z;
	float radius;
};
|
||||
|
||||
class PolyTriangleThreadData
|
||||
{
|
||||
public:
|
||||
PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes, int numa_start_y, int numa_end_y)
|
||||
: core(core), num_cores(num_cores), numa_node(numa_node), num_numa_nodes(num_numa_nodes), numa_start_y(numa_start_y), numa_end_y(numa_end_y)
|
||||
{
|
||||
}
|
||||
|
||||
void ClearDepth(float value);
|
||||
void ClearStencil(uint8_t value);
|
||||
void SetViewport(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, PolyDepthStencil *depthstencil, bool topdown);
|
||||
|
||||
void SetCullCCW(bool value) { ccw = value; }
|
||||
void SetTwoSided(bool value) { twosided = value; }
|
||||
|
||||
void SetInputAssembly(PolyInputAssembly *input) { inputAssembly = input; }
|
||||
void SetVertexBuffer(const void *data) { vertices = data; }
|
||||
void SetIndexBuffer(const void *data) { elements = (const unsigned int *)data; }
|
||||
void SetLightBuffer(const void *data) { lights = (const FVector4 *)data; }
|
||||
void SetViewpointUniforms(const HWViewpointUniforms *uniforms);
|
||||
void SetDepthClamp(bool on);
|
||||
void SetDepthMask(bool on);
|
||||
void SetDepthFunc(int func);
|
||||
void SetDepthRange(float min, float max);
|
||||
void SetDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor);
|
||||
void SetColorMask(bool r, bool g, bool b, bool a);
|
||||
void SetStencil(int stencilRef, int op);
|
||||
void SetCulling(int mode);
|
||||
void EnableStencil(bool on);
|
||||
void SetScissor(int x, int y, int w, int h);
|
||||
void SetRenderStyle(FRenderStyle style);
|
||||
void SetTexture(int unit, const void *pixels, int width, int height, bool bgra);
|
||||
void SetShader(int specialEffect, int effectState, bool alphaTest);
|
||||
|
||||
void UpdateClip();
|
||||
|
||||
void PushStreamData(const StreamData &data, const PolyPushConstants &constants);
|
||||
void PushMatrices(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix);
|
||||
|
||||
void DrawIndexed(int index, int count, PolyDrawMode mode);
|
||||
void Draw(int index, int vcount, PolyDrawMode mode);
|
||||
|
||||
int32_t core;
|
||||
int32_t num_cores;
|
||||
int32_t numa_node;
|
||||
int32_t num_numa_nodes;
|
||||
|
||||
int numa_start_y;
|
||||
int numa_end_y;
|
||||
|
||||
bool line_skipped_by_thread(int line)
|
||||
{
|
||||
return line < numa_start_y || line >= numa_end_y || line % num_cores != core;
|
||||
}
|
||||
|
||||
int skipped_by_thread(int first_line)
|
||||
{
|
||||
int clip_first_line = MAX(first_line, numa_start_y);
|
||||
int core_skip = (num_cores - (clip_first_line - core) % num_cores) % num_cores;
|
||||
return clip_first_line + core_skip - first_line;
|
||||
}
|
||||
|
||||
int count_for_thread(int first_line, int count)
|
||||
{
|
||||
count = MIN(count, numa_end_y - first_line);
|
||||
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
||||
return MAX(c, 0);
|
||||
}
|
||||
|
||||
struct Scanline
|
||||
{
|
||||
float W[MAXWIDTH];
|
||||
uint16_t U[MAXWIDTH];
|
||||
uint16_t V[MAXWIDTH];
|
||||
float WorldX[MAXWIDTH];
|
||||
float WorldY[MAXWIDTH];
|
||||
float WorldZ[MAXWIDTH];
|
||||
uint8_t vColorA[MAXWIDTH];
|
||||
uint8_t vColorR[MAXWIDTH];
|
||||
uint8_t vColorG[MAXWIDTH];
|
||||
uint8_t vColorB[MAXWIDTH];
|
||||
float GradientdistZ[MAXWIDTH];
|
||||
uint32_t FragColor[MAXWIDTH];
|
||||
uint16_t lightarray[MAXWIDTH];
|
||||
uint32_t dynlights[MAXWIDTH];
|
||||
uint8_t discard[MAXWIDTH];
|
||||
} scanline;
|
||||
|
||||
static PolyTriangleThreadData *Get(DrawerThread *thread);
|
||||
|
||||
int dest_pitch = 0;
|
||||
int dest_width = 0;
|
||||
int dest_height = 0;
|
||||
bool dest_bgra = false;
|
||||
uint8_t *dest = nullptr;
|
||||
PolyDepthStencil *depthstencil = nullptr;
|
||||
bool topdown = true;
|
||||
|
||||
float depthbias = 0.0f;
|
||||
|
||||
int viewport_y = 0;
|
||||
|
||||
struct ClipRect
|
||||
{
|
||||
int left = 0;
|
||||
int top = 0;
|
||||
int right = 0;
|
||||
int bottom = 0;
|
||||
} clip, scissor;
|
||||
|
||||
FRenderStyle RenderStyle;
|
||||
int SpecialEffect = EFF_NONE;
|
||||
int EffectState = 0;
|
||||
bool AlphaTest = false;
|
||||
uint32_t AlphaThreshold = 0x7f000000;
|
||||
const PolyPushConstants* PushConstants = nullptr;
|
||||
|
||||
const void *vertices = nullptr;
|
||||
const unsigned int *elements = nullptr;
|
||||
const FVector4 *lights = nullptr;
|
||||
|
||||
enum { maxPolyLights = 16 };
|
||||
PolyLight polyLights[maxPolyLights];
|
||||
int numPolyLights = 0;
|
||||
|
||||
PolyMainVertexShader mainVertexShader;
|
||||
|
||||
struct TextureUnit
|
||||
{
|
||||
const void* pixels = nullptr;
|
||||
int width = 0;
|
||||
int height = 0;
|
||||
bool bgra = true;
|
||||
} textures[16];
|
||||
|
||||
bool DepthTest = false;
|
||||
bool StencilTest = true;
|
||||
bool WriteStencil = true;
|
||||
bool WriteColor = true;
|
||||
bool WriteDepth = true;
|
||||
uint8_t StencilTestValue = 0;
|
||||
uint8_t StencilWriteValue = 0;
|
||||
|
||||
void (*FragmentShader)(int x0, int x1, PolyTriangleThreadData* thread) = nullptr;
|
||||
void (*WriteColorFunc)(int y, int x0, int x1, PolyTriangleThreadData* thread) = nullptr;
|
||||
|
||||
private:
|
||||
ShadedTriVertex ShadeVertex(int index);
|
||||
void DrawShadedPoint(const ShadedTriVertex *const* vertex);
|
||||
void DrawShadedLine(const ShadedTriVertex *const* vertices);
|
||||
void DrawShadedTriangle(const ShadedTriVertex *const* vertices, bool ccw);
|
||||
static bool IsDegenerate(const ShadedTriVertex *const* vertices);
|
||||
static bool IsFrontfacing(TriDrawTriangleArgs *args);
|
||||
|
||||
int ClipEdge(const ShadedTriVertex *const* verts);
|
||||
|
||||
int viewport_x = 0;
|
||||
int viewport_width = 0;
|
||||
int viewport_height = 0;
|
||||
bool ccw = true;
|
||||
bool twosided = true;
|
||||
PolyInputAssembly *inputAssembly = nullptr;
|
||||
|
||||
enum { max_additional_vertices = 16 };
|
||||
float weightsbuffer[max_additional_vertices * 3 * 2];
|
||||
float *weights = nullptr;
|
||||
};
|
||||
|
||||
class PolyDrawerCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
};
|
||||
|
||||
class PolySetDepthClampCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetDepthClampCommand(bool on) : on(on) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthClamp(on); }
|
||||
|
||||
private:
|
||||
bool on;
|
||||
};
|
||||
|
||||
class PolySetDepthMaskCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetDepthMaskCommand(bool on) : on(on) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthMask(on); }
|
||||
|
||||
private:
|
||||
bool on;
|
||||
};
|
||||
|
||||
class PolySetDepthFuncCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetDepthFuncCommand(int func) : func(func) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthFunc(func); }
|
||||
|
||||
private:
|
||||
int func;
|
||||
};
|
||||
|
||||
class PolySetDepthRangeCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetDepthRangeCommand(float min, float max) : min(min), max(max) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthRange(min, max); }
|
||||
|
||||
private:
|
||||
float min;
|
||||
float max;
|
||||
};
|
||||
|
||||
class PolySetDepthBiasCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetDepthBiasCommand(float depthBiasConstantFactor, float depthBiasSlopeFactor) : depthBiasConstantFactor(depthBiasConstantFactor), depthBiasSlopeFactor(depthBiasSlopeFactor) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetDepthBias(depthBiasConstantFactor, depthBiasSlopeFactor); }
|
||||
|
||||
private:
|
||||
float depthBiasConstantFactor;
|
||||
float depthBiasSlopeFactor;
|
||||
};
|
||||
|
||||
class PolySetColorMaskCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetColorMaskCommand(bool r, bool g, bool b, bool a) : r(r), g(g), b(b), a(a) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetColorMask(r, g, b, a); }
|
||||
|
||||
private:
|
||||
bool r;
|
||||
bool g;
|
||||
bool b;
|
||||
bool a;
|
||||
};
|
||||
|
||||
class PolySetStencilCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetStencilCommand(int stencilRef, int op) : stencilRef(stencilRef), op(op) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetStencil(stencilRef, op); }
|
||||
|
||||
private:
|
||||
int stencilRef;
|
||||
int op;
|
||||
};
|
||||
|
||||
class PolySetCullingCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetCullingCommand(int mode) : mode(mode) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetCulling(mode); }
|
||||
|
||||
private:
|
||||
int mode;
|
||||
};
|
||||
|
||||
class PolyEnableStencilCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolyEnableStencilCommand(bool on) : on(on) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->EnableStencil(on); }
|
||||
|
||||
private:
|
||||
bool on;
|
||||
};
|
||||
|
||||
class PolySetScissorCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetScissorCommand(int x, int y, int w, int h) : x(x), y(y), w(w), h(h) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetScissor(x, y, w, h); }
|
||||
|
||||
private:
|
||||
int x;
|
||||
int y;
|
||||
int w;
|
||||
int h;
|
||||
};
|
||||
|
||||
class PolySetRenderStyleCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetRenderStyleCommand(FRenderStyle style) : style(style) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetRenderStyle(style); }
|
||||
|
||||
private:
|
||||
FRenderStyle style;
|
||||
};
|
||||
|
||||
class PolySetTextureCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetTextureCommand(int unit, void *pixels, int width, int height, bool bgra) : unit(unit), pixels(pixels), width(width), height(height), bgra(bgra) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetTexture(unit, pixels, width, height, bgra); }
|
||||
|
||||
private:
|
||||
int unit;
|
||||
void *pixels;
|
||||
int width;
|
||||
int height;
|
||||
bool bgra;
|
||||
};
|
||||
|
||||
class PolySetShaderCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetShaderCommand(int specialEffect, int effectState, bool alphaTest) : specialEffect(specialEffect), effectState(effectState), alphaTest(alphaTest) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetShader(specialEffect, effectState, alphaTest); }
|
||||
|
||||
private:
|
||||
int specialEffect;
|
||||
int effectState;
|
||||
bool alphaTest;
|
||||
};
|
||||
|
||||
class PolySetVertexBufferCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetVertexBufferCommand(const void *vertices) : vertices(vertices) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetVertexBuffer(vertices); }
|
||||
|
||||
private:
|
||||
const void *vertices;
|
||||
};
|
||||
|
||||
class PolySetIndexBufferCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetIndexBufferCommand(const void *indices) : indices(indices) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetIndexBuffer(indices); }
|
||||
|
||||
private:
|
||||
const void *indices;
|
||||
};
|
||||
|
||||
class PolySetLightBufferCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetLightBufferCommand(const void *lights) : lights(lights) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetLightBuffer(lights); }
|
||||
|
||||
private:
|
||||
const void *lights;
|
||||
};
|
||||
|
||||
class PolySetInputAssemblyCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetInputAssemblyCommand(PolyInputAssembly *input) : input(input) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetInputAssembly(input); }
|
||||
|
||||
private:
|
||||
PolyInputAssembly *input;
|
||||
};
|
||||
|
||||
class PolyClearDepthCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolyClearDepthCommand(float value) : value(value) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->ClearDepth(value); }
|
||||
|
||||
private:
|
||||
float value;
|
||||
};
|
||||
|
||||
class PolyClearStencilCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolyClearStencilCommand(uint8_t value) : value(value) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->ClearStencil(value); }
|
||||
|
||||
private:
|
||||
uint8_t value;
|
||||
};
|
||||
|
||||
class PolySetViewportCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetViewportCommand(int x, int y, int width, int height, uint8_t *dest, int dest_width, int dest_height, int dest_pitch, bool dest_bgra, PolyDepthStencil *depthstencil, bool topdown)
|
||||
: x(x), y(y), width(width), height(height), dest(dest), dest_width(dest_width), dest_height(dest_height), dest_pitch(dest_pitch), dest_bgra(dest_bgra), depthstencil(depthstencil), topdown(topdown) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetViewport(x, y, width, height, dest, dest_width, dest_height, dest_pitch, dest_bgra, depthstencil, topdown); }
|
||||
|
||||
private:
|
||||
int x;
|
||||
int y;
|
||||
int width;
|
||||
int height;
|
||||
uint8_t *dest;
|
||||
int dest_width;
|
||||
int dest_height;
|
||||
int dest_pitch;
|
||||
bool dest_bgra;
|
||||
PolyDepthStencil *depthstencil;
|
||||
bool topdown;
|
||||
};
|
||||
|
||||
class PolySetViewpointUniformsCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolySetViewpointUniformsCommand(const HWViewpointUniforms *uniforms) : uniforms(uniforms) {}
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->SetViewpointUniforms(uniforms); }
|
||||
|
||||
private:
|
||||
const HWViewpointUniforms *uniforms;
|
||||
};
|
||||
|
||||
class PolyPushMatricesCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolyPushMatricesCommand(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix)
|
||||
: modelMatrix(modelMatrix), normalModelMatrix(normalModelMatrix), textureMatrix(textureMatrix) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->PushMatrices(modelMatrix, normalModelMatrix, textureMatrix); }
|
||||
|
||||
private:
|
||||
VSMatrix modelMatrix;
|
||||
VSMatrix normalModelMatrix;
|
||||
VSMatrix textureMatrix;
|
||||
};
|
||||
|
||||
class PolyPushStreamDataCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolyPushStreamDataCommand(const StreamData &data, const PolyPushConstants &constants) : data(data), constants(constants) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->PushStreamData(data, constants); }
|
||||
|
||||
private:
|
||||
StreamData data;
|
||||
PolyPushConstants constants;
|
||||
};
|
||||
|
||||
class PolyDrawCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolyDrawCommand(int index, int count, PolyDrawMode mode) : index(index), count(count), mode(mode) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->Draw(index, count, mode); }
|
||||
|
||||
private:
|
||||
int index;
|
||||
int count;
|
||||
PolyDrawMode mode;
|
||||
};
|
||||
|
||||
class PolyDrawIndexedCommand : public PolyDrawerCommand
|
||||
{
|
||||
public:
|
||||
PolyDrawIndexedCommand(int index, int count, PolyDrawMode mode) : index(index), count(count), mode(mode) { }
|
||||
void Execute(DrawerThread *thread) override { PolyTriangleThreadData::Get(thread)->DrawIndexed(index, count, mode); }
|
||||
|
||||
private:
|
||||
int index;
|
||||
int count;
|
||||
PolyDrawMode mode;
|
||||
};
|
||||
|
|
575
src/rendering/polyrenderer/drawers/screen_blend.cpp
Normal file
575
src/rendering/polyrenderer/drawers/screen_blend.cpp
Normal file
|
@ -0,0 +1,575 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#include "screen_blend.h"
|
||||
|
||||
// Right-shift amounts used to pull a blend factor out of a packed 32-bit
// pixel. One row of four entries per STYLEALPHA_* value (row = value << 2),
// one entry per output channel. Rows of all 24 read the top byte for every
// channel; rows of 24/16/8/0 read a different byte per channel.
static const int shiftTable[] =
{
	/* STYLEALPHA_Zero      */  0,  0,  0,  0,
	/* STYLEALPHA_One       */  0,  0,  0,  0,
	/* STYLEALPHA_Src       */ 24, 24, 24, 24,
	/* STYLEALPHA_InvSrc    */ 24, 24, 24, 24,
	/* STYLEALPHA_SrcCol    */ 24, 16,  8,  0,
	/* STYLEALPHA_InvSrcCol */ 24, 16,  8,  0,
	/* STYLEALPHA_DstCol    */ 24, 16,  8,  0,
	/* STYLEALPHA_InvDstCol */ 24, 16,  8,  0
};
|
||||
|
||||
#if 1 //#ifndef USE_AVX2
|
||||
template<typename OptT>
|
||||
static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
FRenderStyle style = thread->RenderStyle;
|
||||
|
||||
bool invsrc = style.SrcAlpha & 1;
|
||||
bool invdst = style.DestAlpha & 1;
|
||||
|
||||
const int* shiftsrc = shiftTable + (style.SrcAlpha << 2);
|
||||
const int* shiftdst = shiftTable + (style.DestAlpha << 2);
|
||||
|
||||
uint32_t* dest = (uint32_t*)thread->dest;
|
||||
uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int srcSelect = style.SrcAlpha <= STYLEALPHA_One ? 0 : (style.SrcAlpha >= STYLEALPHA_DstCol ? 1 : 2);
|
||||
int dstSelect = style.DestAlpha <= STYLEALPHA_One ? 0 : (style.DestAlpha >= STYLEALPHA_DstCol ? 1 : 2);
|
||||
|
||||
uint32_t inputs[3];
|
||||
inputs[0] = 0;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
inputs[1] = line[x];
|
||||
inputs[2] = fragcolor[x];
|
||||
|
||||
uint32_t srcinput = inputs[srcSelect];
|
||||
uint32_t dstinput = inputs[dstSelect];
|
||||
|
||||
uint32_t out[4];
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
// Grab component for scale factors
|
||||
int32_t src = (srcinput >> shiftsrc[i]) & 0xff;
|
||||
int32_t dst = (dstinput >> shiftdst[i]) & 0xff;
|
||||
|
||||
// Inverse if needed
|
||||
if (invsrc) src = 0xff - src;
|
||||
if (invdst) dst = 0xff - dst;
|
||||
|
||||
// Rescale 0-255 to 0-256
|
||||
src = src + (src >> 7);
|
||||
dst = dst + (dst >> 7);
|
||||
|
||||
// Multiply with input
|
||||
src = src * ((inputs[2] >> (24 - (i << 3))) & 0xff);
|
||||
dst = dst * ((inputs[1] >> (24 - (i << 3))) & 0xff);
|
||||
|
||||
// Apply blend operator
|
||||
int32_t val;
|
||||
if (OptT::Flags & SWBLEND_Sub)
|
||||
{
|
||||
val = src - dst;
|
||||
}
|
||||
else if (OptT::Flags & SWBLEND_RevSub)
|
||||
{
|
||||
val = dst - src;
|
||||
}
|
||||
else
|
||||
{
|
||||
val = src + dst;
|
||||
}
|
||||
out[i] = clamp((val + 127) >> 8, 0, 255);
|
||||
}
|
||||
|
||||
line[x] = MAKEARGB(out[0], out[1], out[2], out[3]);
|
||||
}
|
||||
}
|
||||
#else
|
||||
template<typename OptT>
|
||||
static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
FRenderStyle style = thread->RenderStyle;
|
||||
|
||||
bool invsrc = style.SrcAlpha & 1;
|
||||
bool invdst = style.DestAlpha & 1;
|
||||
|
||||
__m128i shiftsrc = _mm_loadu_si128((const __m128i*)(shiftTable + (style.SrcAlpha << 2)));
|
||||
__m128i shiftdst = _mm_loadu_si128((const __m128i*)(shiftTable + (style.DestAlpha << 2)));
|
||||
|
||||
uint32_t* dest = (uint32_t*)thread->dest;
|
||||
uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int srcSelect = style.SrcAlpha <= STYLEALPHA_One ? 0 : (style.SrcAlpha >= STYLEALPHA_DstCol ? 1 : 2);
|
||||
int dstSelect = style.DestAlpha <= STYLEALPHA_One ? 0 : (style.DestAlpha >= STYLEALPHA_DstCol ? 1 : 2);
|
||||
|
||||
uint32_t inputs[3];
|
||||
inputs[0] = 0;
|
||||
|
||||
__m128i shiftmul = _mm_set_epi32(24, 16, 8, 0);
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
inputs[1] = line[x];
|
||||
inputs[2] = fragcolor[x];
|
||||
|
||||
__m128i srcinput = _mm_set1_epi32(inputs[srcSelect]);
|
||||
__m128i dstinput = _mm_set1_epi32(inputs[dstSelect]);
|
||||
|
||||
// Grab component for scale factors
|
||||
__m128i src = _mm_and_si128(_mm_srlv_epi32(srcinput, shiftsrc), _mm_set1_epi32(0xff));
|
||||
__m128i dst = _mm_and_si128(_mm_srlv_epi32(dstinput, shiftdst), _mm_set1_epi32(0xff));
|
||||
|
||||
// Inverse if needed
|
||||
if (invsrc) src = _mm_sub_epi32(_mm_set1_epi32(0xff), src);
|
||||
if (invdst) dst = _mm_sub_epi32(_mm_set1_epi32(0xff), dst);
|
||||
|
||||
// Rescale 0-255 to 0-256
|
||||
src = _mm_add_epi32(src, _mm_srli_epi32(src, 7));
|
||||
dst = _mm_add_epi32(dst, _mm_srli_epi32(dst, 7));
|
||||
|
||||
// Multiply with input
|
||||
__m128i mulsrc = _mm_and_si128(_mm_srlv_epi32(_mm_set1_epi32(inputs[2]), shiftmul), _mm_set1_epi32(0xff));
|
||||
__m128i muldst = _mm_and_si128(_mm_srlv_epi32(_mm_set1_epi32(inputs[1]), shiftmul), _mm_set1_epi32(0xff));
|
||||
__m128i mulresult = _mm_mullo_epi16(_mm_packs_epi32(src, dst), _mm_packs_epi32(mulsrc, muldst));
|
||||
src = _mm_unpacklo_epi16(mulresult, _mm_setzero_si128());
|
||||
dst = _mm_unpackhi_epi16(mulresult, _mm_setzero_si128());
|
||||
|
||||
// Apply blend operator
|
||||
__m128i val;
|
||||
if (OptT::Flags & SWBLEND_Sub)
|
||||
{
|
||||
val = _mm_sub_epi32(src, dst);
|
||||
}
|
||||
else if (OptT::Flags & SWBLEND_RevSub)
|
||||
{
|
||||
val = _mm_sub_epi32(dst, src);
|
||||
}
|
||||
else
|
||||
{
|
||||
val = _mm_add_epi32(src, dst);
|
||||
}
|
||||
|
||||
__m128i out = _mm_srli_epi32(_mm_add_epi32(val, _mm_set1_epi32(127)), 8);
|
||||
out = _mm_packs_epi32(out, out);
|
||||
out = _mm_packus_epi16(out, out);
|
||||
line[x] = _mm_cvtsi128_si32(out);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef NO_SSE
|
||||
static void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* dest = (uint32_t*)thread->dest;
|
||||
uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
memcpy(line + x0, fragcolor + x0, (x1 - x0) * sizeof(uint32_t));
|
||||
}
|
||||
#else
|
||||
static void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* dest = (uint32_t*)thread->dest;
|
||||
uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int ssecount = ((x1 - x0) & ~3);
|
||||
int sseend = x0 + ssecount;
|
||||
|
||||
for (int x = x0; x < sseend; x += 4)
|
||||
{
|
||||
__m128i v = _mm_loadu_si128((__m128i*) & fragcolor[x]);
|
||||
_mm_storeu_si128((__m128i*) & line[x], v);
|
||||
}
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
line[x] = fragcolor[x];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void BlendColorAdd_Src_InvSrc(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~1);
|
||||
sseend = x0 + ssecount;
|
||||
for (int x = x0; x < sseend; x += 2)
|
||||
{
|
||||
__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)&line[x]), _mm_setzero_si128());
|
||||
__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)&fragcolor[x]), _mm_setzero_si128());
|
||||
|
||||
__m128i srcscale = _mm_shufflehi_epi16(_mm_shufflelo_epi16(src, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
|
||||
srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
|
||||
__m128i dstscale = _mm_sub_epi16(_mm_set1_epi16(256), srcscale);
|
||||
|
||||
__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_mullo_epi16(dst, dstscale)), _mm_set1_epi16(127)), 8);
|
||||
_mm_storel_epi64((__m128i*)&line[x], _mm_packus_epi16(out, out));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
uint32_t dst = line[x];
|
||||
uint32_t src = fragcolor[x];
|
||||
|
||||
uint32_t srcscale = APART(src);
|
||||
srcscale += srcscale >> 7;
|
||||
uint32_t dstscale = 256 - srcscale;
|
||||
|
||||
uint32_t a = ((APART(src) * srcscale + APART(dst) * dstscale) + 127) >> 8;
|
||||
uint32_t r = ((RPART(src) * srcscale + RPART(dst) * dstscale) + 127) >> 8;
|
||||
uint32_t g = ((GPART(src) * srcscale + GPART(dst) * dstscale) + 127) >> 8;
|
||||
uint32_t b = ((BPART(src) * srcscale + BPART(dst) * dstscale) + 127) >> 8;
|
||||
|
||||
line[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
static void BlendColorAdd_SrcCol_InvSrcCol(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~1);
|
||||
sseend = x0 + ssecount;
|
||||
for (int x = x0; x < sseend; x += 2)
|
||||
{
|
||||
__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
|
||||
__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());
|
||||
|
||||
__m128i srcscale = src;
|
||||
srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
|
||||
__m128i dstscale = _mm_sub_epi16(_mm_set1_epi16(256), srcscale);
|
||||
|
||||
__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_mullo_epi16(dst, dstscale)), _mm_set1_epi16(127)), 8);
|
||||
_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
uint32_t dst = line[x];
|
||||
uint32_t src = fragcolor[x];
|
||||
|
||||
uint32_t srcscale_a = APART(src);
|
||||
uint32_t srcscale_r = RPART(src);
|
||||
uint32_t srcscale_g = GPART(src);
|
||||
uint32_t srcscale_b = BPART(src);
|
||||
srcscale_a += srcscale_a >> 7;
|
||||
srcscale_r += srcscale_r >> 7;
|
||||
srcscale_g += srcscale_g >> 7;
|
||||
srcscale_b += srcscale_b >> 7;
|
||||
uint32_t dstscale_a = 256 - srcscale_a;
|
||||
uint32_t dstscale_r = 256 - srcscale_r;
|
||||
uint32_t dstscale_g = 256 - srcscale_g;
|
||||
uint32_t dstscale_b = 256 - srcscale_b;
|
||||
|
||||
uint32_t a = ((APART(src) * srcscale_a + APART(dst) * dstscale_a) + 127) >> 8;
|
||||
uint32_t r = ((RPART(src) * srcscale_r + RPART(dst) * dstscale_r) + 127) >> 8;
|
||||
uint32_t g = ((GPART(src) * srcscale_g + GPART(dst) * dstscale_g) + 127) >> 8;
|
||||
uint32_t b = ((BPART(src) * srcscale_b + BPART(dst) * dstscale_b) + 127) >> 8;
|
||||
|
||||
line[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
static void BlendColorAdd_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~1);
|
||||
sseend = x0 + ssecount;
|
||||
for (int x = x0; x < sseend; x += 2)
|
||||
{
|
||||
__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
|
||||
__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());
|
||||
|
||||
__m128i srcscale = _mm_shufflehi_epi16(_mm_shufflelo_epi16(src, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
|
||||
srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
|
||||
|
||||
__m128i out = _mm_add_epi16(_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8), dst);
|
||||
_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
uint32_t dst = line[x];
|
||||
uint32_t src = fragcolor[x];
|
||||
|
||||
uint32_t srcscale = APART(src);
|
||||
srcscale += srcscale >> 7;
|
||||
|
||||
uint32_t a = MIN<int32_t>((((APART(src) * srcscale) + 127) >> 8) + APART(dst), 255);
|
||||
uint32_t r = MIN<int32_t>((((RPART(src) * srcscale) + 127) >> 8) + RPART(dst), 255);
|
||||
uint32_t g = MIN<int32_t>((((GPART(src) * srcscale) + 127) >> 8) + GPART(dst), 255);
|
||||
uint32_t b = MIN<int32_t>((((BPART(src) * srcscale) + 127) >> 8) + BPART(dst), 255);
|
||||
|
||||
line[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
static void BlendColorAdd_SrcCol_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~1);
|
||||
sseend = x0 + ssecount;
|
||||
for (int x = x0; x < sseend; x += 2)
|
||||
{
|
||||
__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
|
||||
__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());
|
||||
|
||||
__m128i srcscale = src;
|
||||
srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
|
||||
|
||||
__m128i out = _mm_add_epi16(_mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8), dst);
|
||||
_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
uint32_t dst = line[x];
|
||||
uint32_t src = fragcolor[x];
|
||||
|
||||
uint32_t srcscale_a = APART(src);
|
||||
uint32_t srcscale_r = RPART(src);
|
||||
uint32_t srcscale_g = GPART(src);
|
||||
uint32_t srcscale_b = BPART(src);
|
||||
srcscale_a += srcscale_a >> 7;
|
||||
srcscale_r += srcscale_r >> 7;
|
||||
srcscale_g += srcscale_g >> 7;
|
||||
srcscale_b += srcscale_b >> 7;
|
||||
|
||||
uint32_t a = MIN<int32_t>((((APART(src) * srcscale_a) + 127) >> 8) + APART(dst), 255);
|
||||
uint32_t r = MIN<int32_t>((((RPART(src) * srcscale_r) + 127) >> 8) + RPART(dst), 255);
|
||||
uint32_t g = MIN<int32_t>((((GPART(src) * srcscale_g) + 127) >> 8) + GPART(dst), 255);
|
||||
uint32_t b = MIN<int32_t>((((BPART(src) * srcscale_b) + 127) >> 8) + BPART(dst), 255);
|
||||
|
||||
line[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
static void BlendColorAdd_DstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~1);
|
||||
sseend = x0 + ssecount;
|
||||
for (int x = x0; x < sseend; x += 2)
|
||||
{
|
||||
__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
|
||||
__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());
|
||||
|
||||
__m128i srcscale = dst;
|
||||
srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
|
||||
|
||||
__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8);
|
||||
_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
uint32_t dst = line[x];
|
||||
uint32_t src = fragcolor[x];
|
||||
|
||||
uint32_t srcscale_a = APART(dst);
|
||||
uint32_t srcscale_r = RPART(dst);
|
||||
uint32_t srcscale_g = GPART(dst);
|
||||
uint32_t srcscale_b = BPART(dst);
|
||||
srcscale_a += srcscale_a >> 7;
|
||||
srcscale_r += srcscale_r >> 7;
|
||||
srcscale_g += srcscale_g >> 7;
|
||||
srcscale_b += srcscale_b >> 7;
|
||||
|
||||
uint32_t a = (((APART(src) * srcscale_a) + 127) >> 8);
|
||||
uint32_t r = (((RPART(src) * srcscale_r) + 127) >> 8);
|
||||
uint32_t g = (((GPART(src) * srcscale_g) + 127) >> 8);
|
||||
uint32_t b = (((BPART(src) * srcscale_b) + 127) >> 8);
|
||||
|
||||
line[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
static void BlendColorAdd_InvDstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~1);
|
||||
sseend = x0 + ssecount;
|
||||
for (int x = x0; x < sseend; x += 2)
|
||||
{
|
||||
__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
|
||||
__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());
|
||||
|
||||
__m128i srcscale = _mm_sub_epi16(_mm_set1_epi16(255), dst);
|
||||
srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
|
||||
|
||||
__m128i out = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8);
|
||||
_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
uint32_t dst = line[x];
|
||||
uint32_t src = fragcolor[x];
|
||||
|
||||
uint32_t srcscale_a = 255 - APART(dst);
|
||||
uint32_t srcscale_r = 255 - RPART(dst);
|
||||
uint32_t srcscale_g = 255 - GPART(dst);
|
||||
uint32_t srcscale_b = 255 - BPART(dst);
|
||||
srcscale_a += srcscale_a >> 7;
|
||||
srcscale_r += srcscale_r >> 7;
|
||||
srcscale_g += srcscale_g >> 7;
|
||||
srcscale_b += srcscale_b >> 7;
|
||||
|
||||
uint32_t a = (((APART(src) * srcscale_a) + 127) >> 8);
|
||||
uint32_t r = (((RPART(src) * srcscale_r) + 127) >> 8);
|
||||
uint32_t g = (((GPART(src) * srcscale_g) + 127) >> 8);
|
||||
uint32_t b = (((BPART(src) * srcscale_b) + 127) >> 8);
|
||||
|
||||
line[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
static void BlendColorRevSub_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~1);
|
||||
sseend = x0 + ssecount;
|
||||
for (int x = x0; x < sseend; x += 2)
|
||||
{
|
||||
__m128i dst = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & line[x]), _mm_setzero_si128());
|
||||
__m128i src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*) & fragcolor[x]), _mm_setzero_si128());
|
||||
|
||||
__m128i srcscale = _mm_shufflehi_epi16(_mm_shufflelo_epi16(src, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
|
||||
srcscale = _mm_add_epi16(srcscale, _mm_srli_epi16(srcscale, 7));
|
||||
|
||||
__m128i out = _mm_sub_epi16(dst, _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(src, srcscale), _mm_set1_epi16(127)), 8));
|
||||
_mm_storel_epi64((__m128i*) & line[x], _mm_packus_epi16(out, out));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
uint32_t dst = line[x];
|
||||
uint32_t src = fragcolor[x];
|
||||
|
||||
uint32_t srcscale = APART(src);
|
||||
srcscale += srcscale >> 7;
|
||||
|
||||
uint32_t a = MAX<int32_t>(APART(dst) - (((APART(src) * srcscale) + 127) >> 8), 0);
|
||||
uint32_t r = MAX<int32_t>(RPART(dst) - (((RPART(src) * srcscale) + 127) >> 8), 0);
|
||||
uint32_t g = MAX<int32_t>(GPART(dst) - (((GPART(src) * srcscale) + 127) >> 8), 0);
|
||||
uint32_t b = MAX<int32_t>(BPART(dst) - (((BPART(src) * srcscale) + 127) >> 8), 0);
|
||||
|
||||
line[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
// Chooses the scanline blend routine that matches thread->RenderStyle and
// stores it in thread->WriteColorFunc.
//
// Blend-op / factor combinations that have a dedicated routine use it; every
// other combination falls back to the generic BlendColor<> template for that op.
void SelectWriteColorFunc(PolyTriangleThreadData* thread)
{
	FRenderStyle style = thread->RenderStyle;
	const auto op = style.BlendOp;
	const auto srcFactor = style.SrcAlpha;
	const auto dstFactor = style.DestAlpha;

	// Subtract has no specialised variants.
	if (op == STYLEOP_Sub)
	{
		thread->WriteColorFunc = &BlendColor<BlendColorOpt_Sub>;
		return;
	}

	// Anything that is neither Add nor Sub is treated as reverse subtract.
	if (!(op == STYLEOP_Add))
	{
		if (srcFactor == STYLEALPHA_Src && dstFactor == STYLEALPHA_One)
			thread->WriteColorFunc = &BlendColorRevSub_Src_One;
		else
			thread->WriteColorFunc = &BlendColor<BlendColorOpt_RevSub>;
		return;
	}

	// Additive family: test the specialised factor pairs in order.
	if (srcFactor == STYLEALPHA_One && dstFactor == STYLEALPHA_Zero)
		thread->WriteColorFunc = &BlendColorOpaque;
	else if (srcFactor == STYLEALPHA_Src && dstFactor == STYLEALPHA_InvSrc)
		thread->WriteColorFunc = &BlendColorAdd_Src_InvSrc;
	else if (srcFactor == STYLEALPHA_SrcCol && dstFactor == STYLEALPHA_InvSrcCol)
		thread->WriteColorFunc = &BlendColorAdd_SrcCol_InvSrcCol;
	else if (srcFactor == STYLEALPHA_Src && dstFactor == STYLEALPHA_One)
		thread->WriteColorFunc = &BlendColorAdd_Src_One;
	else if (srcFactor == STYLEALPHA_SrcCol && dstFactor == STYLEALPHA_One)
		thread->WriteColorFunc = &BlendColorAdd_SrcCol_One;
	else if (srcFactor == STYLEALPHA_DstCol && dstFactor == STYLEALPHA_Zero)
		thread->WriteColorFunc = &BlendColorAdd_DstCol_Zero;
	else if (srcFactor == STYLEALPHA_InvDstCol && dstFactor == STYLEALPHA_Zero)
		thread->WriteColorFunc = &BlendColorAdd_InvDstCol_Zero;
	else
		thread->WriteColorFunc = &BlendColor<BlendColorOpt_Add>;
}
|
49
src/rendering/polyrenderer/drawers/screen_blend.h
Normal file
49
src/rendering/polyrenderer/drawers/screen_blend.h
Normal file
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
class PolyTriangleThreadData;

// Flag values used by the BlendColorOpt_* option types below.
enum SWBlendColor
{
	SWBLEND_Sub = 1,
	SWBLEND_RevSub = 2
};

// Compile-time options for the BlendColor<> template; Flags is 0 for additive
// blending or one of the SWBlendColor values.
struct BlendColorOpt_Add { static const int Flags = 0; };
struct BlendColorOpt_Sub { static const int Flags = SWBLEND_Sub; };
struct BlendColorOpt_RevSub { static const int Flags = SWBLEND_RevSub; };

// Generic blend routine, parameterised by one of the BlendColorOpt_* types.
template<typename OptT>
void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread);

// Blend routines for specific blend-op / factor combinations. All share the
// signature (row y, span [x0, x1), per-thread state).
void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_Src_InvSrc(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_SrcCol_InvSrcCol(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_SrcCol_One(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_DstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorAdd_InvDstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread);
void BlendColorRevSub_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread);

// Stores the routine matching thread->RenderStyle in thread->WriteColorFunc.
void SelectWriteColorFunc(PolyTriangleThreadData* thread);
|
420
src/rendering/polyrenderer/drawers/screen_scanline_setup.cpp
Normal file
420
src/rendering/polyrenderer/drawers/screen_scanline_setup.cpp
Normal file
|
@ -0,0 +1,420 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include "templates.h"
|
||||
#include "doomdef.h"
|
||||
#include "poly_thread.h"
|
||||
#include "screen_scanline_setup.h"
|
||||
#include "x86.h"
|
||||
#include <cmath>
|
||||
|
||||
#ifdef NO_SSE
|
||||
void WriteW(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
|
||||
{
|
||||
float startX = x0 + (0.5f - args->v1->x);
|
||||
float startY = y + (0.5f - args->v1->y);
|
||||
|
||||
float posW = args->v1->w + args->gradientX.W * startX + args->gradientY.W * startY;
|
||||
float stepW = args->gradientX.W;
|
||||
float* w = thread->scanline.W;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
w[x] = 1.0f / posW;
|
||||
posW += stepW;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void WriteW(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
|
||||
{
|
||||
float startX = x0 + (0.5f - args->v1->x);
|
||||
float startY = y + (0.5f - args->v1->y);
|
||||
|
||||
float posW = args->v1->w + args->gradientX.W * startX + args->gradientY.W * startY;
|
||||
float stepW = args->gradientX.W;
|
||||
float* w = thread->scanline.W;
|
||||
|
||||
int ssecount = ((x1 - x0) & ~3);
|
||||
int sseend = x0 + ssecount;
|
||||
|
||||
__m128 mstepW = _mm_set1_ps(stepW * 4.0f);
|
||||
__m128 mposW = _mm_setr_ps(posW, posW + stepW, posW + stepW + stepW, posW + stepW + stepW + stepW);
|
||||
|
||||
for (int x = x0; x < sseend; x += 4)
|
||||
{
|
||||
// One Newton-Raphson iteration for 1/posW
|
||||
__m128 res = _mm_rcp_ps(mposW);
|
||||
__m128 muls = _mm_mul_ps(mposW, _mm_mul_ps(res, res));
|
||||
_mm_storeu_ps(w + x, _mm_sub_ps(_mm_add_ps(res, res), muls));
|
||||
mposW = _mm_add_ps(mposW, mstepW);
|
||||
}
|
||||
|
||||
posW += ssecount * stepW;
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
w[x] = 1.0f / posW;
|
||||
posW += stepW;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void WriteDynLightArray(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int num_lights = thread->numPolyLights;
|
||||
PolyLight* lights = thread->polyLights;
|
||||
|
||||
float worldnormalX = thread->mainVertexShader.vWorldNormal.X;
|
||||
float worldnormalY = thread->mainVertexShader.vWorldNormal.Y;
|
||||
float worldnormalZ = thread->mainVertexShader.vWorldNormal.Z;
|
||||
|
||||
uint32_t* dynlights = thread->scanline.dynlights;
|
||||
float* worldposX = thread->scanline.WorldX;
|
||||
float* worldposY = thread->scanline.WorldY;
|
||||
float* worldposZ = thread->scanline.WorldZ;
|
||||
|
||||
int sseend = x0;
|
||||
|
||||
#ifndef NO_SSE
|
||||
int ssecount = ((x1 - x0) & ~3);
|
||||
sseend = x0 + ssecount;
|
||||
|
||||
__m128 mworldnormalX = _mm_set1_ps(worldnormalX);
|
||||
__m128 mworldnormalY = _mm_set1_ps(worldnormalY);
|
||||
__m128 mworldnormalZ = _mm_set1_ps(worldnormalZ);
|
||||
|
||||
for (int x = x0; x < sseend; x += 4)
|
||||
{
|
||||
__m128i litlo = _mm_setzero_si128();
|
||||
//__m128i litlo = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128i lithi = litlo;
|
||||
|
||||
for (int i = 0; i < num_lights; i++)
|
||||
{
|
||||
__m128 lightposX = _mm_set1_ps(lights[i].x);
|
||||
__m128 lightposY = _mm_set1_ps(lights[i].y);
|
||||
__m128 lightposZ = _mm_set1_ps(lights[i].z);
|
||||
__m128 light_radius = _mm_set1_ps(lights[i].radius);
|
||||
__m128i light_color = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(lights[i].color), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
|
||||
|
||||
__m128 is_attenuated = _mm_cmplt_ps(light_radius, _mm_setzero_ps());
|
||||
light_radius = _mm_andnot_ps(_mm_set1_ps(-0.0f), light_radius); // clear sign bit
|
||||
|
||||
// L = light-pos
|
||||
// dist = sqrt(dot(L, L))
|
||||
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||
__m128 Lx = _mm_sub_ps(lightposX, _mm_loadu_ps(&worldposX[x]));
|
||||
__m128 Ly = _mm_sub_ps(lightposY, _mm_loadu_ps(&worldposY[x]));
|
||||
__m128 Lz = _mm_sub_ps(lightposZ, _mm_loadu_ps(&worldposZ[x]));
|
||||
__m128 dist2 = _mm_add_ps(_mm_mul_ps(Lx, Lx), _mm_add_ps(_mm_mul_ps(Ly, Ly), _mm_mul_ps(Lz, Lz)));
|
||||
__m128 rcp_dist = _mm_rsqrt_ps(dist2);
|
||||
__m128 dist = _mm_mul_ps(dist2, rcp_dist);
|
||||
__m128 distance_attenuation = _mm_sub_ps(_mm_set1_ps(256.0f), _mm_min_ps(_mm_mul_ps(dist, light_radius), _mm_set1_ps(256.0f)));
|
||||
|
||||
// The simple light type
|
||||
__m128 simple_attenuation = distance_attenuation;
|
||||
|
||||
// The point light type
|
||||
// diffuse = max(dot(N,normalize(L)),0) * attenuation
|
||||
Lx = _mm_mul_ps(Lx, rcp_dist);
|
||||
Ly = _mm_mul_ps(Ly, rcp_dist);
|
||||
Lz = _mm_mul_ps(Lz, rcp_dist);
|
||||
__m128 dotNL = _mm_add_ps(_mm_add_ps(_mm_mul_ps(mworldnormalX, Lx), _mm_mul_ps(mworldnormalY, Ly)), _mm_mul_ps(mworldnormalZ, Lz));
|
||||
__m128 point_attenuation = _mm_mul_ps(_mm_max_ps(dotNL, _mm_setzero_ps()), distance_attenuation);
|
||||
|
||||
__m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, point_attenuation), _mm_andnot_ps(is_attenuated, simple_attenuation)));
|
||||
|
||||
attenuation = _mm_shufflehi_epi16(_mm_shufflelo_epi16(attenuation, _MM_SHUFFLE(2, 2, 0, 0)), _MM_SHUFFLE(2, 2, 0, 0));
|
||||
__m128i attenlo = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 0, 0));
|
||||
__m128i attenhi = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(3, 3, 2, 2));
|
||||
|
||||
litlo = _mm_add_epi16(litlo, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenlo), 8));
|
||||
lithi = _mm_add_epi16(lithi, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenhi), 8));
|
||||
}
|
||||
|
||||
_mm_storeu_si128((__m128i*)&dynlights[x], _mm_packus_epi16(litlo, lithi));
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t lit_r = 0;
|
||||
uint32_t lit_g = 0;
|
||||
uint32_t lit_b = 0;
|
||||
|
||||
for (int i = 0; i < num_lights; i++)
|
||||
{
|
||||
float lightposX = lights[i].x;
|
||||
float lightposY = lights[i].y;
|
||||
float lightposZ = lights[i].z;
|
||||
float light_radius = lights[i].radius;
|
||||
uint32_t light_color = lights[i].color;
|
||||
|
||||
bool is_attenuated = light_radius < 0.0f;
|
||||
if (is_attenuated)
|
||||
light_radius = -light_radius;
|
||||
|
||||
// L = light-pos
|
||||
// dist = sqrt(dot(L, L))
|
||||
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||
float Lx = lightposX - worldposX[x];
|
||||
float Ly = lightposY - worldposY[x];
|
||||
float Lz = lightposZ - worldposZ[x];
|
||||
float dist2 = Lx * Lx + Ly * Ly + Lz * Lz;
|
||||
#ifdef NO_SSE
|
||||
//float rcp_dist = 1.0f / sqrt(dist2);
|
||||
float rcp_dist = 1.0f / (dist2 * 0.01f);
|
||||
#else
|
||||
float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2)));
|
||||
#endif
|
||||
float dist = dist2 * rcp_dist;
|
||||
float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f);
|
||||
|
||||
// The simple light type
|
||||
float simple_attenuation = distance_attenuation;
|
||||
|
||||
// The point light type
|
||||
// diffuse = max(dot(N,normalize(L)),0) * attenuation
|
||||
Lx *= rcp_dist;
|
||||
Ly *= rcp_dist;
|
||||
Lz *= rcp_dist;
|
||||
float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz;
|
||||
float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation;
|
||||
|
||||
uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation);
|
||||
|
||||
lit_r += (RPART(light_color) * attenuation) >> 8;
|
||||
lit_g += (GPART(light_color) * attenuation) >> 8;
|
||||
lit_b += (BPART(light_color) * attenuation) >> 8;
|
||||
}
|
||||
|
||||
lit_r = MIN<uint32_t>(lit_r, 255);
|
||||
lit_g = MIN<uint32_t>(lit_g, 255);
|
||||
lit_b = MIN<uint32_t>(lit_b, 255);
|
||||
dynlights[x] = MAKEARGB(255, lit_r, lit_g, lit_b);
|
||||
|
||||
// Palette version:
|
||||
// dynlights[x] = RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)];
|
||||
}
|
||||
}
|
||||
|
||||
static void WriteLightArray(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
|
||||
{
|
||||
float startX = x0 + (0.5f - args->v1->x);
|
||||
float startY = y + (0.5f - args->v1->y);
|
||||
float posW = args->v1->w + args->gradientX.W * startX + args->gradientY.W * startY;
|
||||
float stepW = args->gradientX.W;
|
||||
|
||||
float globVis = thread->mainVertexShader.Viewpoint->mGlobVis;
|
||||
|
||||
uint32_t light = (int)(thread->PushConstants->uLightLevel * 255.0f);
|
||||
fixed_t shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
|
||||
fixed_t lightpos = (fixed_t)(globVis * posW * (float)FRACUNIT);
|
||||
fixed_t lightstep = (fixed_t)(globVis * stepW * (float)FRACUNIT);
|
||||
|
||||
fixed_t maxvis = 24 * FRACUNIT / 32;
|
||||
fixed_t maxlight = 31 * FRACUNIT / 32;
|
||||
|
||||
uint16_t *lightarray = thread->scanline.lightarray;
|
||||
|
||||
fixed_t lightend = lightpos + lightstep * (x1 - x0);
|
||||
if (lightpos < maxvis && shade >= lightpos && shade - lightpos <= maxlight &&
|
||||
lightend < maxvis && shade >= lightend && shade - lightend <= maxlight)
|
||||
{
|
||||
//if (BitsPerPixel == 32)
|
||||
{
|
||||
lightpos += FRACUNIT - shade;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = lightpos >> 8;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
}
|
||||
/*else
|
||||
{
|
||||
lightpos = shade - lightpos;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = (lightpos >> 3) & 0xffffff00;
|
||||
lightpos -= lightstep;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
else
|
||||
{
|
||||
//if (BitsPerPixel == 32)
|
||||
{
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
}
|
||||
/*else
|
||||
{
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = (clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight) >> 3) & 0xffffff00;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
// Writes varying[x] = (pos + step * (x - x0)) * w[x] for x in [x0, x1).
//
// pos/step describe the varying before the multiply by w[] (which WriteW fills
// with 1/w). Unless NO_SSE is defined, groups of four pixels are processed with
// SSE and the remainder with the scalar loop.
static void WriteVarying(float pos, float step, int x0, int x1, const float* w, float* varying)
{
	int tailStart = x0;

#ifndef NO_SSE
	int simdCount = ((x1 - x0) & ~3);
	tailStart = x0 + simdCount;

	__m128 step4 = _mm_set1_ps(step * 4.0f);
	__m128 pos4 = _mm_setr_ps(pos, pos + step, pos + step + step, pos + step + step + step);

	for (int x = x0; x < tailStart; x += 4)
	{
		__m128 rcpW = _mm_loadu_ps(w + x);
		_mm_storeu_ps(varying + x, _mm_mul_ps(pos4, rcpW));
		pos4 = _mm_add_ps(pos4, step4);
	}

	// Bring the scalar accumulator up to the first pixel the SIMD loop left over.
	pos += simdCount * step;
#endif

	for (int x = tailStart; x < x1; x++)
	{
		varying[x] = pos * w[x];
		pos += step;
	}
}
|
||||
|
||||
// Writes the fractional part of (pos + step * (x - x0)) * w[x] to varying[x]
// as a 16-bit fixed-point value (0 maps to 0.0, 65536 would be 1.0) for x in
// [x0, x1). Negative inputs wrap, e.g. -0.25 yields the same result as 0.75.
//
// Unless NO_SSE is defined, groups of four pixels are processed with SSE. That
// path goes through a signed 16-bit pack, so it produces only 15 significant
// bits (the lowest bit of the result is always 0).
static void WriteVaryingWrap(float pos, float step, int x0, int x1, const float* w, uint16_t* varying)
{
	int sseend = x0;

#ifndef NO_SSE
	int ssecount = ((x1 - x0) & ~3);
	sseend = x0 + ssecount;

	__m128 mstep = _mm_set1_ps(step * 4.0f);
	__m128 mpos = _mm_setr_ps(pos, pos + step, pos + step + step, pos + step + step + step);

	for (int x = x0; x < sseend; x += 4)
	{
		__m128 value = _mm_mul_ps(mpos, _mm_loadu_ps(w + x));

		// floor(): truncate toward zero, then subtract 1 where truncation rounded up.
		__m128 f = value;
		__m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
		__m128 r = _mm_sub_ps(t, _mm_and_ps(_mm_cmplt_ps(f, t), _mm_set1_ps(1.0f)));
		value = _mm_sub_ps(f, r);

		// Shift down to 15 bits so the signed saturating pack cannot clip, then restore the scale.
		__m128i ivalue = _mm_srli_epi32(_mm_slli_epi32(_mm_cvttps_epi32(_mm_mul_ps(value, _mm_set1_ps(static_cast<float>(0x1000'0000)))), 4), 17);
		_mm_storel_epi64((__m128i*)(varying + x), _mm_slli_epi16(_mm_packs_epi32(ivalue, ivalue), 1));
		mpos = _mm_add_ps(mpos, mstep);
	}

	// Bring the scalar accumulator up to the first pixel the SIMD loop left over.
	pos += ssecount * step;
#endif

	for (int x = sseend; x < x1; x++)
	{
		float value = pos * w[x];
#ifdef NO_SSE
		value = value - std::floor(value);
#else
		// Same truncate-and-adjust floor as the SIMD loop above.
		__m128 f = _mm_set_ss(value);
		__m128 t = _mm_cvtepi32_ps(_mm_cvttps_epi32(f));
		__m128 r = _mm_sub_ss(t, _mm_and_ps(_mm_cmplt_ps(f, t), _mm_set_ss(1.0f)));
		value = _mm_cvtss_f32(_mm_sub_ss(f, r));
#endif

		// The fraction scaled by 2^28 needs up to 29 bits; shifting it left by 4
		// as a signed int would overflow (undefined before C++20), so shift as unsigned.
		uint32_t fixed28 = static_cast<uint32_t>(static_cast<int32_t>(value * static_cast<float>(0x1000'0000)));
		varying[x] = static_cast<uint16_t>((fixed28 << 4) >> 16);
		pos += step;
	}
}
|
||||
|
||||
#ifdef NO_SSE
|
||||
static void WriteVaryingColor(float pos, float step, int x0, int x1, const float* w, uint8_t* varying)
|
||||
{
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
varying[x] = clamp(static_cast<int>(pos * w[x] * 255.0f), 0, 255);
|
||||
pos += step;
|
||||
}
|
||||
}
|
||||
#else
|
||||
static void WriteVaryingColor(float pos, float step, int x0, int x1, const float* w, uint8_t* varying)
|
||||
{
|
||||
int ssecount = ((x1 - x0) & ~3);
|
||||
int sseend = x0 + ssecount;
|
||||
|
||||
__m128 mstep = _mm_set1_ps(step * 4.0f);
|
||||
__m128 mpos = _mm_setr_ps(pos, pos + step, pos + step + step, pos + step + step + step);
|
||||
|
||||
for (int x = x0; x < sseend; x += 4)
|
||||
{
|
||||
__m128i value = _mm_cvttps_epi32(_mm_mul_ps(_mm_mul_ps(mpos, _mm_loadu_ps(w + x)), _mm_set1_ps(255.0f)));
|
||||
value = _mm_packs_epi32(value, value);
|
||||
value = _mm_packus_epi16(value, value);
|
||||
*(uint32_t*)(varying + x) = _mm_cvtsi128_si32(value);
|
||||
mpos = _mm_add_ps(mpos, mstep);
|
||||
}
|
||||
|
||||
pos += ssecount * step;
|
||||
for (int x = sseend; x < x1; x++)
|
||||
{
|
||||
varying[x] = clamp(static_cast<int>(pos * w[x] * 255.0f), 0, 255);
|
||||
pos += step;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void WriteVaryings(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
|
||||
{
|
||||
float startX = x0 + (0.5f - args->v1->x);
|
||||
float startY = y + (0.5f - args->v1->y);
|
||||
|
||||
WriteVaryingWrap(args->v1->u * args->v1->w + args->gradientX.U * startX + args->gradientY.U * startY, args->gradientX.U, x0, x1, thread->scanline.W, thread->scanline.U);
|
||||
WriteVaryingWrap(args->v1->v * args->v1->w + args->gradientX.V * startX + args->gradientY.V * startY, args->gradientX.V, x0, x1, thread->scanline.W, thread->scanline.V);
|
||||
WriteVarying(args->v1->worldX * args->v1->w + args->gradientX.WorldX * startX + args->gradientY.WorldX * startY, args->gradientX.WorldX, x0, x1, thread->scanline.W, thread->scanline.WorldX);
|
||||
WriteVarying(args->v1->worldY * args->v1->w + args->gradientX.WorldY * startX + args->gradientY.WorldY * startY, args->gradientX.WorldY, x0, x1, thread->scanline.W, thread->scanline.WorldY);
|
||||
WriteVarying(args->v1->worldZ * args->v1->w + args->gradientX.WorldZ * startX + args->gradientY.WorldZ * startY, args->gradientX.WorldZ, x0, x1, thread->scanline.W, thread->scanline.WorldZ);
|
||||
WriteVarying(args->v1->gradientdistZ * args->v1->w + args->gradientX.GradientdistZ * startX + args->gradientY.GradientdistZ * startY, args->gradientX.GradientdistZ, x0, x1, thread->scanline.W, thread->scanline.GradientdistZ);
|
||||
WriteVaryingColor(args->v1->a * args->v1->w + args->gradientX.A * startX + args->gradientY.A * startY, args->gradientX.A, x0, x1, thread->scanline.W, thread->scanline.vColorA);
|
||||
WriteVaryingColor(args->v1->r * args->v1->w + args->gradientX.R * startX + args->gradientY.R * startY, args->gradientX.R, x0, x1, thread->scanline.W, thread->scanline.vColorR);
|
||||
WriteVaryingColor(args->v1->g * args->v1->w + args->gradientX.G * startX + args->gradientY.G * startY, args->gradientX.G, x0, x1, thread->scanline.W, thread->scanline.vColorG);
|
||||
WriteVaryingColor(args->v1->b * args->v1->w + args->gradientX.B * startX + args->gradientY.B * startY, args->gradientX.B, x0, x1, thread->scanline.W, thread->scanline.vColorB);
|
||||
|
||||
if (thread->PushConstants->uLightLevel >= 0.0f)
|
||||
WriteLightArray(y, x0, x1, args, thread);
|
||||
|
||||
if (thread->numPolyLights > 0)
|
||||
WriteDynLightArray(x0, x1, thread);
|
||||
}
|
29
src/rendering/polyrenderer/drawers/screen_scanline_setup.h
Normal file
29
src/rendering/polyrenderer/drawers/screen_scanline_setup.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
struct TriDrawTriangleArgs;
|
||||
class PolyTriangleThreadData;
|
||||
|
||||
// NOTE(review): definition not visible from this header; presumably fills thread->scanline.W
// for row y, pixels [x0, x1), which WriteVaryings then reads - confirm in screen_scanline_setup.cpp.
void WriteW(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread);
|
||||
// Fills the per-pixel varying arrays in thread->scanline (texture coordinates, world position,
// vertex color channels and, when enabled, light arrays) for row y, pixels [x0, x1).
void WriteVaryings(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread);
|
524
src/rendering/polyrenderer/drawers/screen_shader.cpp
Normal file
524
src/rendering/polyrenderer/drawers/screen_shader.cpp
Normal file
|
@ -0,0 +1,524 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include "templates.h"
|
||||
#include "doomdef.h"
|
||||
#include "poly_thread.h"
|
||||
#include "screen_scanline_setup.h"
|
||||
#include "x86.h"
|
||||
#include <cmath>
|
||||
|
||||
// Fetches one texel using nearest-texel lookup.
//   u, v       - texture coordinates in 16-bit fixed point (value * size >> 16 selects the texel)
//   texPixels  - texel data: 32-bit values when texBgra is true, 8-bit values otherwise
//   texWidth, texHeight - texture dimensions in texels; rows are texWidth texels apart
// Returns the 32-bit texel as stored, or for 8-bit textures the byte placed in bits 16..23
// with the top byte set to 0xff.
static uint32_t SampleTexture(uint32_t u, uint32_t v, const void* texPixels, int texWidth, int texHeight, bool texBgra)
{
	const int column = (u * texWidth) >> 16;
	const int row = (v * texHeight) >> 16;
	const int index = column + row * texWidth;

	if (!texBgra)
	{
		const uint32_t value = static_cast<const uint8_t*>(texPixels)[index];
		return (value << 16) | 0xff000000;
	}

	return static_cast<const uint32_t*>(texPixels)[index];
}
|
||||
|
||||
static void EffectFogBoundary(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
/*float fogdist = pixelpos.w;
|
||||
float fogfactor = exp2(uFogDensity * fogdist);
|
||||
FragColor = vec4(uFogColor.rgb, 1.0 - fogfactor);*/
|
||||
fragcolor[x] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void EffectBurn(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
|
||||
int tex2Width = thread->textures[1].width;
|
||||
int tex2Height = thread->textures[1].height;
|
||||
const void* tex2Pixels = thread->textures[1].pixels;
|
||||
bool tex2Bgra = thread->textures[1].bgra;
|
||||
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t frag_r = thread->scanline.vColorR[x];
|
||||
uint32_t frag_g = thread->scanline.vColorG[x];
|
||||
uint32_t frag_b = thread->scanline.vColorB[x];
|
||||
uint32_t frag_a = thread->scanline.vColorA[x];
|
||||
frag_r += frag_r >> 7; // 255 -> 256
|
||||
frag_g += frag_g >> 7; // 255 -> 256
|
||||
frag_b += frag_b >> 7; // 255 -> 256
|
||||
frag_a += frag_a >> 7; // 255 -> 256
|
||||
|
||||
uint32_t t1 = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
uint32_t t2 = SampleTexture(u[x], 0xffff - v[x], tex2Pixels, tex2Width, tex2Height, tex2Bgra);
|
||||
|
||||
uint32_t r = (frag_r * RPART(t1)) >> 8;
|
||||
uint32_t g = (frag_g * GPART(t1)) >> 8;
|
||||
uint32_t b = (frag_b * BPART(t1)) >> 8;
|
||||
uint32_t a = (frag_a * APART(t2)) >> 8;
|
||||
|
||||
fragcolor[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
|
||||
// Stencil effect: currently leaves FragColor untouched for pixels [x0, x1).
// The disabled implementation filled the range with a constant:
//   for (int x = x0; x < x1; x++)
//       fragcolor[x] = 0x00ffffff;
static void EffectStencil(int x0, int x1, PolyTriangleThreadData* thread)
{
}
|
||||
|
||||
static void FuncPaletted(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
const uint32_t* lut = (const uint32_t*)thread->textures[1].pixels;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
fragcolor[x] = lut[RPART(SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra))] | 0xff000000;
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNoTexture(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
auto& streamdata = thread->mainVertexShader.Data;
|
||||
uint32_t a = (int)(streamdata.uObjectColor.a * 255.0f);
|
||||
uint32_t r = (int)(streamdata.uObjectColor.r * 255.0f);
|
||||
uint32_t g = (int)(streamdata.uObjectColor.g * 255.0f);
|
||||
uint32_t b = (int)(streamdata.uObjectColor.b * 255.0f);
|
||||
uint32_t texel = MAKEARGB(a, r, g, b);
|
||||
|
||||
if (streamdata.uDesaturationFactor > 0.0f)
|
||||
{
|
||||
uint32_t t = (int)(streamdata.uDesaturationFactor * 256.0f);
|
||||
uint32_t inv_t = 256 - t;
|
||||
uint32_t gray = (RPART(texel) * 77 + GPART(texel) * 143 + BPART(texel) * 37) >> 8;
|
||||
texel = MAKEARGB(
|
||||
APART(texel),
|
||||
(RPART(texel) * inv_t + gray * t + 127) >> 8,
|
||||
(GPART(texel) * inv_t + gray * t + 127) >> 8,
|
||||
(BPART(texel) * inv_t + gray * t + 127) >> 8);
|
||||
}
|
||||
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
fragcolor[x] = texel;
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
fragcolor[x] = texel;
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_Stencil(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
fragcolor[x] = texel | 0x00ffffff;
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_Opaque(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
fragcolor[x] = texel | 0xff000000;
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_Inverse(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
fragcolor[x] = MAKEARGB(APART(texel), 0xff - RPART(texel), 0xff - BPART(texel), 0xff - GPART(texel));
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_AlphaTexture(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
uint32_t gray = (RPART(texel) * 77 + GPART(texel) * 143 + BPART(texel) * 37) >> 8;
|
||||
uint32_t alpha = APART(texel);
|
||||
alpha += alpha >> 7;
|
||||
alpha = (alpha * gray + 127) >> 8;
|
||||
texel = (alpha << 24) | 0x00ffffff;
|
||||
fragcolor[x] = texel;
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_ClampY(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
fragcolor[x] = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
if (v[x] < 0.0 || v[x] > 1.0)
|
||||
fragcolor[x] &= 0x00ffffff;
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_InvertOpaque(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
int texWidth = thread->textures[0].width;
|
||||
int texHeight = thread->textures[0].height;
|
||||
const void* texPixels = thread->textures[0].pixels;
|
||||
bool texBgra = thread->textures[0].bgra;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint16_t* u = thread->scanline.U;
|
||||
uint16_t* v = thread->scanline.V;
|
||||
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = SampleTexture(u[x], v[x], texPixels, texWidth, texHeight, texBgra);
|
||||
fragcolor[x] = MAKEARGB(0xff, 0xff - RPART(texel), 0xff - BPART(texel), 0xff - GPART(texel));
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_AddColor(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
auto& streamdata = thread->mainVertexShader.Data;
|
||||
uint32_t r = (int)(streamdata.uAddColor.r * 255.0f);
|
||||
uint32_t g = (int)(streamdata.uAddColor.g * 255.0f);
|
||||
uint32_t b = (int)(streamdata.uAddColor.b * 255.0f);
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = fragcolor[x];
|
||||
fragcolor[x] = MAKEARGB(
|
||||
APART(texel),
|
||||
MIN(r + RPART(texel), (uint32_t)255),
|
||||
MIN(g + GPART(texel), (uint32_t)255),
|
||||
MIN(b + BPART(texel), (uint32_t)255));
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_AddObjectColor(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
auto& streamdata = thread->mainVertexShader.Data;
|
||||
uint32_t r = (int)(streamdata.uObjectColor.r * 256.0f);
|
||||
uint32_t g = (int)(streamdata.uObjectColor.g * 256.0f);
|
||||
uint32_t b = (int)(streamdata.uObjectColor.b * 256.0f);
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = fragcolor[x];
|
||||
fragcolor[x] = MAKEARGB(
|
||||
APART(texel),
|
||||
MIN((r * RPART(texel)) >> 8, (uint32_t)255),
|
||||
MIN((g * GPART(texel)) >> 8, (uint32_t)255),
|
||||
MIN((b * BPART(texel)) >> 8, (uint32_t)255));
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_AddObjectColor2(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
auto& streamdata = thread->mainVertexShader.Data;
|
||||
float* gradientdistZ = thread->scanline.GradientdistZ;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
float t = gradientdistZ[x];
|
||||
float inv_t = 1.0f - t;
|
||||
uint32_t r = (int)((streamdata.uObjectColor.r * inv_t + streamdata.uObjectColor2.r * t) * 256.0f);
|
||||
uint32_t g = (int)((streamdata.uObjectColor.g * inv_t + streamdata.uObjectColor2.g * t) * 256.0f);
|
||||
uint32_t b = (int)((streamdata.uObjectColor.b * inv_t + streamdata.uObjectColor2.b * t) * 256.0f);
|
||||
|
||||
uint32_t texel = fragcolor[x];
|
||||
fragcolor[x] = MAKEARGB(
|
||||
APART(texel),
|
||||
MIN((r * RPART(texel)) >> 8, (uint32_t)255),
|
||||
MIN((g * GPART(texel)) >> 8, (uint32_t)255),
|
||||
MIN((b * BPART(texel)) >> 8, (uint32_t)255));
|
||||
}
|
||||
}
|
||||
|
||||
static void FuncNormal_DesaturationFactor(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
auto& streamdata = thread->mainVertexShader.Data;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint32_t t = (int)(streamdata.uDesaturationFactor * 256.0f);
|
||||
uint32_t inv_t = 256 - t;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t texel = fragcolor[x];
|
||||
uint32_t gray = (RPART(texel) * 77 + GPART(texel) * 143 + BPART(texel) * 37) >> 8;
|
||||
fragcolor[x] = MAKEARGB(
|
||||
APART(texel),
|
||||
(RPART(texel) * inv_t + gray * t + 127) >> 8,
|
||||
(GPART(texel) * inv_t + gray * t + 127) >> 8,
|
||||
(BPART(texel) * inv_t + gray * t + 127) >> 8);
|
||||
}
|
||||
}
|
||||
|
||||
static void RunAlphaTest(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t alphaThreshold = thread->AlphaThreshold;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
uint8_t* discard = thread->scanline.discard;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
discard[x] = fragcolor[x] <= alphaThreshold;
|
||||
}
|
||||
}
|
||||
|
||||
static void ApplyVertexColor(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t r = thread->scanline.vColorR[x];
|
||||
uint32_t g = thread->scanline.vColorG[x];
|
||||
uint32_t b = thread->scanline.vColorB[x];
|
||||
uint32_t a = thread->scanline.vColorA[x];
|
||||
|
||||
a += a >> 7;
|
||||
r += r >> 7;
|
||||
g += g >> 7;
|
||||
b += b >> 7;
|
||||
|
||||
uint32_t texel = fragcolor[x];
|
||||
fragcolor[x] = MAKEARGB(
|
||||
(APART(texel) * a + 127) >> 8,
|
||||
(RPART(texel) * r + 127) >> 8,
|
||||
(GPART(texel) * g + 127) >> 8,
|
||||
(BPART(texel) * b + 127) >> 8);
|
||||
}
|
||||
}
|
||||
|
||||
static void MainFP(int x0, int x1, PolyTriangleThreadData* thread)
|
||||
{
|
||||
if (thread->EffectState == SHADER_Paletted) // func_paletted
|
||||
{
|
||||
FuncPaletted(x0, x1, thread);
|
||||
}
|
||||
else if (thread->EffectState == SHADER_NoTexture) // func_notexture
|
||||
{
|
||||
FuncNoTexture(x0, x1, thread);
|
||||
}
|
||||
else // func_normal
|
||||
{
|
||||
auto constants = thread->PushConstants;
|
||||
|
||||
switch (constants->uTextureMode)
|
||||
{
|
||||
default:
|
||||
case TM_NORMAL:
|
||||
case TM_FOGLAYER: FuncNormal(x0, x1, thread); break;
|
||||
case TM_STENCIL: FuncNormal_Stencil(x0, x1, thread); break;
|
||||
case TM_OPAQUE: FuncNormal_Opaque(x0, x1, thread); break;
|
||||
case TM_INVERSE: FuncNormal_Inverse(x0, x1, thread); break;
|
||||
case TM_ALPHATEXTURE: FuncNormal_AlphaTexture(x0, x1, thread); break;
|
||||
case TM_CLAMPY: FuncNormal_ClampY(x0, x1, thread); break;
|
||||
case TM_INVERTOPAQUE: FuncNormal_InvertOpaque(x0, x1, thread); break;
|
||||
}
|
||||
|
||||
if (constants->uTextureMode != TM_FOGLAYER)
|
||||
{
|
||||
auto& streamdata = thread->mainVertexShader.Data;
|
||||
|
||||
if (streamdata.uAddColor.r != 0.0f || streamdata.uAddColor.g != 0.0f || streamdata.uAddColor.b != 0.0f)
|
||||
{
|
||||
FuncNormal_AddColor(x0, x1, thread);
|
||||
}
|
||||
|
||||
if (streamdata.uObjectColor2.a == 0.0f)
|
||||
{
|
||||
if (streamdata.uObjectColor.r != 1.0f || streamdata.uObjectColor.g != 1.0f || streamdata.uObjectColor.b != 1.0f)
|
||||
{
|
||||
FuncNormal_AddObjectColor(x0, x1, thread);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
FuncNormal_AddObjectColor2(x0, x1, thread);
|
||||
}
|
||||
|
||||
if (streamdata.uDesaturationFactor > 0.0f)
|
||||
{
|
||||
FuncNormal_DesaturationFactor(x0, x1, thread);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (thread->AlphaTest)
|
||||
RunAlphaTest(x0, x1, thread);
|
||||
|
||||
ApplyVertexColor(x0, x1, thread);
|
||||
|
||||
auto constants = thread->PushConstants;
|
||||
uint32_t* fragcolor = thread->scanline.FragColor;
|
||||
if (constants->uLightLevel >= 0.0f && thread->numPolyLights > 0)
|
||||
{
|
||||
uint16_t* lightarray = thread->scanline.lightarray;
|
||||
uint32_t* dynlights = thread->scanline.dynlights;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t fg = fragcolor[x];
|
||||
int lightshade = lightarray[x];
|
||||
uint32_t dynlight = dynlights[x];
|
||||
|
||||
uint32_t a = APART(fg);
|
||||
uint32_t r = MIN((RPART(fg) * (lightshade + RPART(dynlight))) >> 8, (uint32_t)255);
|
||||
uint32_t g = MIN((GPART(fg) * (lightshade + GPART(dynlight))) >> 8, (uint32_t)255);
|
||||
uint32_t b = MIN((BPART(fg) * (lightshade + BPART(dynlight))) >> 8, (uint32_t)255);
|
||||
|
||||
fragcolor[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
else if (constants->uLightLevel >= 0.0f)
|
||||
{
|
||||
uint16_t* lightarray = thread->scanline.lightarray;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t fg = fragcolor[x];
|
||||
int lightshade = lightarray[x];
|
||||
|
||||
uint32_t a = APART(fg);
|
||||
uint32_t r = (RPART(fg) * lightshade) >> 8;
|
||||
uint32_t g = (GPART(fg) * lightshade) >> 8;
|
||||
uint32_t b = (BPART(fg) * lightshade) >> 8;
|
||||
|
||||
fragcolor[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
|
||||
// To do: apply fog
|
||||
}
|
||||
else if (thread->numPolyLights > 0)
|
||||
{
|
||||
uint32_t* dynlights = thread->scanline.dynlights;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t fg = fragcolor[x];
|
||||
uint32_t dynlight = dynlights[x];
|
||||
|
||||
uint32_t a = APART(fg);
|
||||
uint32_t r = MIN((RPART(fg) * RPART(dynlight)) >> 8, (uint32_t)255);
|
||||
uint32_t g = MIN((GPART(fg) * GPART(dynlight)) >> 8, (uint32_t)255);
|
||||
uint32_t b = MIN((BPART(fg) * BPART(dynlight)) >> 8, (uint32_t)255);
|
||||
|
||||
fragcolor[x] = MAKEARGB(a, r, g, b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stores in thread->FragmentShader the scanline shader matching thread->SpecialEffect:
// the fog boundary, burn and stencil effects have dedicated functions, anything else uses MainFP.
void SelectFragmentShader(PolyTriangleThreadData* thread)
{
	using ShaderFunc = void (*)(int x0, int x1, PolyTriangleThreadData* thread);

	// MainFP handles every effect without a dedicated shader.
	ShaderFunc selected = &MainFP;

	if (thread->SpecialEffect == EFF_FOGBOUNDARY) // fogboundary.fp
	{
		selected = &EffectFogBoundary;
	}
	else if (thread->SpecialEffect == EFF_BURN) // burn.fp
	{
		selected = &EffectBurn;
	}
	else if (thread->SpecialEffect == EFF_STENCIL) // stencil.fp
	{
		selected = &EffectStencil;
	}

	thread->FragmentShader = selected;
}
|
27
src/rendering/polyrenderer/drawers/screen_shader.h
Normal file
27
src/rendering/polyrenderer/drawers/screen_shader.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
** Polygon Doom software renderer
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
class PolyTriangleThreadData;
|
||||
|
||||
// Sets thread->FragmentShader to the scanline shader function matching thread->SpecialEffect.
void SelectFragmentShader(PolyTriangleThreadData* thread);
|
File diff suppressed because it is too large
Load diff
|
@ -126,13 +126,3 @@ struct TestSpanOpt0 { static const int Flags = 0; };
|
|||
struct TestSpanOpt1 { static const int Flags = 1; };
|
||||
struct TestSpanOpt2 { static const int Flags = 2; };
|
||||
struct TestSpanOpt3 { static const int Flags = 3; };
|
||||
|
||||
enum SWBlendColor
|
||||
{
|
||||
SWBLEND_Sub = 1,
|
||||
SWBLEND_RevSub = 2
|
||||
};
|
||||
|
||||
struct BlendColorOpt_Add { static const int Flags = 0; };
|
||||
struct BlendColorOpt_Sub { static const int Flags = 1; };
|
||||
struct BlendColorOpt_RevSub { static const int Flags = 2; };
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
#include "../swrenderer/textures/r_swtexture.h"
|
||||
#include "drawers/poly_triangle.cpp"
|
||||
#include "drawers/poly_thread.cpp"
|
||||
#include "drawers/screen_triangle.cpp"
|
||||
#include "drawers/screen_scanline_setup.cpp"
|
||||
#include "drawers/screen_shader.cpp"
|
||||
#include "drawers/screen_blend.cpp"
|
||||
#include "math/gpu_types.cpp"
|
||||
|
|
|
@ -52,7 +52,7 @@
|
|||
#include "r_thread.h"
|
||||
#include "swrenderer/scene/r_light.h"
|
||||
#include "playsim/a_dynlight.h"
|
||||
#include "polyrenderer/drawers/poly_triangle.h"
|
||||
#include "polyrenderer/drawers/poly_thread.h"
|
||||
|
||||
CVAR(Bool, r_dynlights, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
||||
CVAR(Bool, r_fuzzscale, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
||||
|
|
Loading…
Reference in a new issue