mirror of
https://github.com/ZDoom/raze-gles.git
synced 2025-01-25 08:21:14 +00:00
818 lines
24 KiB
C++
818 lines
24 KiB
C++
|
/*
|
||
|
** Polygon Doom software renderer
|
||
|
** Copyright (c) 2016 Magnus Norddahl
|
||
|
**
|
||
|
** This software is provided 'as-is', without any express or implied
|
||
|
** warranty. In no event will the authors be held liable for any damages
|
||
|
** arising from the use of this software.
|
||
|
**
|
||
|
** Permission is granted to anyone to use this software for any purpose,
|
||
|
** including commercial applications, and to alter it and redistribute it
|
||
|
** freely, subject to the following restrictions:
|
||
|
**
|
||
|
** 1. The origin of this software must not be misrepresented; you must not
|
||
|
** claim that you wrote the original software. If you use this software
|
||
|
** in a product, an acknowledgment in the product documentation would be
|
||
|
** appreciated but is not required.
|
||
|
** 2. Altered source versions must be plainly marked as such, and must not be
|
||
|
** misrepresented as being the original software.
|
||
|
** 3. This notice may not be removed or altered from any source distribution.
|
||
|
**
|
||
|
*/
|
||
|
|
||
|
#include <stddef.h>
|
||
|
#include "templates.h"
|
||
|
|
||
|
#include "filesystem.h"
|
||
|
#include "v_video.h"
|
||
|
#include "model.h"
|
||
|
#include "poly_thread.h"
|
||
|
#include "screen_triangle.h"
|
||
|
#include "x86.h"
|
||
|
|
||
|
#ifndef NO_SSE
|
||
|
#include <immintrin.h>
|
||
|
#endif
|
||
|
|
||
|
// Builds the per-thread rendering state. Each argument is copied verbatim into
// the member of the same name; no other setup happens here.
PolyTriangleThreadData::PolyTriangleThreadData(int32_t core, int32_t num_cores, int32_t numa_node, int32_t num_numa_nodes, int numa_start_y, int numa_end_y)
    : core(core),
      num_cores(num_cores),
      numa_node(numa_node),
      num_numa_nodes(num_numa_nodes),
      numa_start_y(numa_start_y),
      numa_end_y(numa_end_y)
{
}
|
||
|
|
||
|
void PolyTriangleThreadData::ClearDepth(float value)
|
||
|
{
|
||
|
int width = depthstencil->Width();
|
||
|
int height = depthstencil->Height();
|
||
|
float *data = depthstencil->DepthValues();
|
||
|
|
||
|
int skip = skipped_by_thread(0);
|
||
|
int count = count_for_thread(0, height);
|
||
|
|
||
|
data += skip * width;
|
||
|
for (int i = 0; i < count; i++)
|
||
|
{
|
||
|
for (int x = 0; x < width; x++)
|
||
|
data[x] = value;
|
||
|
data += num_cores * width;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::ClearStencil(uint8_t value)
|
||
|
{
|
||
|
int width = depthstencil->Width();
|
||
|
int height = depthstencil->Height();
|
||
|
uint8_t *data = depthstencil->StencilValues();
|
||
|
|
||
|
int skip = skipped_by_thread(0);
|
||
|
int count = count_for_thread(0, height);
|
||
|
|
||
|
data += skip * width;
|
||
|
for (int i = 0; i < count; i++)
|
||
|
{
|
||
|
memset(data, value, width);
|
||
|
data += num_cores * width;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetViewport(int x, int y, int width, int height, uint8_t *new_dest, int new_dest_width, int new_dest_height, int new_dest_pitch, bool new_dest_bgra, PolyDepthStencil *new_depthstencil, bool new_topdown)
|
||
|
{
|
||
|
viewport_x = x;
|
||
|
viewport_y = y;
|
||
|
viewport_width = width;
|
||
|
viewport_height = height;
|
||
|
dest = new_dest;
|
||
|
dest_width = new_dest_width;
|
||
|
dest_height = new_dest_height;
|
||
|
dest_pitch = new_dest_pitch;
|
||
|
dest_bgra = new_dest_bgra;
|
||
|
depthstencil = new_depthstencil;
|
||
|
topdown = new_topdown;
|
||
|
UpdateClip();
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetScissor(int x, int y, int w, int h)
|
||
|
{
|
||
|
scissor.left = x;
|
||
|
scissor.right = x + w;
|
||
|
scissor.top = y;
|
||
|
scissor.bottom = y + h;
|
||
|
UpdateClip();
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::UpdateClip()
|
||
|
{
|
||
|
clip.left = MAX(MAX(viewport_x, scissor.left), 0);
|
||
|
clip.top = MAX(MAX(viewport_y, scissor.top), 0);
|
||
|
clip.right = MIN(MIN(viewport_x + viewport_width, scissor.right), dest_width);
|
||
|
clip.bottom = MIN(MIN(viewport_y + viewport_height, scissor.bottom), dest_height);
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::PushStreamData(const StreamData &data, const PolyPushConstants &constants)
|
||
|
{
|
||
|
mainVertexShader.Data = data;
|
||
|
mainVertexShader.uClipSplit = constants.uClipSplit;
|
||
|
|
||
|
PushConstants = &constants;
|
||
|
|
||
|
AlphaThreshold = clamp((int)(PushConstants->uAlphaThreshold * 255.0f + 0.5f), 0, 255) << 24;
|
||
|
|
||
|
numPolyLights = 0;
|
||
|
if (constants.uLightIndex >= 0)
|
||
|
{
|
||
|
const FVector4 &lightRange = lights[constants.uLightIndex];
|
||
|
static_assert(sizeof(FVector4) == 16, "sizeof(FVector4) is not 16 bytes");
|
||
|
if (lightRange.Y > lightRange.X)
|
||
|
{
|
||
|
int start = constants.uLightIndex + 1;
|
||
|
int modulatedStart = static_cast<int>(lightRange.X) + start;
|
||
|
int modulatedEnd = static_cast<int>(lightRange.Y) + start;
|
||
|
for (int i = modulatedStart; i < modulatedEnd; i += 4)
|
||
|
{
|
||
|
if (numPolyLights == maxPolyLights)
|
||
|
break;
|
||
|
|
||
|
auto &lightpos = lights[i];
|
||
|
auto &lightcolor = lights[i + 1];
|
||
|
//auto &lightspot1 = lights[i + 2];
|
||
|
//auto &lightspot2 = lights[i + 3];
|
||
|
uint32_t r = (int)clamp(lightcolor.X * 255.0f, 0.0f, 255.0f);
|
||
|
uint32_t g = (int)clamp(lightcolor.Y * 255.0f, 0.0f, 255.0f);
|
||
|
uint32_t b = (int)clamp(lightcolor.Z * 255.0f, 0.0f, 255.0f);
|
||
|
|
||
|
auto& polylight = polyLights[numPolyLights++];
|
||
|
polylight.x = lightpos.X;
|
||
|
polylight.y = lightpos.Y;
|
||
|
polylight.z = lightpos.Z;
|
||
|
polylight.radius = 256.0f / lightpos.W;
|
||
|
polylight.color = (r << 16) | (g << 8) | b;
|
||
|
if (lightcolor.W < 0.0f)
|
||
|
polylight.radius = -polylight.radius;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::PushMatrices(const VSMatrix &modelMatrix, const VSMatrix &normalModelMatrix, const VSMatrix &textureMatrix)
|
||
|
{
|
||
|
mainVertexShader.ModelMatrix = modelMatrix;
|
||
|
mainVertexShader.NormalModelMatrix = normalModelMatrix;
|
||
|
mainVertexShader.TextureMatrix = textureMatrix;
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetViewpointUniforms(const HWViewpointUniforms *uniforms)
|
||
|
{
|
||
|
mainVertexShader.Viewpoint = uniforms;
|
||
|
}
|
||
|
|
||
|
// Depth clamping is not implemented here: the request is accepted and ignored.
void PolyTriangleThreadData::SetDepthClamp(bool on)
{
    (void)on;
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetDepthMask(bool on)
|
||
|
{
|
||
|
WriteDepth = on;
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetDepthFunc(int func)
|
||
|
{
|
||
|
if (func == DF_LEqual || func == DF_Less)
|
||
|
{
|
||
|
DepthTest = true;
|
||
|
}
|
||
|
else // if (func == DF_Always)
|
||
|
{
|
||
|
DepthTest = false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Accepts a depth range but changes no state.
void PolyTriangleThreadData::SetDepthRange(float min, float max)
{
    // The only two variants used by hwrenderer layer are (0, 1) and (1, 1).
    // Neither is acted upon here, so both arguments are ignored.
    (void)min;
    (void)max;
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetDepthBias(float depthBiasConstantFactor, float depthBiasSlopeFactor)
|
||
|
{
|
||
|
depthbias = (float)(depthBiasConstantFactor / 2500.0);
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetColorMask(bool r, bool g, bool b, bool a)
|
||
|
{
|
||
|
WriteColor = r;
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetStencil(int stencilRef, int op)
|
||
|
{
|
||
|
StencilTestValue = stencilRef;
|
||
|
if (op == SOP_Increment)
|
||
|
{
|
||
|
WriteStencil = StencilTest;
|
||
|
StencilWriteValue = MIN(stencilRef + 1, (int)255);
|
||
|
}
|
||
|
else if (op == SOP_Decrement)
|
||
|
{
|
||
|
WriteStencil = StencilTest;
|
||
|
StencilWriteValue = MAX(stencilRef - 1, (int)0);
|
||
|
}
|
||
|
else // SOP_Keep
|
||
|
{
|
||
|
WriteStencil = false;
|
||
|
StencilWriteValue = stencilRef;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetCulling(int mode)
|
||
|
{
|
||
|
SetTwoSided(mode == Cull_None);
|
||
|
SetCullCCW(mode == Cull_CCW);
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::EnableStencil(bool on)
|
||
|
{
|
||
|
StencilTest = on;
|
||
|
WriteStencil = on && (StencilTestValue != StencilWriteValue);
|
||
|
}
|
||
|
|
||
|
// Stores the render style to use for subsequent draws.
void PolyTriangleThreadData::SetRenderStyle(FRenderStyle style)
{
    RenderStyle = style;
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetShader(int specialEffect, int effectState, bool alphaTest, bool colormapShader)
|
||
|
{
|
||
|
SpecialEffect = specialEffect;
|
||
|
EffectState = effectState;
|
||
|
AlphaTest = alphaTest;
|
||
|
ColormapShader = colormapShader;
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::SetTexture(int unit, const void *pixels, int width, int height, bool bgra)
|
||
|
{
|
||
|
textures[unit].pixels = pixels;
|
||
|
textures[unit].width = width;
|
||
|
textures[unit].height = height;
|
||
|
textures[unit].bgra = bgra;
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::DrawIndexed(int index, int vcount, PolyDrawMode drawmode)
|
||
|
{
|
||
|
if (vcount < 3)
|
||
|
return;
|
||
|
|
||
|
elements += index;
|
||
|
|
||
|
ShadedTriVertex vertbuffer[3];
|
||
|
ShadedTriVertex *vert[3] = { &vertbuffer[0], &vertbuffer[1], &vertbuffer[2] };
|
||
|
if (drawmode == PolyDrawMode::Triangles)
|
||
|
{
|
||
|
for (int i = 0; i < vcount / 3; i++)
|
||
|
{
|
||
|
for (int j = 0; j < 3; j++)
|
||
|
*vert[j] = ShadeVertex(*(elements++));
|
||
|
DrawShadedTriangle(vert, ccw);
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::TriangleFan)
|
||
|
{
|
||
|
*vert[0] = ShadeVertex(*(elements++));
|
||
|
*vert[1] = ShadeVertex(*(elements++));
|
||
|
for (int i = 2; i < vcount; i++)
|
||
|
{
|
||
|
*vert[2] = ShadeVertex(*(elements++));
|
||
|
DrawShadedTriangle(vert, ccw);
|
||
|
std::swap(vert[1], vert[2]);
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::TriangleStrip)
|
||
|
{
|
||
|
bool toggleccw = ccw;
|
||
|
*vert[0] = ShadeVertex(*(elements++));
|
||
|
*vert[1] = ShadeVertex(*(elements++));
|
||
|
for (int i = 2; i < vcount; i++)
|
||
|
{
|
||
|
*vert[2] = ShadeVertex(*(elements++));
|
||
|
DrawShadedTriangle(vert, toggleccw);
|
||
|
ShadedTriVertex *vtmp = vert[0];
|
||
|
vert[0] = vert[1];
|
||
|
vert[1] = vert[2];
|
||
|
vert[2] = vtmp;
|
||
|
toggleccw = !toggleccw;
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::Lines)
|
||
|
{
|
||
|
for (int i = 0; i < vcount / 2; i++)
|
||
|
{
|
||
|
*vert[0] = ShadeVertex(*(elements++));
|
||
|
*vert[1] = ShadeVertex(*(elements++));
|
||
|
DrawShadedLine(vert);
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::Points)
|
||
|
{
|
||
|
for (int i = 0; i < vcount; i++)
|
||
|
{
|
||
|
*vert[0] = ShadeVertex(*(elements++));
|
||
|
DrawShadedPoint(vert);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::Draw(int index, int vcount, PolyDrawMode drawmode)
|
||
|
{
|
||
|
if (vcount < 3)
|
||
|
return;
|
||
|
|
||
|
int vinput = index;
|
||
|
|
||
|
ShadedTriVertex vertbuffer[3];
|
||
|
ShadedTriVertex *vert[3] = { &vertbuffer[0], &vertbuffer[1], &vertbuffer[2] };
|
||
|
if (drawmode == PolyDrawMode::Triangles)
|
||
|
{
|
||
|
for (int i = 0; i < vcount / 3; i++)
|
||
|
{
|
||
|
for (int j = 0; j < 3; j++)
|
||
|
*vert[j] = ShadeVertex(vinput++);
|
||
|
DrawShadedTriangle(vert, ccw);
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::TriangleFan)
|
||
|
{
|
||
|
*vert[0] = ShadeVertex(vinput++);
|
||
|
*vert[1] = ShadeVertex(vinput++);
|
||
|
for (int i = 2; i < vcount; i++)
|
||
|
{
|
||
|
*vert[2] = ShadeVertex(vinput++);
|
||
|
DrawShadedTriangle(vert, ccw);
|
||
|
std::swap(vert[1], vert[2]);
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::TriangleStrip)
|
||
|
{
|
||
|
bool toggleccw = ccw;
|
||
|
*vert[0] = ShadeVertex(vinput++);
|
||
|
*vert[1] = ShadeVertex(vinput++);
|
||
|
for (int i = 2; i < vcount; i++)
|
||
|
{
|
||
|
*vert[2] = ShadeVertex(vinput++);
|
||
|
DrawShadedTriangle(vert, toggleccw);
|
||
|
ShadedTriVertex *vtmp = vert[0];
|
||
|
vert[0] = vert[1];
|
||
|
vert[1] = vert[2];
|
||
|
vert[2] = vtmp;
|
||
|
toggleccw = !toggleccw;
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::Lines)
|
||
|
{
|
||
|
for (int i = 0; i < vcount / 2; i++)
|
||
|
{
|
||
|
*vert[0] = ShadeVertex(vinput++);
|
||
|
*vert[1] = ShadeVertex(vinput++);
|
||
|
DrawShadedLine(vert);
|
||
|
}
|
||
|
}
|
||
|
else if (drawmode == PolyDrawMode::Points)
|
||
|
{
|
||
|
for (int i = 0; i < vcount; i++)
|
||
|
{
|
||
|
*vert[0] = ShadeVertex(vinput++);
|
||
|
DrawShadedPoint(vert);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Runs the vertex shader for vertex `index` and returns the shaded result.
// The input assembler loads the vertex first, then the shader variant flags
// are derived from the current special effect before main() is executed.
ShadedTriVertex PolyTriangleThreadData::ShadeVertex(int index)
{
    inputAssembly->Load(this, vertices, index);

    const bool burnEffect = (SpecialEffect == EFF_BURN);
    const bool stencilEffect = (SpecialEffect == EFF_STENCIL);
    const bool sphereMapEffect = (SpecialEffect == EFF_SPHEREMAP);

    mainVertexShader.SIMPLE = burnEffect || stencilEffect;
    mainVertexShader.SPHEREMAP = sphereMapEffect;
    mainVertexShader.main();

    // Returned by value: the caller gets a copy of the shader's output state
    return mainVertexShader;
}
|
||
|
|
||
|
bool PolyTriangleThreadData::IsDegenerate(const ShadedTriVertex *const* vert)
|
||
|
{
|
||
|
// A degenerate triangle has a zero cross product for two of its sides.
|
||
|
float ax = vert[1]->gl_Position.X - vert[0]->gl_Position.X;
|
||
|
float ay = vert[1]->gl_Position.Y - vert[0]->gl_Position.Y;
|
||
|
float az = vert[1]->gl_Position.W - vert[0]->gl_Position.W;
|
||
|
float bx = vert[2]->gl_Position.X - vert[0]->gl_Position.X;
|
||
|
float by = vert[2]->gl_Position.Y - vert[0]->gl_Position.Y;
|
||
|
float bz = vert[2]->gl_Position.W - vert[0]->gl_Position.W;
|
||
|
float crossx = ay * bz - az * by;
|
||
|
float crossy = az * bx - ax * bz;
|
||
|
float crossz = ax * by - ay * bx;
|
||
|
float crosslengthsqr = crossx * crossx + crossy * crossy + crossz * crossz;
|
||
|
return crosslengthsqr <= 1.e-8f;
|
||
|
}
|
||
|
|
||
|
// Returns true when the screen-space triangle v1, v2, v3 is front facing,
// i.e. when twice its signed area (the shoelace sum below) is <= 0.
bool PolyTriangleThreadData::IsFrontfacing(TriDrawTriangleArgs *args)
{
    const auto &p1 = *args->v1;
    const auto &p2 = *args->v2;
    const auto &p3 = *args->v3;

    const float doubledArea =
        p1.x * p2.y - p2.x * p1.y +
        p2.x * p3.y - p3.x * p2.y +
        p3.x * p1.y - p1.x * p3.y;

    return doubledArea <= 0.0f;
}
|
||
|
|
||
|
// Point rendering is not implemented: the shaded vertex is ignored and
// nothing is drawn.
void PolyTriangleThreadData::DrawShadedPoint(const ShadedTriVertex *const* vertex)
{
    (void)vertex;
}
|
||
|
|
||
|
void PolyTriangleThreadData::DrawShadedLine(const ShadedTriVertex *const* vert)
|
||
|
{
|
||
|
static const int numclipdistances = 9;
|
||
|
float clipdistance[numclipdistances * 2];
|
||
|
float *clipd = clipdistance;
|
||
|
for (int i = 0; i < 2; i++)
|
||
|
{
|
||
|
const auto &v = *vert[i];
|
||
|
clipd[0] = v.gl_Position.X + v.gl_Position.W;
|
||
|
clipd[1] = v.gl_Position.W - v.gl_Position.X;
|
||
|
clipd[2] = v.gl_Position.Y + v.gl_Position.W;
|
||
|
clipd[3] = v.gl_Position.W - v.gl_Position.Y;
|
||
|
clipd[4] = v.gl_Position.Z + v.gl_Position.W;
|
||
|
clipd[5] = v.gl_Position.W - v.gl_Position.Z;
|
||
|
clipd[6] = v.gl_ClipDistance[0];
|
||
|
clipd[7] = v.gl_ClipDistance[1];
|
||
|
clipd[8] = v.gl_ClipDistance[2];
|
||
|
clipd += numclipdistances;
|
||
|
}
|
||
|
|
||
|
float t1 = 0.0f;
|
||
|
float t2 = 1.0f;
|
||
|
for (int p = 0; p < numclipdistances; p++)
|
||
|
{
|
||
|
float clipdistance1 = clipdistance[0 * numclipdistances + p];
|
||
|
float clipdistance2 = clipdistance[1 * numclipdistances + p];
|
||
|
if (clipdistance1 < 0.0f) t1 = MAX(-clipdistance1 / (clipdistance2 - clipdistance1), t1);
|
||
|
if (clipdistance2 < 0.0f) t2 = MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), t2);
|
||
|
if (t1 >= t2)
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
float weights[] = { 1.0f - t1, t1, 1.0f - t2, t2 };
|
||
|
|
||
|
ScreenTriVertex clippedvert[2];
|
||
|
for (int i = 0; i < 2; i++)
|
||
|
{
|
||
|
auto &v = clippedvert[i];
|
||
|
memset(&v, 0, sizeof(ScreenTriVertex));
|
||
|
for (int w = 0; w < 2; w++)
|
||
|
{
|
||
|
float weight = weights[i * 2 + w];
|
||
|
v.x += vert[w]->gl_Position.X * weight;
|
||
|
v.y += vert[w]->gl_Position.Y * weight;
|
||
|
v.z += vert[w]->gl_Position.Z * weight;
|
||
|
v.w += vert[w]->gl_Position.W * weight;
|
||
|
}
|
||
|
|
||
|
// Calculate normalized device coordinates:
|
||
|
v.w = 1.0f / v.w;
|
||
|
v.x *= v.w;
|
||
|
v.y *= v.w;
|
||
|
v.z *= v.w;
|
||
|
|
||
|
// Apply viewport scale to get screen coordinates:
|
||
|
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
|
||
|
if (topdown)
|
||
|
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
|
||
|
else
|
||
|
v.y = viewport_y + viewport_height * (1.0f + v.y) * 0.5f;
|
||
|
}
|
||
|
|
||
|
uint32_t vColorA = (int)(vert[0]->vColor.W * 255.0f + 0.5f);
|
||
|
uint32_t vColorR = (int)(vert[0]->vColor.X * 255.0f + 0.5f);
|
||
|
uint32_t vColorG = (int)(vert[0]->vColor.Y * 255.0f + 0.5f);
|
||
|
uint32_t vColorB = (int)(vert[0]->vColor.Z * 255.0f + 0.5f);
|
||
|
uint32_t color = MAKEARGB(vColorA, vColorR, vColorG, vColorB);
|
||
|
|
||
|
// Slow and naive implementation. Hopefully fast enough..
|
||
|
|
||
|
float x1 = clippedvert[0].x;
|
||
|
float y1 = clippedvert[0].y;
|
||
|
float x2 = clippedvert[1].x;
|
||
|
float y2 = clippedvert[1].y;
|
||
|
float dx = x2 - x1;
|
||
|
float dy = y2 - y1;
|
||
|
float step = (abs(dx) >= abs(dy)) ? abs(dx) : abs(dy);
|
||
|
dx /= step;
|
||
|
dy /= step;
|
||
|
float x = x1;
|
||
|
float y = y1;
|
||
|
int istep = (int)step;
|
||
|
int pixelsize = dest_bgra ? 4 : 1;
|
||
|
for (int i = 0; i <= istep; i++)
|
||
|
{
|
||
|
int scrx = (int)x;
|
||
|
int scry = (int)y;
|
||
|
if (scrx >= clip.left && scrx < clip.right && scry >= clip.top && scry < clip.bottom && !line_skipped_by_thread(scry))
|
||
|
{
|
||
|
uint8_t *destpixel = dest + (scrx + scry * dest_width) * pixelsize;
|
||
|
if (pixelsize == 4)
|
||
|
{
|
||
|
*reinterpret_cast<uint32_t*>(destpixel) = color;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
*destpixel = color;
|
||
|
}
|
||
|
}
|
||
|
x += dx;
|
||
|
y += dy;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *const* vert, bool ccw)
|
||
|
{
|
||
|
// Reject triangle if degenerate
|
||
|
if (IsDegenerate(vert))
|
||
|
return;
|
||
|
|
||
|
// Cull, clip and generate additional vertices as needed
|
||
|
ScreenTriVertex clippedvert[max_additional_vertices];
|
||
|
int numclipvert = ClipEdge(vert);
|
||
|
|
||
|
// Convert barycentric weights to actual vertices
|
||
|
for (int i = 0; i < numclipvert; i++)
|
||
|
{
|
||
|
auto &v = clippedvert[i];
|
||
|
memset(&v, 0, sizeof(ScreenTriVertex));
|
||
|
for (int w = 0; w < 3; w++)
|
||
|
{
|
||
|
float weight = weights[i * 3 + w];
|
||
|
v.x += vert[w]->gl_Position.X * weight;
|
||
|
v.y += vert[w]->gl_Position.Y * weight;
|
||
|
v.z += vert[w]->gl_Position.Z * weight;
|
||
|
v.w += vert[w]->gl_Position.W * weight;
|
||
|
v.u += vert[w]->vTexCoord.X * weight;
|
||
|
v.v += vert[w]->vTexCoord.Y * weight;
|
||
|
v.worldX += vert[w]->pixelpos.X * weight;
|
||
|
v.worldY += vert[w]->pixelpos.Y * weight;
|
||
|
v.worldZ += vert[w]->pixelpos.Z * weight;
|
||
|
v.a += vert[w]->vColor.W * weight;
|
||
|
v.r += vert[w]->vColor.X * weight;
|
||
|
v.g += vert[w]->vColor.Y * weight;
|
||
|
v.b += vert[w]->vColor.Z * weight;
|
||
|
v.gradientdistZ += vert[w]->gradientdist.Z * weight;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#ifdef NO_SSE
|
||
|
// Map to 2D viewport:
|
||
|
for (int j = 0; j < numclipvert; j++)
|
||
|
{
|
||
|
auto &v = clippedvert[j];
|
||
|
|
||
|
// Calculate normalized device coordinates:
|
||
|
v.w = 1.0f / v.w;
|
||
|
v.x *= v.w;
|
||
|
v.y *= v.w;
|
||
|
v.z *= v.w;
|
||
|
|
||
|
// Apply viewport scale to get screen coordinates:
|
||
|
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
|
||
|
if (topdown)
|
||
|
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
|
||
|
else
|
||
|
v.y = viewport_y + viewport_height * (1.0f + v.y) * 0.5f;
|
||
|
}
|
||
|
#else
|
||
|
// Map to 2D viewport:
|
||
|
__m128 mviewport_x = _mm_set1_ps((float)viewport_x);
|
||
|
__m128 mviewport_y = _mm_set1_ps((float)viewport_y);
|
||
|
__m128 mviewport_halfwidth = _mm_set1_ps(viewport_width * 0.5f);
|
||
|
__m128 mviewport_halfheight = _mm_set1_ps(viewport_height * 0.5f);
|
||
|
__m128 mone = _mm_set1_ps(1.0f);
|
||
|
int sse_length = (numclipvert + 3) / 4 * 4;
|
||
|
for (int j = 0; j < sse_length; j += 4)
|
||
|
{
|
||
|
__m128 vx = _mm_loadu_ps(&clippedvert[j].x);
|
||
|
__m128 vy = _mm_loadu_ps(&clippedvert[j + 1].x);
|
||
|
__m128 vz = _mm_loadu_ps(&clippedvert[j + 2].x);
|
||
|
__m128 vw = _mm_loadu_ps(&clippedvert[j + 3].x);
|
||
|
_MM_TRANSPOSE4_PS(vx, vy, vz, vw);
|
||
|
|
||
|
// Calculate normalized device coordinates:
|
||
|
vw = _mm_div_ps(mone, vw);
|
||
|
vx = _mm_mul_ps(vx, vw);
|
||
|
vy = _mm_mul_ps(vy, vw);
|
||
|
vz = _mm_mul_ps(vz, vw);
|
||
|
|
||
|
// Apply viewport scale to get screen coordinates:
|
||
|
vx = _mm_add_ps(mviewport_x, _mm_mul_ps(mviewport_halfwidth, _mm_add_ps(mone, vx)));
|
||
|
if (topdown)
|
||
|
vy = _mm_add_ps(mviewport_y, _mm_mul_ps(mviewport_halfheight, _mm_sub_ps(mone, vy)));
|
||
|
else
|
||
|
vy = _mm_add_ps(mviewport_y, _mm_mul_ps(mviewport_halfheight, _mm_add_ps(mone, vy)));
|
||
|
|
||
|
_MM_TRANSPOSE4_PS(vx, vy, vz, vw);
|
||
|
_mm_storeu_ps(&clippedvert[j].x, vx);
|
||
|
_mm_storeu_ps(&clippedvert[j + 1].x, vy);
|
||
|
_mm_storeu_ps(&clippedvert[j + 2].x, vz);
|
||
|
_mm_storeu_ps(&clippedvert[j + 3].x, vw);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (!topdown) ccw = !ccw;
|
||
|
|
||
|
TriDrawTriangleArgs args;
|
||
|
|
||
|
if (twosided && numclipvert > 2)
|
||
|
{
|
||
|
args.v1 = &clippedvert[0];
|
||
|
args.v2 = &clippedvert[1];
|
||
|
args.v3 = &clippedvert[2];
|
||
|
ccw = !IsFrontfacing(&args);
|
||
|
}
|
||
|
|
||
|
// Draw screen triangles
|
||
|
if (ccw)
|
||
|
{
|
||
|
for (int i = numclipvert - 1; i > 1; i--)
|
||
|
{
|
||
|
args.v1 = &clippedvert[numclipvert - 1];
|
||
|
args.v2 = &clippedvert[i - 1];
|
||
|
args.v3 = &clippedvert[i - 2];
|
||
|
if (IsFrontfacing(&args) == ccw && args.CalculateGradients())
|
||
|
{
|
||
|
ScreenTriangle::Draw(&args, this);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for (int i = 2; i < numclipvert; i++)
|
||
|
{
|
||
|
args.v1 = &clippedvert[0];
|
||
|
args.v2 = &clippedvert[i - 1];
|
||
|
args.v3 = &clippedvert[i];
|
||
|
if (IsFrontfacing(&args) != ccw && args.CalculateGradients())
|
||
|
{
|
||
|
ScreenTriangle::Draw(&args, this);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int PolyTriangleThreadData::ClipEdge(const ShadedTriVertex *const* verts)
|
||
|
{
|
||
|
// use barycentric weights for clipped vertices
|
||
|
weights = weightsbuffer;
|
||
|
for (int i = 0; i < 3; i++)
|
||
|
{
|
||
|
weights[i * 3 + 0] = 0.0f;
|
||
|
weights[i * 3 + 1] = 0.0f;
|
||
|
weights[i * 3 + 2] = 0.0f;
|
||
|
weights[i * 3 + i] = 1.0f;
|
||
|
}
|
||
|
|
||
|
// Clip and cull so that the following is true for all vertices:
|
||
|
// -v.w <= v.x <= v.w
|
||
|
// -v.w <= v.y <= v.w
|
||
|
// -v.w <= v.z <= v.w
|
||
|
|
||
|
// halfspace clip distances
|
||
|
static const int numclipdistances = 9;
|
||
|
#ifdef NO_SSE
|
||
|
float clipdistance[numclipdistances * 3];
|
||
|
bool needsclipping = false;
|
||
|
float *clipd = clipdistance;
|
||
|
for (int i = 0; i < 3; i++)
|
||
|
{
|
||
|
const auto &v = *verts[i];
|
||
|
clipd[0] = v.gl_Position.X + v.gl_Position.W;
|
||
|
clipd[1] = v.gl_Position.W - v.gl_Position.X;
|
||
|
clipd[2] = v.gl_Position.Y + v.gl_Position.W;
|
||
|
clipd[3] = v.gl_Position.W - v.gl_Position.Y;
|
||
|
clipd[4] = v.gl_Position.Z + v.gl_Position.W;
|
||
|
clipd[5] = v.gl_Position.W - v.gl_Position.Z;
|
||
|
clipd[6] = v.gl_ClipDistance[0];
|
||
|
clipd[7] = v.gl_ClipDistance[1];
|
||
|
clipd[8] = v.gl_ClipDistance[2];
|
||
|
for (int j = 0; j < 9; j++)
|
||
|
needsclipping = needsclipping || clipd[i];
|
||
|
clipd += numclipdistances;
|
||
|
}
|
||
|
|
||
|
// If all halfspace clip distances are positive then the entire triangle is visible. Skip the expensive clipping step.
|
||
|
if (!needsclipping)
|
||
|
{
|
||
|
return 3;
|
||
|
}
|
||
|
#else
|
||
|
__m128 mx = _mm_loadu_ps(&verts[0]->gl_Position.X);
|
||
|
__m128 my = _mm_loadu_ps(&verts[1]->gl_Position.X);
|
||
|
__m128 mz = _mm_loadu_ps(&verts[2]->gl_Position.X);
|
||
|
__m128 mw = _mm_setzero_ps();
|
||
|
_MM_TRANSPOSE4_PS(mx, my, mz, mw);
|
||
|
__m128 clipd0 = _mm_add_ps(mx, mw);
|
||
|
__m128 clipd1 = _mm_sub_ps(mw, mx);
|
||
|
__m128 clipd2 = _mm_add_ps(my, mw);
|
||
|
__m128 clipd3 = _mm_sub_ps(mw, my);
|
||
|
__m128 clipd4 = _mm_add_ps(mz, mw);
|
||
|
__m128 clipd5 = _mm_sub_ps(mw, mz);
|
||
|
__m128 clipd6 = _mm_setr_ps(verts[0]->gl_ClipDistance[0], verts[1]->gl_ClipDistance[0], verts[2]->gl_ClipDistance[0], 0.0f);
|
||
|
__m128 clipd7 = _mm_setr_ps(verts[0]->gl_ClipDistance[1], verts[1]->gl_ClipDistance[1], verts[2]->gl_ClipDistance[1], 0.0f);
|
||
|
__m128 clipd8 = _mm_setr_ps(verts[0]->gl_ClipDistance[2], verts[1]->gl_ClipDistance[2], verts[2]->gl_ClipDistance[2], 0.0f);
|
||
|
__m128 mneedsclipping = _mm_cmplt_ps(clipd0, _mm_setzero_ps());
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd1, _mm_setzero_ps()));
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd2, _mm_setzero_ps()));
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd3, _mm_setzero_ps()));
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd4, _mm_setzero_ps()));
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd5, _mm_setzero_ps()));
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd6, _mm_setzero_ps()));
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd7, _mm_setzero_ps()));
|
||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd8, _mm_setzero_ps()));
|
||
|
if (_mm_movemask_ps(mneedsclipping) == 0)
|
||
|
{
|
||
|
return 3;
|
||
|
}
|
||
|
float clipdistance[numclipdistances * 4];
|
||
|
_mm_storeu_ps(clipdistance, clipd0);
|
||
|
_mm_storeu_ps(clipdistance + 4, clipd1);
|
||
|
_mm_storeu_ps(clipdistance + 8, clipd2);
|
||
|
_mm_storeu_ps(clipdistance + 12, clipd3);
|
||
|
_mm_storeu_ps(clipdistance + 16, clipd4);
|
||
|
_mm_storeu_ps(clipdistance + 20, clipd5);
|
||
|
_mm_storeu_ps(clipdistance + 24, clipd6);
|
||
|
_mm_storeu_ps(clipdistance + 28, clipd7);
|
||
|
_mm_storeu_ps(clipdistance + 32, clipd8);
|
||
|
#endif
|
||
|
|
||
|
// Clip against each halfspace
|
||
|
float *input = weights;
|
||
|
float *output = weights + max_additional_vertices * 3;
|
||
|
int inputverts = 3;
|
||
|
for (int p = 0; p < numclipdistances; p++)
|
||
|
{
|
||
|
// Clip each edge
|
||
|
int outputverts = 0;
|
||
|
for (int i = 0; i < inputverts; i++)
|
||
|
{
|
||
|
int j = (i + 1) % inputverts;
|
||
|
#ifdef NO_SSE
|
||
|
float clipdistance1 =
|
||
|
clipdistance[0 * numclipdistances + p] * input[i * 3 + 0] +
|
||
|
clipdistance[1 * numclipdistances + p] * input[i * 3 + 1] +
|
||
|
clipdistance[2 * numclipdistances + p] * input[i * 3 + 2];
|
||
|
|
||
|
float clipdistance2 =
|
||
|
clipdistance[0 * numclipdistances + p] * input[j * 3 + 0] +
|
||
|
clipdistance[1 * numclipdistances + p] * input[j * 3 + 1] +
|
||
|
clipdistance[2 * numclipdistances + p] * input[j * 3 + 2];
|
||
|
#else
|
||
|
float clipdistance1 =
|
||
|
clipdistance[0 + p * 4] * input[i * 3 + 0] +
|
||
|
clipdistance[1 + p * 4] * input[i * 3 + 1] +
|
||
|
clipdistance[2 + p * 4] * input[i * 3 + 2];
|
||
|
|
||
|
float clipdistance2 =
|
||
|
clipdistance[0 + p * 4] * input[j * 3 + 0] +
|
||
|
clipdistance[1 + p * 4] * input[j * 3 + 1] +
|
||
|
clipdistance[2 + p * 4] * input[j * 3 + 2];
|
||
|
#endif
|
||
|
|
||
|
// Clip halfspace
|
||
|
if ((clipdistance1 >= 0.0f || clipdistance2 >= 0.0f) && outputverts + 1 < max_additional_vertices)
|
||
|
{
|
||
|
float t1 = (clipdistance1 < 0.0f) ? MAX(-clipdistance1 / (clipdistance2 - clipdistance1), 0.0f) : 0.0f;
|
||
|
float t2 = (clipdistance2 < 0.0f) ? MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), 1.0f) : 1.0f;
|
||
|
|
||
|
// add t1 vertex
|
||
|
for (int k = 0; k < 3; k++)
|
||
|
output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t1) + input[j * 3 + k] * t1;
|
||
|
outputverts++;
|
||
|
|
||
|
if (t2 != 1.0f && t2 > t1)
|
||
|
{
|
||
|
// add t2 vertex
|
||
|
for (int k = 0; k < 3; k++)
|
||
|
output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t2) + input[j * 3 + k] * t2;
|
||
|
outputverts++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
std::swap(input, output);
|
||
|
inputverts = outputverts;
|
||
|
if (inputverts == 0)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
weights = input;
|
||
|
return inputverts;
|
||
|
}
|
||
|
|
||
|
// Returns the PolyTriangleThreadData attached to `thread`, creating it from
// the thread's core/NUMA settings on first use. The object stays owned by the
// thread's shared pointer; the caller receives a non-owning pointer.
PolyTriangleThreadData *PolyTriangleThreadData::Get(DrawerThread *thread)
{
    auto &slot = thread->poly;
    if (slot)
        return slot.get();

    slot = std::make_shared<PolyTriangleThreadData>(
        thread->core,
        thread->num_cores,
        thread->numa_node,
        thread->num_numa_nodes,
        thread->numa_start_y,
        thread->numa_end_y);
    return slot.get();
}
|