mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-01-19 23:40:51 +00:00
ef9b04ba83
It seems that i686 code generation is all over the place regarding sse2 vs fp, with the resulting differences in carried precision. I'm not sure I'm happy with the situation, but at least it's being tested to a certain extent. Not sure if this broke basic (no sse) i686 tests.
331 lines
7.9 KiB
C
331 lines
7.9 KiB
C
/*
|
|
simd.c
|
|
|
|
SIMD math support
|
|
|
|
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to:
|
|
|
|
Free Software Foundation, Inc.
|
|
59 Temple Place - Suite 330
|
|
Boston, MA 02111-1307, USA
|
|
|
|
*/
|
|
#ifdef HAVE_CONFIG_H
|
|
# include "config.h"
|
|
#endif
|
|
|
|
#include <math.h>
|
|
|
|
#define IMPLEMENT_VEC4F_Funcs
|
|
#define IMPLEMENT_VEC4D_Funcs
|
|
#define IMPLEMENT_MAT4F_Funcs
|
|
|
|
#include "QF/simd/vec4d.h"
|
|
#include "QF/simd/vec4f.h"
|
|
#include "QF/simd/mat4f.h"
|
|
#include "QF/sys.h"
|
|
|
|
vec4f_t
|
|
BarycentricCoords_vf (const vec4f_t **points, int num_points, const vec4f_t p)
|
|
{
|
|
vec4f_t zero = { };
|
|
vec4f_t a, b, c, x, l, ab, bc, ca, d;
|
|
if (num_points > 4)
|
|
Sys_Error ("Don't know how to compute the barycentric coordinates "
|
|
"for %d points", num_points);
|
|
switch (num_points) {
|
|
case 1:
|
|
l = zero;
|
|
l[0] = 1;
|
|
return l;
|
|
case 2:
|
|
x = p - *points[0];
|
|
a = *points[1] - *points[0];
|
|
d = dotf (x, a) / dotf (a, a);
|
|
l = zero;
|
|
l[1] = d[0];
|
|
l[0] = 1 - d[0];
|
|
return l;
|
|
case 3:
|
|
x = p - *points[0];
|
|
a = *points[1] - *points[0];
|
|
b = *points[2] - *points[0];
|
|
ab = crossf (a, b);
|
|
d = dotf (ab, ab);
|
|
l[1] = dotf (crossf (x, b), ab)[0];
|
|
l[2] = dotf (crossf (a, x), ab)[0];
|
|
l[0] = d[0] - l[1] - l[2];
|
|
return l / d;
|
|
case 4:
|
|
x = p - *points[0];
|
|
a = *points[1] - *points[0];
|
|
b = *points[2] - *points[0];
|
|
c = *points[3] - *points[0];
|
|
ab = crossf (a, b);
|
|
bc = crossf (b, c);
|
|
ca = crossf (c, a);
|
|
d = dotf (a, bc);
|
|
l[1] = (dotf (x, bc) / d)[0];
|
|
l[2] = (dotf (x, ca) / d)[0];
|
|
l[3] = (dotf (x, ab) / d)[0];
|
|
l[0] = 1 - l[1] - l[2] - l[3];
|
|
return l;
|
|
}
|
|
Sys_Error ("Not enough points to project or enclose the point");
|
|
}
|
|
|
|
static vec4f_t
|
|
circum_circle (const vec4f_t points[], int num_points)
|
|
{
|
|
vec4f_t a, c, b;
|
|
vec4f_t bc, ca, ab;
|
|
vec4f_t aa, bb, cc;
|
|
vec4f_t div;
|
|
vec4f_t alpha, beta, gamma;
|
|
|
|
switch (num_points) {
|
|
case 1:
|
|
return points[0];
|
|
case 2:
|
|
return (points[0] + points[1]) / 2;
|
|
case 3:
|
|
a = points[0] - points[1];
|
|
b = points[0] - points[2];
|
|
c = points[1] - points[2];
|
|
aa = dotf (a, a);
|
|
bb = dotf (b, b);
|
|
cc = dotf (c, c);
|
|
div = dotf (a, c);
|
|
div = 2 * (aa * cc - div * div);
|
|
alpha = cc * dotf (a, b) / div;
|
|
beta = -bb * dotf (a, c) / div;
|
|
gamma = aa * dotf (b, c) / div;
|
|
return alpha * points[0] + beta * points[1] + gamma * points[2];
|
|
case 4:
|
|
a = points[1] - points[0];
|
|
b = points[2] - points[0];
|
|
c = points[3] - points[0];
|
|
bc = crossf (b, c);
|
|
ca = crossf (c, a);
|
|
ab = crossf (a, b);
|
|
div = 2 * dotf (a, bc);
|
|
aa = dotf (a, a) / div;
|
|
bb = dotf (b, b) / div;
|
|
cc = dotf (c, c) / div;
|
|
return bc * aa + bb * ca + cc * ab + points[0];
|
|
}
|
|
vec4f_t zero = {};
|
|
return zero;
|
|
}
|
|
|
|
vspheref_t
|
|
CircumSphere_vf (const vec4f_t *points, int num_points)
|
|
{
|
|
vspheref_t sphere = {};
|
|
if (num_points > 0 && num_points <= 4) {
|
|
sphere.center = circum_circle (points, num_points);
|
|
vec4f_t d = sphere.center - points[0];
|
|
sphere.radius = sqrt(dotf (d, d)[0]);
|
|
}
|
|
return sphere;
|
|
}
|
|
|
|
static vec4f_t
|
|
closest_affine_point (const vec4f_t **points, int num_points, const vec4f_t x)
|
|
{
|
|
vec4f_t closest = {};
|
|
vec4f_t a, b, n, d;
|
|
vec4f_t l;
|
|
|
|
switch (num_points) {
|
|
default:
|
|
case 1:
|
|
closest = *points[0];
|
|
break;
|
|
case 2:
|
|
n = *points[1] - *points[0];
|
|
d = x - *points[0];
|
|
l = dotf (d, n) / dotf (n, n);
|
|
closest = *points[0] + l * n;
|
|
break;
|
|
case 3:
|
|
a = *points[1] - *points[0];
|
|
b = *points[2] - *points[0];
|
|
n = crossf (a, b);
|
|
d = *points[0] - x;
|
|
l = dotf (d, n) / dotf (n, n);
|
|
closest = x + l * n;
|
|
break;
|
|
}
|
|
return closest;
|
|
}
|
|
|
|
static int
|
|
test_support_points(const vec4f_t **points, int *num_points, vec4f_t center)
|
|
{
|
|
vec4i_t cmp;
|
|
int in_affine = 0;
|
|
int in_convex = 0;
|
|
vec4f_t v, d, n, a, b;
|
|
float nn, dd, vv, dn;
|
|
|
|
switch (*num_points) {
|
|
case 1:
|
|
cmp = *points[0] == center;
|
|
in_affine = cmp[0] && cmp[1] && cmp[2];
|
|
// the convex hull and affine hull for a single point are the same
|
|
in_convex = in_affine;
|
|
break;
|
|
case 2:
|
|
v = *points[1] - *points[0];
|
|
d = center - *points[0];
|
|
n = crossf (v, d);
|
|
nn = dotf (n, n)[0];
|
|
dd = dotf (d, d)[0];
|
|
vv = dotf (v, v)[0];
|
|
in_affine = nn < 1e-6 * vv * dd;
|
|
break;
|
|
case 3:
|
|
a = *points[1] - *points[0];
|
|
b = *points[2] - *points[0];
|
|
d = center - *points[0];
|
|
n = crossf (a, b);
|
|
dn = dotf (d, n)[0];
|
|
dd = dotf (d, d)[0];
|
|
nn = dotf (n, n)[0];
|
|
in_affine = dn * dn < 1e-6 * dd * nn;
|
|
break;
|
|
case 4:
|
|
in_affine = 1;
|
|
break;
|
|
default:
|
|
Sys_Error ("Invalid number of points (%d) in test_support_points",
|
|
*num_points);
|
|
}
|
|
|
|
// if in_convex is not true while in_affine is, then need to test as
|
|
// there is more than one dimension for the affine hull (a single support
|
|
// point is never dropped as it cannot be redundant)
|
|
if (in_affine && !in_convex) {
|
|
vec4f_t lambda;
|
|
int dropped = 0;
|
|
int count = *num_points;
|
|
|
|
lambda = BarycentricCoords_vf (points, count, center);
|
|
|
|
for (int i = 0; i < count; i++) {
|
|
points[i - dropped] = points[i];
|
|
if (lambda[i] < -1e-4) {
|
|
dropped++;
|
|
(*num_points)--;
|
|
}
|
|
}
|
|
in_convex = !dropped;
|
|
if (dropped) {
|
|
for (int i = count - dropped; i < count; i++) {
|
|
points[i] = 0;
|
|
}
|
|
}
|
|
}
|
|
return in_convex;
|
|
}
|
|
|
|
vspheref_t
|
|
SmallestEnclosingBall_vf (const vec4f_t *points, int num_points)
|
|
{
|
|
vspheref_t sphere = {};
|
|
vec4f_t center = {};
|
|
const vec4f_t *best;
|
|
const vec4f_t *support[4];
|
|
int num_support;
|
|
int i;
|
|
int iters = 0;
|
|
|
|
if (num_points < 1) {
|
|
return sphere;
|
|
}
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
support[i] = 0;
|
|
}
|
|
|
|
vec4f_t dist = {};
|
|
float best_dist = 0;
|
|
center = points[0];
|
|
best = &points[0];
|
|
for (i = 1; i < num_points; i++) {
|
|
dist = points[i] - center;
|
|
dist = dotf (dist, dist);
|
|
if (dist[0] > best_dist) {
|
|
best_dist = dist[0];
|
|
best = &points[i];
|
|
}
|
|
}
|
|
num_support = 1;
|
|
support[0] = best;
|
|
sphere.radius = best_dist; // note: radius squared until the end
|
|
|
|
while (!test_support_points (support, &num_support, center)) {
|
|
vec4f_t affine;
|
|
vec4f_t center_to_affine, center_to_point;
|
|
float affine_dist, point_proj, point_dist, bound;
|
|
float scale = 1;
|
|
int i;
|
|
|
|
if (iters++ > 10)
|
|
Sys_Error ("stuck SEB");
|
|
best = 0;
|
|
|
|
affine = closest_affine_point (support, num_support, center);
|
|
center_to_affine = affine - center;
|
|
affine_dist = dotf (center_to_affine, center_to_affine)[0];
|
|
for (i = 0; i < num_points; i++) {
|
|
if (&points[i] == support[0] || &points[i] == support[1]
|
|
|| &points[i] == support[2])
|
|
continue;
|
|
center_to_point = points[i] - center;
|
|
point_proj = dotf (center_to_affine, center_to_point)[0];
|
|
if (affine_dist - point_proj <= 0
|
|
|| ((affine_dist - point_proj) * (affine_dist - point_proj)
|
|
< 1e-6 * sphere.radius * affine_dist))
|
|
continue;
|
|
point_dist = dotf (center_to_point, center_to_point)[0];
|
|
bound = sphere.radius - point_dist;
|
|
bound /= 2 * (affine_dist - point_proj);
|
|
if (bound < scale) {
|
|
best = &points[i];
|
|
scale = bound;
|
|
}
|
|
}
|
|
center = center + scale * center_to_affine;
|
|
dist = center - *support[0];
|
|
sphere.radius = dotf (dist, dist)[0];
|
|
if (best) {
|
|
support[num_support++] = best;
|
|
}
|
|
}
|
|
best_dist = 0;
|
|
for (i = 0; i < num_points; i++) {
|
|
dist = center - points[i];
|
|
dist = dotf (dist, dist);
|
|
if (dist[0] > best_dist)
|
|
best_dist = dist[0];
|
|
}
|
|
sphere.center = center;
|
|
sphere.radius = sqrt (best_dist);
|
|
return sphere;
|
|
}
|