[util] Get tests working with sse2

It seems that i686 code generation is all over the place regarding sse2
vs fp, with the resulting differences in carried precision. I'm not sure
I'm happy with the situation, but at least it's being tested to a
certain extent. Not sure if this broke basic (no sse) i686 tests.
This commit is contained in:
Bill Currie 2021-05-28 12:28:45 +09:00
parent c5f2aca07d
commit ef9b04ba83
7 changed files with 56 additions and 24 deletions

View file

@ -81,8 +81,8 @@ AC_ARG_ENABLE(optimize,
optimize=yes
)
QF_CC_OPTION(-Wno-psabi)
dnl QF_CC_OPTION(-msse2)
dnl QF_CC_OPTION(-Wno-psabi)
QF_CC_OPTION(-msse2)
dnl QF_CC_OPTION(-Wno-psabi)
dnl QF_CC_OPTION(-mavx2)
dnl fma is not used as it is the equivalent of turning on
@ -97,10 +97,11 @@ if test "x$optimize" = xyes -a "x$leave_cflags_alone" != "xyes"; then
saved_cflags="$CFLAGS"
CFLAGS=""
QF_CC_OPTION(-frename-registers)
if test "$CC_MAJ" -ge 4; then
QF_CC_OPTION(-finline-limit=32000 -Winline)
fi
heavy="-O2 $CFLAGS -ffast-math -fno-unsafe-math-optimizations -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
dnl if test "$CC_MAJ" -ge 4; then
dnl QF_CC_OPTION(-finline-limit=32000 -Winline)
dnl fi
dnl heavy="-O2 $CFLAGS -ffast-math -fno-unsafe-math-optimizations -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
heavy="-O2 $CFLAGS -fno-fast-math -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
CFLAGS="$saved_cflags"
light="-O2"
AC_ARG_ENABLE(strict-aliasing,

View file

@ -1199,10 +1199,11 @@ BarycentricCoords (const vec_t **points, int num_points, const vec3_t p,
CrossProduct (a, b, ab);
div = DotProduct (ab, ab);
CrossProduct (x, b, n);
lambda[1] = DotProduct (n, ab) / div;
lambda[1] = DotProduct (n, ab);
CrossProduct (a, x, n);
lambda[2] = DotProduct (n, ab) / div;
lambda[0] = 1 - lambda[1] - lambda[2];
lambda[2] = DotProduct (n, ab);
lambda[0] = div - lambda[1] - lambda[2];
VectorScale (lambda, 1 / div, lambda);
return;
case 4:
VectorSubtract (p, points[0], x);

View file

@ -66,10 +66,10 @@ BarycentricCoords_vf (const vec4f_t **points, int num_points, const vec4f_t p)
b = *points[2] - *points[0];
ab = crossf (a, b);
d = dotf (ab, ab);
l[1] = (dotf (crossf (x, b), ab) / d)[0];
l[2] = (dotf (crossf (a, x), ab) / d)[0];
l[0] = 1 - l[1] - l[2];
return l;
l[1] = dotf (crossf (x, b), ab)[0];
l[2] = dotf (crossf (a, x), ab)[0];
l[0] = d[0] - l[1] - l[2];
return l / d;
case 4:
x = p - *points[0];
a = *points[1] - *points[0];

View file

@ -35,8 +35,8 @@ struct {
{tri, 3, points[0], {1, 0, 0}},
{tri, 3, points[1], {0, 1, 0}},
{tri, 3, points[2], {0, 0, 1}},
{tri, 3, points[3], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
{tri, 3, points[8], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
{tri, 3, points[3], {0.333333333, 0.333333333, 0.333333333}},
{tri, 3, points[8], {0.333333333, 0.333333333, 0.333333333}},
{tetra, 4, points[0], {1, 0, 0, 0}},
{tetra, 4, points[1], {0, 1, 0, 0}},
{tetra, 4, points[2], {0, 0, 1, 0}},

View file

@ -36,8 +36,8 @@ struct {
{tri, 3, &points[0], {1, 0, 0}},
{tri, 3, &points[1], {0, 1, 0}},
{tri, 3, &points[2], {0, 0, 1}},
{tri, 3, &points[3], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
{tri, 3, &points[8], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
{tri, 3, &points[3], {0.333333333, 0.333333333, 0.333333333}},
{tri, 3, &points[8], {0.333333333, 0.333333333, 0.333333333}},
{tetra, 4, &points[0], {1, 0, 0, 0}},
{tetra, 4, &points[1], {0, 1, 0, 0}},
{tetra, 4, &points[2], {0, 0, 1, 0}},

View file

@ -37,11 +37,11 @@
int a = 5;
int b = 6;
int c;
float point[4] = { 2, 3, 4, 1 }; // a point, so w = 1
float normal[4] = { 1, 2, 3, 0 }; // a vector, so w = 0
float direction[4] = { 4, 5, 6, 0 }; // a vector, so w = 0
float plane[4];
float intercept[4];
vec4f_t point = { 2, 3, 4, 1 }; // a point, so w = 1
vec4f_t normal = { 1, 2, 3, 0 }; // a vector, so w = 0
vec4f_t direction = { 4, 5, 6, 0 }; // a vector, so w = 0
vec4f_t plane;
vec4f_t intercept;
exprtype_t *vector_params[] = {
&cexpr_vector,
@ -134,7 +134,7 @@ exprsym_t symbols[] = {
exprval_t test_result = { &cexpr_int, &c };
exprval_t plane_result = { &cexpr_vector, &plane };
// a bit hacky, but no l-values
exprval_t dist_result = { &cexpr_float, &plane[3] };
exprval_t dist_result = { &cexpr_float, (float *)&plane + 3 };
exprval_t intercept_result = { &cexpr_vector, &intercept };
exprtab_t symtab = {

View file

@ -243,6 +243,27 @@ fail:
// -ffast-math is any real benefit
#define ISNAN(x) (((x) & 0x7f800000) == 0x7f800000 && ((x) & 0x7fffff))
// FIXME differences in precision between archs
// Approximate float equality for the tests: returns non-zero when the two
// values' single-precision bit patterns are fewer than 4 representable
// steps apart, zero otherwise.
//
// Float bit patterns are sign-magnitude, so subtracting the raw integers is
// only meaningful when both values share a sign; with opposite signs the
// int subtraction overflows (undefined behaviour) and, when it wraps, makes
// a value compare equal to its own negation. To avoid that, each pattern is
// first mapped onto a monotonic integer line (negative floats become
// negative integers, so -0.0 and +0.0 coincide) and the distance is taken
// in a wider type that cannot overflow.
//
// NOTE: assumes int is 32 bits and float is IEEE-754 single precision, as
// the original bit-twiddling did.
static int
cmp (float a, float b)
{
	typedef union {
		float       f;
		int         i;
	} fi;
	fi          ax;
	fi          bx;
	long long   ia;
	long long   ib;
	long long   dist;

	ax.f = a;
	bx.f = b;
	// sign bit set: flip the magnitude to the negative side of the line
	ia = ax.i < 0 ? -(long long) (ax.i & 0x7fffffff) : (long long) ax.i;
	ib = bx.i < 0 ? -(long long) (bx.i & 0x7fffffff) : (long long) bx.i;
	dist = ia - ib;
	if (dist < 0) {
		dist = -dist;
	}
	// same tolerance as masking off the low two bits of the difference
	return dist < 4;
}
static int
test_rotation4 (const vec3_t a, const vec3_t b, const quat_t expect)
{
@ -282,7 +303,7 @@ test_rotation4 (const vec3_t a, const vec3_t b, const quat_t expect)
// yes, float precision will make it difficult to set up expect
// but it is at least consistent (ie, the "errors" are not at all
// random and thus will be the same from run to run)
if (quat[i] != expect[i]) {
if (!cmp (quat[i], expect[i])) {
goto fail;
}
}
@ -331,10 +352,19 @@ static struct {
{0, 0, 1,
1, 0, 0,
0, 1, 0}},
#if defined(__i686__) && defined(__OPTIMIZE__)
// the fp unit carries more precision than a 32-bit float, so
// the close-to-zero errors are different
{{s05, 0.0, 0.0, s05},
{1, 0, 0,
0, 3.42285418e-08, -0.99999994,
0, 0.99999994, 3.42285418e-08}},
#else
{{s05, 0.0, 0.0, s05},
{1, 0, 0,
0, 5.96046448e-8, -0.99999994,
0, 0.99999994, 5.96046448e-8}},
#endif
};
#define num_quat_mat_tests (sizeof (quat_mat_tests) / sizeof (quat_mat_tests[0]))