[util] Get tests working with sse2

It seems that i686 code generation is all over the place regarding sse2
vs fp, with the resulting differences in carried precision. I'm not sure
I'm happy with the situation, but at least it's being tested to a
certain extent. Not sure if this broke basic (no sse) i686 tests.
This commit is contained in:
Bill Currie 2021-05-28 12:28:45 +09:00
parent c5f2aca07d
commit ef9b04ba83
7 changed files with 56 additions and 24 deletions

View file

@ -81,8 +81,8 @@ AC_ARG_ENABLE(optimize,
optimize=yes optimize=yes
) )
QF_CC_OPTION(-Wno-psabi) dnl QF_CC_OPTION(-Wno-psabi)
dnl QF_CC_OPTION(-msse2) QF_CC_OPTION(-msse2)
dnl QF_CC_OPTION(-Wno-psabi) dnl QF_CC_OPTION(-Wno-psabi)
dnl QF_CC_OPTION(-mavx2) dnl QF_CC_OPTION(-mavx2)
dnl fma is not used as it is the equivalent of turning on dnl fma is not used as it is the equivalent of turning on
@ -97,10 +97,11 @@ if test "x$optimize" = xyes -a "x$leave_cflags_alone" != "xyes"; then
saved_cflags="$CFLAGS" saved_cflags="$CFLAGS"
CFLAGS="" CFLAGS=""
QF_CC_OPTION(-frename-registers) QF_CC_OPTION(-frename-registers)
if test "$CC_MAJ" -ge 4; then dnl if test "$CC_MAJ" -ge 4; then
QF_CC_OPTION(-finline-limit=32000 -Winline) dnl QF_CC_OPTION(-finline-limit=32000 -Winline)
fi dnl fi
heavy="-O2 $CFLAGS -ffast-math -fno-unsafe-math-optimizations -funroll-loops -fomit-frame-pointer -fexpensive-optimizations" dnl heavy="-O2 $CFLAGS -ffast-math -fno-unsafe-math-optimizations -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
heavy="-O2 $CFLAGS -fno-fast-math -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
CFLAGS="$saved_cflags" CFLAGS="$saved_cflags"
light="-O2" light="-O2"
AC_ARG_ENABLE(strict-aliasing, AC_ARG_ENABLE(strict-aliasing,

View file

@ -1199,10 +1199,11 @@ BarycentricCoords (const vec_t **points, int num_points, const vec3_t p,
CrossProduct (a, b, ab); CrossProduct (a, b, ab);
div = DotProduct (ab, ab); div = DotProduct (ab, ab);
CrossProduct (x, b, n); CrossProduct (x, b, n);
lambda[1] = DotProduct (n, ab) / div; lambda[1] = DotProduct (n, ab);
CrossProduct (a, x, n); CrossProduct (a, x, n);
lambda[2] = DotProduct (n, ab) / div; lambda[2] = DotProduct (n, ab);
lambda[0] = 1 - lambda[1] - lambda[2]; lambda[0] = div - lambda[1] - lambda[2];
VectorScale (lambda, 1 / div, lambda);
return; return;
case 4: case 4:
VectorSubtract (p, points[0], x); VectorSubtract (p, points[0], x);

View file

@ -66,10 +66,10 @@ BarycentricCoords_vf (const vec4f_t **points, int num_points, const vec4f_t p)
b = *points[2] - *points[0]; b = *points[2] - *points[0];
ab = crossf (a, b); ab = crossf (a, b);
d = dotf (ab, ab); d = dotf (ab, ab);
l[1] = (dotf (crossf (x, b), ab) / d)[0]; l[1] = dotf (crossf (x, b), ab)[0];
l[2] = (dotf (crossf (a, x), ab) / d)[0]; l[2] = dotf (crossf (a, x), ab)[0];
l[0] = 1 - l[1] - l[2]; l[0] = d[0] - l[1] - l[2];
return l; return l / d;
case 4: case 4:
x = p - *points[0]; x = p - *points[0];
a = *points[1] - *points[0]; a = *points[1] - *points[0];

View file

@ -35,8 +35,8 @@ struct {
{tri, 3, points[0], {1, 0, 0}}, {tri, 3, points[0], {1, 0, 0}},
{tri, 3, points[1], {0, 1, 0}}, {tri, 3, points[1], {0, 1, 0}},
{tri, 3, points[2], {0, 0, 1}}, {tri, 3, points[2], {0, 0, 1}},
{tri, 3, points[3], {0.333333284, 0.333333333, 0.333333333}},//rounding :P {tri, 3, points[3], {0.333333333, 0.333333333, 0.333333333}},
{tri, 3, points[8], {0.333333284, 0.333333333, 0.333333333}},//rounding :P {tri, 3, points[8], {0.333333333, 0.333333333, 0.333333333}},
{tetra, 4, points[0], {1, 0, 0, 0}}, {tetra, 4, points[0], {1, 0, 0, 0}},
{tetra, 4, points[1], {0, 1, 0, 0}}, {tetra, 4, points[1], {0, 1, 0, 0}},
{tetra, 4, points[2], {0, 0, 1, 0}}, {tetra, 4, points[2], {0, 0, 1, 0}},

View file

@ -36,8 +36,8 @@ struct {
{tri, 3, &points[0], {1, 0, 0}}, {tri, 3, &points[0], {1, 0, 0}},
{tri, 3, &points[1], {0, 1, 0}}, {tri, 3, &points[1], {0, 1, 0}},
{tri, 3, &points[2], {0, 0, 1}}, {tri, 3, &points[2], {0, 0, 1}},
{tri, 3, &points[3], {0.333333284, 0.333333333, 0.333333333}},//rounding :P {tri, 3, &points[3], {0.333333333, 0.333333333, 0.333333333}},
{tri, 3, &points[8], {0.333333284, 0.333333333, 0.333333333}},//rounding :P {tri, 3, &points[8], {0.333333333, 0.333333333, 0.333333333}},
{tetra, 4, &points[0], {1, 0, 0, 0}}, {tetra, 4, &points[0], {1, 0, 0, 0}},
{tetra, 4, &points[1], {0, 1, 0, 0}}, {tetra, 4, &points[1], {0, 1, 0, 0}},
{tetra, 4, &points[2], {0, 0, 1, 0}}, {tetra, 4, &points[2], {0, 0, 1, 0}},

View file

@ -37,11 +37,11 @@
int a = 5; int a = 5;
int b = 6; int b = 6;
int c; int c;
float point[4] = { 2, 3, 4, 1 }; // a point, so w = 1 vec4f_t point = { 2, 3, 4, 1 }; // a point, so w = 1
float normal[4] = { 1, 2, 3, 0 }; // a vector, so w = 0 vec4f_t normal = { 1, 2, 3, 0 }; // a vector, so w = 0
float direction[4] = { 4, 5, 6, 0 }; // a vector, so w = 0 vec4f_t direction = { 4, 5, 6, 0 }; // a vector, so w = 0
float plane[4]; vec4f_t plane;
float intercept[4]; vec4f_t intercept;
exprtype_t *vector_params[] = { exprtype_t *vector_params[] = {
&cexpr_vector, &cexpr_vector,
@ -134,7 +134,7 @@ exprsym_t symbols[] = {
exprval_t test_result = { &cexpr_int, &c }; exprval_t test_result = { &cexpr_int, &c };
exprval_t plane_result = { &cexpr_vector, &plane }; exprval_t plane_result = { &cexpr_vector, &plane };
// a bit hacky, but no l-values // a bit hacky, but no l-values
exprval_t dist_result = { &cexpr_float, &plane[3] }; exprval_t dist_result = { &cexpr_float, (float *)&plane + 3 };
exprval_t intercept_result = { &cexpr_vector, &intercept }; exprval_t intercept_result = { &cexpr_vector, &intercept };
exprtab_t symtab = { exprtab_t symtab = {

View file

@ -243,6 +243,27 @@ fail:
// -ffast-math is any real benefit // -ffast-math is any real benefit
#define ISNAN(x) (((x) & 0x7f800000) == 0x7f800000 && ((x) & 0x7fffff)) #define ISNAN(x) (((x) & 0x7f800000) == 0x7f800000 && ((x) & 0x7fffff))
// FIXME differences in precision between archs
static int
cmp (float a, float b)
{
typedef union {
float f;
int i;
} fi;
fi ax;
fi bx;
int x;
ax.f = a;
bx.f = b;
x = ax.i - bx.i;
if (x < 0) {
x = -x;
}
return (x & 0x7ffffffc) == 0;
}
static int static int
test_rotation4 (const vec3_t a, const vec3_t b, const quat_t expect) test_rotation4 (const vec3_t a, const vec3_t b, const quat_t expect)
{ {
@ -282,7 +303,7 @@ test_rotation4 (const vec3_t a, const vec3_t b, const quat_t expect)
// yes, float precision will make it difficult to set up expect // yes, float precision will make it difficult to set up expect
// but it is at least consistent (ie, the "errors" are not at all // but it is at least consistent (ie, the "errors" are not at all
// random and thus will be the same from run to run) // random and thus will be the same from run to run)
if (quat[i] != expect[i]) { if (!cmp (quat[i], expect[i])) {
goto fail; goto fail;
} }
} }
@ -331,10 +352,19 @@ static struct {
{0, 0, 1, {0, 0, 1,
1, 0, 0, 1, 0, 0,
0, 1, 0}}, 0, 1, 0}},
#if defined(__i686__) && defined(__OPTIMIZE__)
// the fp unit carries more precision than a 32-bit float, so
// the close-to-zero errors are different
{{s05, 0.0, 0.0, s05},
{1, 0, 0,
0, 3.42285418e-08, -0.99999994,
0, 0.99999994, 3.42285418e-08}},
#else
{{s05, 0.0, 0.0, s05}, {{s05, 0.0, 0.0, s05},
{1, 0, 0, {1, 0, 0,
0, 5.96046448e-8, -0.99999994, 0, 5.96046448e-8, -0.99999994,
0, 0.99999994, 5.96046448e-8}}, 0, 0.99999994, 5.96046448e-8}},
#endif
}; };
#define num_quat_mat_tests (sizeof (quat_mat_tests) / sizeof (quat_mat_tests[0])) #define num_quat_mat_tests (sizeof (quat_mat_tests) / sizeof (quat_mat_tests[0]))