mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2024-11-22 20:41:20 +00:00
[util] Get tests working with sse2
It seems that i686 code generation is all over the place regarding sse2 vs fp, with the resulting differences in carried precision. I'm not sure I'm happy with the situation, but at least it's being tested to a certain extent. Not sure if this broke basic (no sse) i686 tests.
This commit is contained in:
parent
c5f2aca07d
commit
ef9b04ba83
7 changed files with 56 additions and 24 deletions
|
@ -81,8 +81,8 @@ AC_ARG_ENABLE(optimize,
|
|||
optimize=yes
|
||||
)
|
||||
|
||||
QF_CC_OPTION(-Wno-psabi)
|
||||
dnl QF_CC_OPTION(-msse2)
|
||||
dnl QF_CC_OPTION(-Wno-psabi)
|
||||
QF_CC_OPTION(-msse2)
|
||||
dnl QF_CC_OPTION(-Wno-psabi)
|
||||
dnl QF_CC_OPTION(-mavx2)
|
||||
dnl fma is not used as it is the equivalent of turning on
|
||||
|
@ -97,10 +97,11 @@ if test "x$optimize" = xyes -a "x$leave_cflags_alone" != "xyes"; then
|
|||
saved_cflags="$CFLAGS"
|
||||
CFLAGS=""
|
||||
QF_CC_OPTION(-frename-registers)
|
||||
if test "$CC_MAJ" -ge 4; then
|
||||
QF_CC_OPTION(-finline-limit=32000 -Winline)
|
||||
fi
|
||||
heavy="-O2 $CFLAGS -ffast-math -fno-unsafe-math-optimizations -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
|
||||
dnl if test "$CC_MAJ" -ge 4; then
|
||||
dnl QF_CC_OPTION(-finline-limit=32000 -Winline)
|
||||
dnl fi
|
||||
dnl heavy="-O2 $CFLAGS -ffast-math -fno-unsafe-math-optimizations -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
|
||||
heavy="-O2 $CFLAGS -fno-fast-math -funroll-loops -fomit-frame-pointer -fexpensive-optimizations"
|
||||
CFLAGS="$saved_cflags"
|
||||
light="-O2"
|
||||
AC_ARG_ENABLE(strict-aliasing,
|
||||
|
|
|
@ -1199,10 +1199,11 @@ BarycentricCoords (const vec_t **points, int num_points, const vec3_t p,
|
|||
CrossProduct (a, b, ab);
|
||||
div = DotProduct (ab, ab);
|
||||
CrossProduct (x, b, n);
|
||||
lambda[1] = DotProduct (n, ab) / div;
|
||||
lambda[1] = DotProduct (n, ab);
|
||||
CrossProduct (a, x, n);
|
||||
lambda[2] = DotProduct (n, ab) / div;
|
||||
lambda[0] = 1 - lambda[1] - lambda[2];
|
||||
lambda[2] = DotProduct (n, ab);
|
||||
lambda[0] = div - lambda[1] - lambda[2];
|
||||
VectorScale (lambda, 1 / div, lambda);
|
||||
return;
|
||||
case 4:
|
||||
VectorSubtract (p, points[0], x);
|
||||
|
|
|
@ -66,10 +66,10 @@ BarycentricCoords_vf (const vec4f_t **points, int num_points, const vec4f_t p)
|
|||
b = *points[2] - *points[0];
|
||||
ab = crossf (a, b);
|
||||
d = dotf (ab, ab);
|
||||
l[1] = (dotf (crossf (x, b), ab) / d)[0];
|
||||
l[2] = (dotf (crossf (a, x), ab) / d)[0];
|
||||
l[0] = 1 - l[1] - l[2];
|
||||
return l;
|
||||
l[1] = dotf (crossf (x, b), ab)[0];
|
||||
l[2] = dotf (crossf (a, x), ab)[0];
|
||||
l[0] = d[0] - l[1] - l[2];
|
||||
return l / d;
|
||||
case 4:
|
||||
x = p - *points[0];
|
||||
a = *points[1] - *points[0];
|
||||
|
|
|
@ -35,8 +35,8 @@ struct {
|
|||
{tri, 3, points[0], {1, 0, 0}},
|
||||
{tri, 3, points[1], {0, 1, 0}},
|
||||
{tri, 3, points[2], {0, 0, 1}},
|
||||
{tri, 3, points[3], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
|
||||
{tri, 3, points[8], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
|
||||
{tri, 3, points[3], {0.333333333, 0.333333333, 0.333333333}},
|
||||
{tri, 3, points[8], {0.333333333, 0.333333333, 0.333333333}},
|
||||
{tetra, 4, points[0], {1, 0, 0, 0}},
|
||||
{tetra, 4, points[1], {0, 1, 0, 0}},
|
||||
{tetra, 4, points[2], {0, 0, 1, 0}},
|
||||
|
|
|
@ -36,8 +36,8 @@ struct {
|
|||
{tri, 3, &points[0], {1, 0, 0}},
|
||||
{tri, 3, &points[1], {0, 1, 0}},
|
||||
{tri, 3, &points[2], {0, 0, 1}},
|
||||
{tri, 3, &points[3], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
|
||||
{tri, 3, &points[8], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
|
||||
{tri, 3, &points[3], {0.333333333, 0.333333333, 0.333333333}},
|
||||
{tri, 3, &points[8], {0.333333333, 0.333333333, 0.333333333}},
|
||||
{tetra, 4, &points[0], {1, 0, 0, 0}},
|
||||
{tetra, 4, &points[1], {0, 1, 0, 0}},
|
||||
{tetra, 4, &points[2], {0, 0, 1, 0}},
|
||||
|
|
|
@ -37,11 +37,11 @@
|
|||
int a = 5;
|
||||
int b = 6;
|
||||
int c;
|
||||
float point[4] = { 2, 3, 4, 1 }; // a point, so w = 1
|
||||
float normal[4] = { 1, 2, 3, 0 }; // a vector, so w = 0
|
||||
float direction[4] = { 4, 5, 6, 0 }; // a vector, so w = 0
|
||||
float plane[4];
|
||||
float intercept[4];
|
||||
vec4f_t point = { 2, 3, 4, 1 }; // a point, so w = 1
|
||||
vec4f_t normal = { 1, 2, 3, 0 }; // a vector, so w = 0
|
||||
vec4f_t direction = { 4, 5, 6, 0 }; // a vector, so w = 0
|
||||
vec4f_t plane;
|
||||
vec4f_t intercept;
|
||||
|
||||
exprtype_t *vector_params[] = {
|
||||
&cexpr_vector,
|
||||
|
@ -134,7 +134,7 @@ exprsym_t symbols[] = {
|
|||
exprval_t test_result = { &cexpr_int, &c };
|
||||
exprval_t plane_result = { &cexpr_vector, &plane };
|
||||
// a bit hacky, but no l-values
|
||||
exprval_t dist_result = { &cexpr_float, &plane[3] };
|
||||
exprval_t dist_result = { &cexpr_float, (float *)&plane + 3 };
|
||||
exprval_t intercept_result = { &cexpr_vector, &intercept };
|
||||
|
||||
exprtab_t symtab = {
|
||||
|
|
|
@ -243,6 +243,27 @@ fail:
|
|||
// -ffast-math is any real benefit
|
||||
#define ISNAN(x) (((x) & 0x7f800000) == 0x7f800000 && ((x) & 0x7fffff))
|
||||
|
||||
// FIXME differences in precision between archs
|
||||
/*
	Approximate float equality for the tests.

	Returns non-zero when a and b are at most 3 representable values
	(ULPs) apart, zero otherwise.

	The comparison is done on the bit patterns of the floats. IEEE single
	precision is sign-magnitude, so the raw patterns cannot simply be
	subtracted: doing so overflows int when the signs differ and, with
	wrap-around, made x and -x compare as equal. Instead each pattern is
	mapped to a signed key that increases monotonically with the float's
	value (+0 and -0 both map to 0) and the keys are subtracted in a type
	wide enough to hold the full range.

	NaNs are not special-cased: their bit patterns are compared like any
	other value.

	NOTE assumes float and unsigned are both 32 bits wide.
*/
static int
cmp (float a, float b)
{
	typedef union {
		float       f;
		unsigned    u;
	} fu;
	fu          abits;
	fu          bbits;
	long long   akey;
	long long   bkey;
	long long   dist;

	abits.f = a;
	bbits.f = b;

	// magnitude bits, negated when the sign bit is set
	akey = abits.u & 0x7fffffffu;
	if (abits.u & 0x80000000u) {
		akey = -akey;
	}
	bkey = bbits.u & 0x7fffffffu;
	if (bbits.u & 0x80000000u) {
		bkey = -bkey;
	}

	dist = akey - bkey;
	if (dist < 0) {
		dist = -dist;
	}
	// same tolerance as masking off the low two bits of the difference
	return dist <= 3;
}
|
||||
|
||||
static int
|
||||
test_rotation4 (const vec3_t a, const vec3_t b, const quat_t expect)
|
||||
{
|
||||
|
@ -282,7 +303,7 @@ test_rotation4 (const vec3_t a, const vec3_t b, const quat_t expect)
|
|||
// yes, float precision will make it difficult to set up expect
|
||||
// but it is at least consistent (ie, the "errors" are not at all
|
||||
// random and thus will be the same from run to run)
|
||||
if (quat[i] != expect[i]) {
|
||||
if (!cmp (quat[i], expect[i])) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
@ -331,10 +352,19 @@ static struct {
|
|||
{0, 0, 1,
|
||||
1, 0, 0,
|
||||
0, 1, 0}},
|
||||
#if defined(__i686__) && defined(__OPTIMIZE__)
|
||||
// the fp unit carries more precision than a 32-bit float, so
|
||||
// the close-to-zero errors are different
|
||||
{{s05, 0.0, 0.0, s05},
|
||||
{1, 0, 0,
|
||||
0, 3.42285418e-08, -0.99999994,
|
||||
0, 0.99999994, 3.42285418e-08}},
|
||||
#else
|
||||
{{s05, 0.0, 0.0, s05},
|
||||
{1, 0, 0,
|
||||
0, 5.96046448e-8, -0.99999994,
|
||||
0, 0.99999994, 5.96046448e-8}},
|
||||
#endif
|
||||
};
|
||||
#define num_quat_mat_tests (sizeof (quat_mat_tests) / sizeof (quat_mat_tests[0]))
|
||||
|
||||
|
|
Loading…
Reference in a new issue