[util] Add float a simd version of the SEB

And its support functions. I can't tell if it's any faster (mtwist_rand
is a significant chunk of the benchmark timings, oops), but it's nice to
have.
This commit is contained in:
Bill Currie 2021-03-27 23:38:10 +09:00
parent d072a7b99c
commit 29e029c792
7 changed files with 645 additions and 0 deletions

View file

@ -88,4 +88,10 @@ VEC_TYPE (int, vec4i_t);
typedef vec4f_t mat4f_t[4];
typedef vec4i_t mat4i_t[4];
typedef struct vspheref_s {
vec4f_t center; // w set to 1
float radius;
} vspheref_t;
#endif//__QF_simd_types_h

View file

@ -325,4 +325,12 @@ magnitude3f (vec4f_t v)
return vsqrtf (dotf (v, v));
}
vec4f_t __attribute__((pure))
BarycentricCoords_vf (const vec4f_t **points, int num_points, vec4f_t p);
vspheref_t __attribute__((pure))
CircumSphere_vf (const vec4f_t *points, int num_points);
vspheref_t SmallestEnclosingBall_vf (const vec4f_t *points, int num_points);
#endif//__QF_simd_vec4f_h

View file

@ -37,3 +37,295 @@
#include "QF/simd/vec4d.h"
#include "QF/simd/vec4f.h"
#include "QF/simd/mat4f.h"
#include "QF/sys.h"
vec4f_t
BarycentricCoords_vf (const vec4f_t **points, int num_points, const vec4f_t p)
{
vec4f_t zero = { };
vec4f_t a, b, c, x, l, ab, bc, ca, d;
if (num_points > 4)
Sys_Error ("Don't know how to compute the barycentric coordinates "
"for %d points", num_points);
switch (num_points) {
case 1:
l = zero;
l[0] = 1;
return l;
case 2:
x = p - *points[0];
a = *points[1] - *points[0];
d = dotf (x, a) / dotf (a, a);
l = zero;
l[1] = d[0];
l[0] = 1 - d[0];
return l;
case 3:
x = p - *points[0];
a = *points[1] - *points[0];
b = *points[2] - *points[0];
ab = crossf (a, b);
d = dotf (ab, ab);
l[1] = (dotf (crossf (x, b), ab) / d)[0];
l[2] = (dotf (crossf (a, x), ab) / d)[0];
l[0] = 1 - l[1] - l[2];
return l;
case 4:
x = p - *points[0];
a = *points[1] - *points[0];
b = *points[2] - *points[0];
c = *points[3] - *points[0];
ab = crossf (a, b);
bc = crossf (b, c);
ca = crossf (c, a);
d = dotf (a, bc);
l[1] = (dotf (x, bc) / d)[0];
l[2] = (dotf (x, ca) / d)[0];
l[3] = (dotf (x, ab) / d)[0];
l[0] = 1 - l[1] - l[2] - l[3];
return l;
}
Sys_Error ("Not enough points to project or enclose the point");
}
static vec4f_t
circum_circle (const vec4f_t points[], int num_points)
{
vec4f_t a, c, b;
vec4f_t bc, ca, ab;
vec4f_t aa, bb, cc;
vec4f_t div;
vec4f_t alpha, beta, gamma;
switch (num_points) {
case 1:
return points[0];
case 2:
return (points[0] + points[1]) / 2;
case 3:
a = points[0] - points[1];
b = points[0] - points[2];
c = points[1] - points[2];
aa = dotf (a, a);
bb = dotf (b, b);
cc = dotf (c, c);
div = dotf (a, c);
div = 2 * (aa * cc - div * div);
alpha = cc * dotf (a, b) / div;
beta = -bb * dotf (a, c) / div;
gamma = aa * dotf (b, c) / div;
return alpha * points[0] + beta * points[1] + gamma * points[2];
case 4:
a = points[1] - points[0];
b = points[2] - points[0];
c = points[3] - points[0];
bc = crossf (b, c);
ca = crossf (c, a);
ab = crossf (a, b);
div = 2 * dotf (a, bc);
aa = dotf (a, a) / div;
bb = dotf (b, b) / div;
cc = dotf (c, c) / div;
return bc * aa + bb * ca + cc * ab + points[0];
}
vec4f_t zero = {};
return zero;
}
vspheref_t
CircumSphere_vf (const vec4f_t *points, int num_points)
{
vspheref_t sphere = {};
if (num_points > 0 && num_points <= 4) {
sphere.center = circum_circle (points, num_points);
vec4f_t d = sphere.center - points[0];
sphere.radius = sqrt(dotf (d, d)[0]);
}
return sphere;
}
static vec4f_t
closest_affine_point (const vec4f_t **points, int num_points, const vec4f_t x)
{
vec4f_t closest = {};
vec4f_t a, b, n, d;
vec4f_t l;
switch (num_points) {
default:
case 1:
closest = *points[0];
break;
case 2:
n = *points[1] - *points[0];
d = x - *points[0];
l = dotf (d, n) / dotf (n, n);
closest = *points[0] + l * n;
break;
case 3:
a = *points[1] - *points[0];
b = *points[2] - *points[0];
n = crossf (a, b);
d = *points[0] - x;
l = dotf (d, n) / dotf (n, n);
closest = x + l * n;
break;
}
return closest;
}
static int
test_support_points(const vec4f_t **points, int *num_points, vec4f_t center)
{
vec4i_t cmp;
int in_affine = 0;
int in_convex = 0;
vec4f_t v, d, n, a, b;
float nn, dd, vv, dn;
switch (*num_points) {
case 1:
cmp = *points[0] == center;
in_affine = cmp[0] && cmp[1] && cmp[2];
// the convex hull and affine hull for a single point are the same
in_convex = in_affine;
break;
case 2:
v = *points[1] - *points[0];
d = center - *points[0];
n = crossf (v, d);
nn = dotf (n, n)[0];
dd = dotf (d, d)[0];
vv = dotf (v, v)[0];
in_affine = nn < 1e-6 * vv * dd;
break;
case 3:
a = *points[1] - *points[0];
b = *points[2] - *points[0];
d = center - *points[0];
n = crossf (a, b);
dn = dotf (d, n)[0];
dd = dotf (d, d)[0];
nn = dotf (n, n)[0];
in_affine = dn * dn < 1e-6 * dd * nn;
break;
case 4:
in_affine = 1;
break;
default:
Sys_Error ("Invalid number of points (%d) in test_support_points",
*num_points);
}
// if in_convex is not true while in_affine is, then need to test as
// there is more than one dimension for the affine hull (a single support
// point is never dropped as it cannot be redundant)
if (in_affine && !in_convex) {
vec4f_t lambda;
int dropped = 0;
int count = *num_points;
lambda = BarycentricCoords_vf (points, count, center);
for (int i = 0; i < count; i++) {
points[i - dropped] = points[i];
if (lambda[i] < -1e-4) {
dropped++;
(*num_points)--;
}
}
in_convex = !dropped;
if (dropped) {
for (int i = count - dropped; i < count; i++) {
points[i] = 0;
}
}
}
return in_convex;
}
vspheref_t
SmallestEnclosingBall_vf (const vec4f_t *points, int num_points)
{
vspheref_t sphere = {};
vec4f_t center = {};
const vec4f_t *best;
const vec4f_t *support[4];
int num_support;
int i;
int iters = 0;
if (num_points < 1) {
return sphere;
}
for (i = 0; i < 4; i++) {
support[i] = 0;
}
vec4f_t dist = {};
float best_dist = 0;
center = points[0];
best = &points[0];
for (i = 1; i < num_points; i++) {
dist = points[i] - center;
dist = dotf (dist, dist);
if (dist[0] > best_dist) {
best_dist = dist[0];
best = &points[i];
}
}
num_support = 1;
support[0] = best;
sphere.radius = best_dist; // note: radius squared until the end
while (!test_support_points (support, &num_support, center)) {
vec4f_t affine;
vec4f_t center_to_affine, center_to_point;
float affine_dist, point_proj, point_dist, bound;
float scale = 1;
int i;
if (iters++ > 10)
Sys_Error ("stuck SEB");
best = 0;
affine = closest_affine_point (support, num_support, center);
center_to_affine = affine - center;
affine_dist = dotf (center_to_affine, center_to_affine)[0];
for (i = 0; i < num_points; i++) {
if (&points[i] == support[0] || &points[i] == support[1]
|| &points[i] == support[2])
continue;
center_to_point = points[i] - center;
point_proj = dotf (center_to_affine, center_to_point)[0];
if (affine_dist - point_proj <= 0
|| ((affine_dist - point_proj) * (affine_dist - point_proj)
< 1e-6 * sphere.radius * affine_dist))
continue;
point_dist = dotf (center_to_point, center_to_point)[0];
bound = sphere.radius - point_dist;
bound /= 2 * (affine_dist - point_proj);
if (bound < scale) {
best = &points[i];
scale = bound;
}
}
center = center + scale * center_to_affine;
dist = center - *support[0];
sphere.radius = dotf (dist, dist)[0];
if (best) {
support[num_support++] = best;
}
}
best_dist = 0;
for (i = 0; i < num_points; i++) {
dist = center - points[i];
dist = dotf (dist, dist);
if (dist[0] > best_dist)
best_dist = dist[0];
}
sphere.center = center;
sphere.radius = sqrt (best_dist);
return sphere;
}

View file

@ -1,8 +1,10 @@
libs_util_tests = \
libs/util/test/test-bary \
libs/util/test/test-baryvf \
libs/util/test/test-cexpr \
libs/util/test/test-cmem \
libs/util/test/test-cs \
libs/util/test/test-csvf \
libs/util/test/test-darray \
libs/util/test/test-dq \
libs/util/test/test-half \
@ -12,6 +14,7 @@ libs_util_tests = \
libs/util/test/test-qfs \
libs/util/test/test-quat \
libs/util/test/test-seb \
libs/util/test/test-sebvf \
libs/util/test/test-seg \
libs/util/test/test-set \
libs/util/test/test-simd \
@ -26,6 +29,10 @@ libs_util_test_test_bary_SOURCES=libs/util/test/test-bary.c
libs_util_test_test_bary_LDADD=libs/util/libQFutil.la
libs_util_test_test_bary_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_baryvf_SOURCES=libs/util/test/test-baryvf.c
libs_util_test_test_baryvf_LDADD=libs/util/libQFutil.la
libs_util_test_test_baryvf_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_cexpr_SOURCES=libs/util/test/test-cexpr.c
libs_util_test_test_cexpr_LDADD=libs/util/libQFutil.la
libs_util_test_test_cexpr_DEPENDENCIES=libs/util/libQFutil.la
@ -38,6 +45,10 @@ libs_util_test_test_cs_SOURCES=libs/util/test/test-cs.c
libs_util_test_test_cs_LDADD=libs/util/libQFutil.la
libs_util_test_test_cs_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_csvf_SOURCES=libs/util/test/test-csvf.c
libs_util_test_test_csvf_LDADD=libs/util/libQFutil.la
libs_util_test_test_csvf_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_darray_SOURCES=libs/util/test/test-darray.c
libs_util_test_test_darray_LDADD=libs/util/libQFutil.la
libs_util_test_test_darray_DEPENDENCIES=libs/util/libQFutil.la
@ -74,6 +85,10 @@ libs_util_test_test_seb_SOURCES=libs/util/test/test-seb.c
libs_util_test_test_seb_LDADD=libs/util/libQFutil.la
libs_util_test_test_seb_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_sebvf_SOURCES=libs/util/test/test-sebvf.c
libs_util_test_test_sebvf_LDADD=libs/util/libQFutil.la
libs_util_test_test_sebvf_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_seg_SOURCES=libs/util/test/test-seg.c
libs_util_test_test_seg_LDADD=libs/util/libQFutil.la
libs_util_test_test_seg_DEPENDENCIES=libs/util/libQFutil.la

View file

@ -0,0 +1,118 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "QF/mathlib.h"
#include "QF/mersenne.h"
#include "QF/sys.h"
#include "QF/simd/vec4f.h"
vec4f_t points[] = {
{-1, -1, 1},
{ 1, 1, 1},
{-1, 1, -1},
{ 1, -1, -1},
{-1, -1, -1},
{ 1, 1, -1},
{-1, 1, 1},
{ 1, -1, 1},
{ 0, 0, 0},
};
const vec4f_t *line[] = {&points[0], &points[1]};
const vec4f_t *tri[] = {&points[0], &points[1], &points[2]};
const vec4f_t *tetra[] = {&points[0], &points[1], &points[2], &points[3]};
struct {
const vec4f_t **points;
int num_points;
vec4f_t *x;
vec_t expect[4];
} tests[] = {
{line, 2, &points[0], {1, 0}},
{line, 2, &points[1], {0, 1}},
{line, 2, &points[2], {0.5, 0.5}},
{line, 2, &points[3], {0.5, 0.5}},
{line, 2, &points[8], {0.5, 0.5}},
{tri, 3, &points[0], {1, 0, 0}},
{tri, 3, &points[1], {0, 1, 0}},
{tri, 3, &points[2], {0, 0, 1}},
{tri, 3, &points[3], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
{tri, 3, &points[8], {0.333333284, 0.333333333, 0.333333333}},//rounding :P
{tetra, 4, &points[0], {1, 0, 0, 0}},
{tetra, 4, &points[1], {0, 1, 0, 0}},
{tetra, 4, &points[2], {0, 0, 1, 0}},
{tetra, 4, &points[3], {0, 0, 0, 1}},
{tetra, 4, &points[4], { 0.5, -0.5, 0.5, 0.5}},
{tetra, 4, &points[5], {-0.5, 0.5, 0.5, 0.5}},
{tetra, 4, &points[6], { 0.5, 0.5, 0.5, -0.5}},
{tetra, 4, &points[7], { 0.5, 0.5, -0.5, 0.5}},
{tetra, 4, &points[8], {0.25, 0.25, 0.25, 0.25}},
};
#define num_tests (sizeof (tests) / sizeof (tests[0]))
static inline float
rnd (mtstate_t *mt)
{
union {
uint32_t u;
float f;
} uf;
do {
uf.u = mtwist_rand (mt) & 0x007fffff;
} while (!uf.u);
uf.u |= 0x40000000;
return uf.f - 3.0;
}
int
main (int argc, const char **argv)
{
int res = 0;
size_t i;
int j;
vec4f_t lambda;
mtstate_t mt;
double start, end;
for (i = 0; i < num_tests; i ++) {
lambda = BarycentricCoords_vf (tests[i].points, tests[i].num_points, *tests[i].x);
for (j = 0; j < tests[i].num_points; j++) {
if (tests[i].expect[j] != lambda[j])
break;
}
if (j != tests[i].num_points) {
res = 1;
printf ("test %d failed\n", (int) i);
printf ("expect:");
for (j = 0; j < tests[i].num_points; j++)
printf (" %.9g", tests[i].expect[j]);
printf ("\ngot :");
for (j = 0; j < tests[i].num_points; j++)
printf (" %.9g", lambda[j]);
printf ("\n");
}
}
mtwist_seed (&mt, 0);
start = Sys_DoubleTime ();
for (i = 0; i < 1000000; i++) {
vec4f_t p = { rnd (&mt), rnd (&mt), rnd (&mt) };
vec4f_t x = {};
lambda = BarycentricCoords_vf (tetra, 4, p);
for (j = 0; j < 4; j++) {
x = x + lambda[j] * *tetra[j];
}
if (VectorDistance_fast (x, p) > 1e-4) {
res = 1;
printf ("[%.9g %.9g %.9g] != [%.9g %.9g %.9g]: [%g %g %g %g]\n",
VectorExpand (x), VectorExpand (p), QuatExpand (lambda));
break;
}
}
end = Sys_DoubleTime ();
printf ("%d itterations in %gs: %g itters/second\n", (int) i, end - start,
i / (end - start));
return res;
}

103
libs/util/test/test-csvf.c Normal file
View file

@ -0,0 +1,103 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "QF/mathlib.h"
#include "QF/mersenne.h"
#include "QF/simd/vec4f.h"
#include "QF/sys.h"
const vec4f_t points[] = {
{-1, -1, 1},
{ 1, 1, 1},
{-1, 1, -1},
{ 1, -1, -1},
{-1, -1, -1},
{ 1, 1, -1},
{-1, 1, 1},
{ 1, -1, 1},
{ 0, 0, 0},
};
struct {
const vec4f_t *points;
int num_points;
sphere_t expect;
} tests[] = {
{0, 0, {{ 0, 0, 0}, 0}},
{points, 1, {{-1, -1, 1}, 0}},
{points, 2, {{ 0, 0, 1}, 1.41421356}},
{points, 3, {{-0.333333343, 0.333333343, 0.333333343}, 1.63299322}},
{points, 4, {{0, 0, 0}, 1.73205081}},
};
#define num_tests (sizeof (tests) / sizeof (tests[0]))
static inline float
rnd (mtstate_t *mt)
{
union {
uint32_t u;
float f;
} uf;
do {
uf.u = mtwist_rand (mt) & 0x007fffff;
} while (!uf.u);
uf.u |= 0x40000000;
return uf.f - 3.0;
}
int
main (int argc, const char **argv)
{
int res = 0;
size_t i, j;
vspheref_t sphere;
mtstate_t mt;
double start, end;
for (i = 0; i < num_tests; i ++) {
sphere = CircumSphere_vf (tests[i].points, tests[i].num_points);
if (VectorDistance_fast (sphere.center, tests[i].expect.center) > 1e-4
|| fabs (sphere.radius - tests[i].expect.radius) > 1e-4) {
res = 1;
printf ("test %d failed\n", (int) i);
printf ("expect: [%.9g %.9g %.9g],%.9g\n",
VectorExpand (tests[i].expect.center),
tests[i].expect.radius);
printf ("got : [%.9g %.9g %.9g],%.9g\n",
VectorExpand (sphere.center),
sphere.radius);
}
}
mtwist_seed (&mt, 0);
start = Sys_DoubleTime ();
for (i = 0; !res && i < 1000000; i++) {
vec4f_t cloud[4];
vspheref_t cc;
vec_t r2;
for (j = 0; j < 4; j++) {
VectorSet (rnd (&mt), rnd (&mt), rnd (&mt), cloud[j]);
}
cc = CircumSphere_vf (cloud, 4);
r2 = cc.radius * cc.radius;
for (j = 0; j < 4; j++) {
if (fabs (VectorDistance_fast (cloud[j], cc.center) - r2)
< 1e-3 * r2)
continue;
printf ("%d %.9g - %.9g = %.9g\n", (int)j,
VectorDistance_fast (cloud[j], cc.center), r2,
VectorDistance_fast (cloud[j], cc.center) - r2);
printf ("[%.9g %.9g %.9g] - [%.9g %.9g %.9g] = %.9g != %.9g\n",
VectorExpand (cloud[j]), VectorExpand (cc.center),
VectorDistance_fast (cloud[j], cc.center), r2);
res = 1;
}
}
end = Sys_DoubleTime ();
printf ("%d itterations in %gs: %g itters/second\n", (int) i, end - start,
i / (end - start));
return res;
}

103
libs/util/test/test-sebvf.c Normal file
View file

@ -0,0 +1,103 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "QF/mathlib.h"
#include "QF/mersenne.h"
#include "QF/sys.h"
#include "QF/simd/vec4f.h"
const vec4f_t points[] = {
{-1, -1, 1, 1},
{ 1, 1, 1, 1},
{-1, 1, -1, 1},
{ 1, -1, -1, 1},
{-1, -1, -1, 1},
{ 1, 1, -1, 1},
{-1, 1, 1, 1},
{ 1, -1, 1, 1},
{ 0, 0, 0, 1},
};
struct {
const vec4f_t *points;
int num_points;
vspheref_t expect;
} tests[] = {
{0, 0, {{ 0, 0, 0, 1}, 0}},
{points, 1, {{-1, -1, 1, 1}, 0}},
{points, 2, {{ 0, 0, 1, 1}, 1.41421356}},
{points, 3, {{-0.333333343, 0.333333343, 0.333333343, 1}, 1.63299322}},
{points, 4, {{0, 0, 0, 1}, 1.73205081}},
};
#define num_tests (sizeof (tests) / sizeof (tests[0]))
static inline float
rnd (mtstate_t *mt)
{
union {
uint32_t u;
float f;
} uf;
do {
uf.u = mtwist_rand (mt) & 0x007fffff;
} while (!uf.u);
uf.u |= 0x40000000;
return uf.f - 1.0;
}
int
main (int argc, const char **argv)
{
int res = 0;
size_t i, j;
vspheref_t sphere;
mtstate_t mt;
double start, end;
for (i = 0; i < num_tests; i ++) {
sphere = SmallestEnclosingBall_vf (tests[i].points, tests[i].num_points);
if (VectorDistance_fast (sphere.center, tests[i].expect.center) > 1e-4
|| fabs (sphere.radius - tests[i].expect.radius) > 1e-4) {
res = 1;
printf ("test %d failed\n", (int) i);
printf ("expect: [%.9g %.9g %.9g],%.9g\n",
VectorExpand (tests[i].expect.center),
tests[i].expect.radius);
printf ("got : [%.9g %.9g %.9g],%.9g\n",
VectorExpand (sphere.center),
sphere.radius);
}
}
mtwist_seed (&mt, 0);
start = Sys_DoubleTime ();
for (i = 0; !res && i < 1000000; i++) {
vec4f_t cloud[10];
vspheref_t seb;
vec_t r2;
for (j = 0; j < 5; j++) {
VectorSet (rnd (&mt), rnd (&mt), rnd (&mt), cloud[j]);
}
seb = SmallestEnclosingBall_vf (cloud, 5);
r2 = seb.radius * seb.radius;
for (j = 0; j < 5; j++) {
if (VectorDistance_fast (cloud[j], seb.center) - r2
> 1e-5 * r2) {
res = 1;
printf ("%d %.9g - %.9g = %.9g\n", (int)j,
VectorDistance_fast (cloud[j], seb.center), r2,
VectorDistance_fast (cloud[j], seb.center) - r2);
printf ("[%.9g %.9g %.9g] - [%.9g %.9g %.9g] = %.9g > %.9g\n",
VectorExpand (cloud[j]), VectorExpand (seb.center),
VectorDistance_fast (cloud[j], seb.center), r2);
}
}
}
end = Sys_DoubleTime ();
printf ("%d iterations in %gs: %g iters/second\n", (int) i, end - start,
i / (end - start));
return res;
}