From 163d14704491e56d98909b0180c46913cd601366 Mon Sep 17 00:00:00 2001 From: Bill Currie Date: Tue, 27 Jul 2021 13:54:22 +0900 Subject: [PATCH] [util] Give set_count a >8x speed boost I knew counting bits individually was slow, but it never really mattered until now. However, I didn't expect such a dramatic boost just by going to mapping bytes to bit counts. 16-bit words would be faster still, but the 64kB lookup table would probably start hurting cache performance, and 32-bit words (4GB table) definitely would ruin the cache. The universe isn't big enough for 64-bits :) --- libs/util/set.c | 27 +++++++++++++++++++++++---- libs/util/test/test-set.c | 12 ++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/libs/util/set.c b/libs/util/set.c index ffb0e5489..f0215454c 100644 --- a/libs/util/set.c +++ b/libs/util/set.c @@ -553,12 +553,31 @@ set_is_member (const set_t *set, unsigned x) unsigned set_count (const set_t *set) { + static const byte bit_counts[] = { /* bit_counts[v] is the number of 1 bits in byte value v (256 entries); read-only, hence const */ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, + }; unsigned count = 0; - unsigned i; + const byte *b = (const byte *) set->map; /* read-only cursor over the raw bytes of set->map */ + unsigned i = SET_WORDS (set) * sizeof (set_bits_t); /* number of bytes still to scan */ - for (i = 0; i < set->size; i++) - if (_set_is_member (set, i)) - count++; + while 
(i-- > 0) { + count += bit_counts[*b++]; /* add this byte's bit count, then step to the next byte */ + } return count; } diff --git a/libs/util/test/test-set.c b/libs/util/test/test-set.c index e1b93d137..6e326268a 100644 --- a/libs/util/test/test-set.c +++ b/libs/util/test/test-set.c @@ -83,6 +83,12 @@ check_size (const set_t *set, const set_t *unused) return set->size; } +static int +check_count (const set_t *set, const set_t *unused) +{ + return set_count (set); +} + static set_t * make_5 (void) { @@ -198,6 +204,7 @@ struct { {make_55, make_5, set_union, set_is_equivalent, 0, "{5 55}"}, {make_55, make_5, set_union, set_is_intersecting, 1, "{5 55}"}, {make_55, make_5, set_union, set_is_disjoint, 0, "{5 55}"}, + {make_55, make_5, set_union, check_count, 2, "{5 55}"}, {make_not_SIZE, make_everything, 0, set_is_equivalent, 0, 0}, {make_not_SIZE, make_everything, 0, set_is_intersecting, 1, 0}, {make_not_SIZE, make_everything, 0, set_is_disjoint, 0, 0}, @@ -210,6 +217,11 @@ struct { {make_5, make_everything, 0, set_is_equivalent, 0, 0}, {make_5, make_everything, 0, set_is_intersecting, 1, 0}, {make_5, make_everything, 0, set_is_disjoint, 0, 0}, + {make_empty, 0, 0, check_count, 0, 0}, + {make_everything, 0, 0, check_count, 0, 0}, + {make_5, 0, 0, check_count, 1, 0}, + {make_not_5, 0, 0, check_count, 1, 0}, + {make_0_to_SIZEm1, 0, 0, check_size, SIZE, 0}, }; #define num_tests (sizeof (tests) / sizeof (tests[0]))