[util] Give set_count a >8x speed boost

I knew counting bits individually was slow, but it never really mattered
until now. However, I didn't expect such a dramatic boost just from
switching to a lookup table that maps bytes to bit counts. 16-bit words
would be faster still, but the 64kB lookup table would probably start
hurting cache performance, and 32-bit words (4GB table) definitely would
ruin the cache. The universe isn't big enough for 64 bits :)
This commit is contained in:
Bill Currie 2021-07-27 13:54:22 +09:00
parent 50740c1014
commit 163d147044
2 changed files with 35 additions and 4 deletions

View file

@ -553,12 +553,31 @@ set_is_member (const set_t *set, unsigned x)
unsigned
set_count (const set_t *set)
{
static byte bit_counts[] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
unsigned count = 0;
unsigned i;
byte *b = (byte *) set->map;
unsigned i = SET_WORDS (set) * sizeof (set_bits_t);
for (i = 0; i < set->size; i++)
if (_set_is_member (set, i))
count++;
while (i-- > 0) {
count += bit_counts[*b++];
}
return count;
}

View file

@ -83,6 +83,12 @@ check_size (const set_t *set, const set_t *unused)
return set->size;
}
/*
	check_count

	Test adapter with the two-set signature used by the test table:
	returns set_count () of the first set. The second set is not used.
*/
static int
check_count (const set_t *set, const set_t *unused)
{
	unsigned    members = set_count (set);

	return members;
}
static set_t *
make_5 (void)
{
@ -198,6 +204,7 @@ struct {
{make_55, make_5, set_union, set_is_equivalent, 0, "{5 55}"},
{make_55, make_5, set_union, set_is_intersecting, 1, "{5 55}"},
{make_55, make_5, set_union, set_is_disjoint, 0, "{5 55}"},
{make_55, make_5, set_union, check_count, 2, "{5 55}"},
{make_not_SIZE, make_everything, 0, set_is_equivalent, 0, 0},
{make_not_SIZE, make_everything, 0, set_is_intersecting, 1, 0},
{make_not_SIZE, make_everything, 0, set_is_disjoint, 0, 0},
@ -210,6 +217,11 @@ struct {
{make_5, make_everything, 0, set_is_equivalent, 0, 0},
{make_5, make_everything, 0, set_is_intersecting, 1, 0},
{make_5, make_everything, 0, set_is_disjoint, 0, 0},
{make_empty, 0, 0, check_count, 0, 0},
{make_everything, 0, 0, check_count, 0, 0},
{make_5, 0, 0, check_count, 1, 0},
{make_not_5, 0, 0, check_count, 1, 0},
{make_0_to_SIZEm1, 0, 0, check_size, SIZE, 0},
};
#define num_tests (sizeof (tests) / sizeof (tests[0]))