[util] Write some tests for utf8 r/w

And fix some errors with 5-byte encodings.
This commit is contained in:
Bill Currie 2021-07-27 23:17:03 +09:00
parent e39bc83a6a
commit 8fdd9c1f5a
3 changed files with 122 additions and 4 deletions

View file

@ -219,8 +219,8 @@ MSG_WriteUTF8 (sizebuf_t *sb, unsigned utf8)
utf8 <<= 2;
} else if (utf8 & 0x03e00000) {
buf = SZ_GetSpace (sb, count = 5);
*buf++ = 0xf8 | ((utf8 & 0x30000000) >> 28); // 2 bits
utf8 <<= 4;
*buf++ = 0xf8 | ((utf8 & 0x03000000) >> 24); // 2 bits
utf8 <<= 8;
} else if (utf8 & 0x001f0000) {
buf = SZ_GetSpace (sb, count = 4);
*buf++ = 0xf0 | ((utf8 & 0x001c0000) >> 18); // 3 bits
@ -231,7 +231,7 @@ MSG_WriteUTF8 (sizebuf_t *sb, unsigned utf8)
utf8 <<= 20;
} else if (utf8 & 0x00000780) {
buf = SZ_GetSpace (sb, count = 2);
*buf++ = 0xc0 | ((utf8 & 0x000007c0) >> 6); // 5 bits
*buf++ = 0xc0 | ((utf8 & 0x000007c0) >> 6); // 5 bits
utf8 <<= 26;
} else {
buf = SZ_GetSpace (sb, count = 1);

View file

@ -18,7 +18,8 @@ libs_util_tests = \
libs/util/test/test-sebvf \
libs/util/test/test-seg \
libs/util/test/test-set \
libs/util/test/test-simd
libs/util/test/test-simd \
libs/util/test/test-utf8
TESTS += $(libs_util_tests)
@ -103,3 +104,7 @@ libs_util_test_test_set_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_simd_SOURCES=libs/util/test/test-simd.c
libs_util_test_test_simd_LDADD=libs/util/libQFutil.la
libs_util_test_test_simd_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_utf8_SOURCES=libs/util/test/test-utf8.c
libs_util_test_test_utf8_LDADD=libs/util/libQFutil.la
libs_util_test_test_utf8_DEPENDENCIES=libs/util/libQFutil.la

113
libs/util/test/test-utf8.c Normal file
View file

@ -0,0 +1,113 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include "QF/msg.h"
#include "QF/sizebuf.h"
// Backing storage for the size buffer used by every test case.
static byte buffer[1024];

// Write side of the round trip: encoded bytes land in buffer.
static sizebuf_t sb = {
	.data = buffer,
	.maxsize = sizeof (buffer),
};

// Read side of the round trip: decodes from the same size buffer.
static qmsg_t msg = {
	.message = &sb,
};
/** One round-trip case for the UTF-8 writer/reader pair.
 */
typedef struct {
	int32_t     input;	///< value handed to the writer and expected back
	unsigned    bytes;	///< expected number of bytes written and read
} utf8_test_t;

/** Round-trip cases, run in the order listed.
 *
 * The expected length grows each time the value needs more than 7, 11,
 * 16, 21 or 26 significant bits.
 */
static utf8_test_t tests[] = {
	// mixed bit patterns, from widest to narrowest
	{0x43214321, 6},
	{0x55555555, 6},
	{0x2aaaaaaa, 6},
	{0x55aa55aa, 6},
	{0x1df001ed, 6}, // hey, why not? good bit pattern
	{0x089abcde, 6},
	{0x049abcde, 6},

	{0x023abcde, 5},
	{0x012abcde, 5},
	{0x008abcde, 5},
	{0x004abcde, 5},
	{0x002abcde, 5},

	{0x001abcde, 4},
	{0x000abcde, 4},
	{0x0006bcde, 4},
	{0x0002bcde, 4},
	{0x0001bcde, 4},

	{0x0000bcde, 3},
	{0x00007cde, 3},
	{0x00003cde, 3},
	{0x00001cde, 3},
	{0x00000cde, 3},

	{0x000004de, 2},
	{0x000002de, 2},
	{0x000001de, 2},
	{0x000000de, 2},

	{0x0000005e, 1},
	{0x0000004e, 1},
	{0x0000002e, 1},
	{0x0000001e, 1},
	{0x0000000e, 1},
	{0x00000006, 1},
	{0x00000000, 1},

	// a single set bit, walking from bit 30 down to bit 0
	{0x40000000, 6},
	{0x20000000, 6},
	{0x10000000, 6},
	{0x08000000, 6},
	{0x04000000, 6},

	{0x02000000, 5},
	{0x01000000, 5},
	{0x00800000, 5},
	{0x00400000, 5},
	{0x00200000, 5},

	{0x00100000, 4},
	{0x00080000, 4},
	{0x00040000, 4},
	{0x00020000, 4},
	{0x00010000, 4},

	{0x00008000, 3},
	{0x00004000, 3},
	{0x00002000, 3},
	{0x00001000, 3},
	{0x00000800, 3},

	{0x00000400, 2},
	{0x00000200, 2},
	{0x00000100, 2},
	{0x00000080, 2},

	{0x00000040, 1},
	{0x00000020, 1},
	{0x00000010, 1},
	{0x00000008, 1},
	{0x00000004, 1},
	{0x00000002, 1},
	{0x00000001, 1},
};

// Number of entries in the tests table.
#define num_tests (sizeof (tests) / sizeof (tests[0]))
int
main (int argc, const char **argv)
{
int res = 0;
for (size_t i = 0; i < num_tests; i++) {
sb.cursize = 0;
msg.readcount = 0;
msg.badread = 0;
MSG_WriteUTF8 (&sb, tests[i].input);
int32_t output = MSG_ReadUTF8 (&msg);
printf ("%d %08x\n", (int) i, tests[i].input);
SZ_Dump (&sb);
if (sb.cursize != tests[i].bytes || msg.readcount != tests[i].bytes
|| output != tests[i].input || msg.badread) {
res |= 1;
printf ("test %d failed\n", (int) i);
printf ("expect: %8x %d %d 0\n",
tests[i].input, tests[i].bytes, tests[i].bytes);
printf ("got : %8x %d %d %d\n",
output, sb.cursize, msg.readcount, msg.badread);
}
}
return res;
}