From aec2284f4597ea39375fc2c9ac4220a0abd495a8 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Sat, 22 Dec 2012 22:31:10 +0100 Subject: [PATCH] u8_analyze: return false on any invalid character, we do not allow invalid/overlong characters in code at all --- utf8lib.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/utf8lib.c b/utf8lib.c index 3c4749b..b26f2f1 100644 --- a/utf8lib.c +++ b/utf8lib.c @@ -48,7 +48,7 @@ static bool u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch, Uchar ch; i = 0; -findchar: +/* findchar: */ while (i < _maxlen && s[i] && (bits = utf8_lengths[s[i]]) == 0) ++i; @@ -71,14 +71,20 @@ findchar: if ( (s[i+j] & 0xC0) != 0x80 ) { i += j; - goto findchar; + /* in gmqcc, invalid / overlong encodings are considered an error + * goto findchar; + */ + return false; } ch = (ch << 6) | (s[i+j] & 0x3F); } if (ch < utf8_range[bits] || ch >= 0x10FFFF) { - i += bits; - goto findchar; + /* same: error + * i += bits; + * goto findchar; + */ + return false; } if (_start)