u8_analyze: return false on any invalid character; invalid or overlong characters are not allowed in code at all

This commit is contained in:
Wolfgang Bumiller 2012-12-22 22:31:10 +01:00
parent fe3d8e44e6
commit aec2284f45

View file

@@ -48,7 +48,7 @@ static bool u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch,
Uchar ch; Uchar ch;
i = 0; i = 0;
findchar: /* findchar: */
while (i < _maxlen && s[i] && (bits = utf8_lengths[s[i]]) == 0) while (i < _maxlen && s[i] && (bits = utf8_lengths[s[i]]) == 0)
++i; ++i;
@@ -71,14 +71,20 @@ findchar:
if ( (s[i+j] & 0xC0) != 0x80 ) if ( (s[i+j] & 0xC0) != 0x80 )
{ {
i += j; i += j;
goto findchar; /* in gmqcc, invalid / overlong encodings are considered an error
* goto findchar;
*/
return false;
} }
ch = (ch << 6) | (s[i+j] & 0x3F); ch = (ch << 6) | (s[i+j] & 0x3F);
} }
if (ch < utf8_range[bits] || ch >= 0x10FFFF) if (ch < utf8_range[bits] || ch >= 0x10FFFF)
{ {
i += bits; /* same: error
goto findchar; * i += bits;
* goto findchar;
*/
return false;
} }
if (_start) if (_start)