diff --git a/ChangeLog b/ChangeLog index 3e4cb9de7..14dc1e270 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2019-02-15 Richard Frith-Macdonald + + * Source/GSMime.m: When encoding quoted words in a header, ensure + we don't split multibyte utf-8 characters into separate words. + * Tests/base/GSMime/general.m: Add test for encode/decode folded + subject line wit a euro symbol at the fold point. + 2019-02-14 Richard Frith-Macdonald * Source/GSPrivate.h: diff --git a/Source/Additions/GSMime.m b/Source/Additions/GSMime.m index 490ff1903..5c9729fb3 100644 --- a/Source/Additions/GSMime.m +++ b/Source/Additions/GSMime.m @@ -3808,7 +3808,7 @@ static char* _charsToEncode = "()<>@,;:_\"/[]?.="; static NSUInteger quotableLength(const uint8_t *ptr, NSUInteger size, NSUInteger max, - NSUInteger *quotedLength) + NSUInteger *quotedLength, BOOL utf8) { NSUInteger encoded; NSUInteger index; @@ -3816,17 +3816,45 @@ quotableLength(const uint8_t *ptr, NSUInteger size, NSUInteger max, for (encoded = index = 0; index < size; index++) { uint8_t c = ptr[index]; - int add = 1; if (c < 32 || c >= 127 || strchr(_charsToEncode, c)) { - add += 2; + if (encoded + 3 > max) + { + break; + } + encoded += 3; } - if (encoded + add > max) + else { - break; + if (encoded >= max) + { + break; + } + encoded++; + } + } + + if (YES == utf8 && index < size) + { + uint8_t c = ptr[index]; + + /* We are breaking up a utf-8 string, so we must make sure + * we don't break inside a character. + */ + if ((c & 0xc0) == 0x80) + { + /* The next byte is a continuation byte, so we must be + * inside a utf-8 codepoint and need to step back out + * of it. + */ + do + { + encoded -= 3; + c = ptr[--index]; + } + while ((c & 0xc0) == 0x80); } - encoded += add; } *quotedLength = encoded; return index; @@ -4062,8 +4090,13 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold, NSString *cset = selectCharacterSet(str, &d); const uint8_t *ptr = (const uint8_t*)[d bytes]; NSUInteger len = [d length]; + BOOL utf8 = NO; - if ([cset isEqualToString: @"us-ascii"]) + if ([cset isEqualToString: @"utf-8"]) + { + utf8 = YES; + } + else if ([cset isEqualToString: @"us-ascii"]) { if (0 == fold) { @@ -4152,7 +4185,7 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold, uint8_t *buffer; NSUInteger existingLength; NSUInteger quotedLength; - NSUInteger charLength; + NSUInteger byteLength; uint8_t style = 'Q'; /* Calculate the number of encoded characters we can @@ -4171,23 +4204,34 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold, offset = 1; } - charLength = quotableLength(ptr + pos, len - pos, - fold - offset - overhead, "edLength); - if (quotedLength > (charLength * 4) / 3) + byteLength = quotableLength(ptr + pos, len - pos, + fold - offset - overhead, "edLength, utf8); + if (quotedLength > (byteLength * 4) / 3) { /* Using base64 is more compact than using quoted * text, so lets do that. */ style = 'B'; - charLength = ((fold - offset - overhead) / 4) * 3; - if (charLength >= len - pos) + byteLength = ((fold - offset - overhead) / 4) * 3; + if (byteLength >= len - pos) { /* If we have less text than we can fit, * just encode all of it. */ - charLength = len - pos; + byteLength = len - pos; } - quotedLength = 4 * ((charLength + 2) / 3); + else if (YES == utf8 + && (ptr[pos + byteLength] % 0xc0) == 0x80) + { + /* The byte after the end of the data we propose + * to encode is a utf8 continuation byte + * so step back to the character boundary. + */ + do { + byteLength--; + } while ((ptr[pos + byteLength] % 0xc0) == 0x80); + } + quotedLength = 4 * ((byteLength + 2) / 3); } /* make sure we have enough space in the output buffer. @@ -4207,21 +4251,31 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold, *buffer++ = '?'; if ('Q' == style) { - quotedWord(ptr + pos, charLength, buffer); + quotedWord(ptr + pos, byteLength, buffer); } else { - GSPrivateEncodeBase64(ptr + pos, charLength, buffer); + GSPrivateEncodeBase64(ptr + pos, byteLength, buffer); } buffer[quotedLength] = '?'; buffer[quotedLength + 1] = '='; offset += quotedLength + overhead; - pos += charLength; + pos += byteLength; } } return offset; } } +/* For testing ++ (NSUInteger) appendString: (NSString*)str + to: (NSMutableData*)m + at: (NSUInteger)offset + fold: (NSUInteger)fold + ok: (BOOL*)ok +{ + return appendString(m, offset, fold, str, ok); +} +*/ /** * Returns the full text of the header, built from its component parts, diff --git a/Tests/base/GSMime/general.m b/Tests/base/GSMime/general.m index 6d98546c0..dd0c1531a 100644 --- a/Tests/base/GSMime/general.m +++ b/Tests/base/GSMime/general.m @@ -3,6 +3,14 @@ #import #import "Testing.h" +@interface GSMimeHeader (Testing) ++ (NSUInteger) appendString: (NSString*)str + to: (NSMutableData*)m + at: (NSUInteger)offset + fold: (NSUInteger)fold + ok: (BOOL*)ok; +@end + static GSMimeDocument * parse(GSMimeParser **parserPointer, NSData *data) { @@ -239,18 +247,27 @@ int main() PASS_EQUAL(idoc, doc, "mime12.dat documents are the same"); data = [idoc rawMimeData]; doc = [GSMimeParser documentFromData: data]; - PASS_EQUAL(idoc, doc, "rawMimeData reproduces document with 'q' header"); - NSLog(@"Made\n%@\nOrig\n%@", data, orig); + PASS_EQUAL(idoc, doc, "rawMimeData reproduces document with 'Q' header"); +// NSLog(@"Made\n%@\nOrig\n%@", data, orig); if (NO == oldStyleFolding) { [idoc setHeader: @"Subject" value: @"==répà==" parameters: nil]; data = [idoc rawMimeData]; - NSLog(@"Made\n%@", data); +// NSLog(@"Made\n%@", data); doc = [GSMimeParser documentFromData: data]; - PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with 'b' header"); + PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with 'B' header"); + + /* Header where Euro character would cross folding boundary */ + [idoc setHeader: @"Subject" value: + @"Benefit from 6.1% APR rate 111111on los over €20,000" + parameters: nil]; + data = [idoc rawMimeData]; +// NSLog(@"Made\n%@", data); + doc = [GSMimeParser documentFromData: data]; + PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with long header"); } - + [arp release]; arp = nil; return 0; }