header line folding fix

This commit is contained in:
Richard Frith-Macdonald 2019-02-15 14:13:14 +00:00
parent 911c5cdd66
commit 06019034b0
3 changed files with 101 additions and 23 deletions

View file

@ -1,3 +1,10 @@
2019-02-15 Richard Frith-Macdonald <rfm@gnu.org>
* Source/GSMime.m: When encoding quoted words in a header, ensure
we don't split multibyte utf-8 characters into separate words.
* Tests/base/GSMime/general.m: Add test for encode/decode folded
subject line with a euro symbol at the fold point.
2019-02-14 Richard Frith-Macdonald <rfm@gnu.org> 2019-02-14 Richard Frith-Macdonald <rfm@gnu.org>
* Source/GSPrivate.h: * Source/GSPrivate.h:

View file

@ -3808,7 +3808,7 @@ static char* _charsToEncode = "()<>@,;:_\"/[]?.=";
static NSUInteger static NSUInteger
quotableLength(const uint8_t *ptr, NSUInteger size, NSUInteger max, quotableLength(const uint8_t *ptr, NSUInteger size, NSUInteger max,
NSUInteger *quotedLength) NSUInteger *quotedLength, BOOL utf8)
{ {
NSUInteger encoded; NSUInteger encoded;
NSUInteger index; NSUInteger index;
@ -3816,17 +3816,45 @@ quotableLength(const uint8_t *ptr, NSUInteger size, NSUInteger max,
for (encoded = index = 0; index < size; index++) for (encoded = index = 0; index < size; index++)
{ {
uint8_t c = ptr[index]; uint8_t c = ptr[index];
int add = 1;
if (c < 32 || c >= 127 || strchr(_charsToEncode, c)) if (c < 32 || c >= 127 || strchr(_charsToEncode, c))
{ {
add += 2; if (encoded + 3 > max)
{
break;
}
encoded += 3;
} }
if (encoded + add > max) else
{ {
break; if (encoded >= max)
{
break;
}
encoded++;
}
}
if (YES == utf8 && index < size)
{
uint8_t c = ptr[index];
/* We are breaking up a utf-8 string, so we must make sure
* we don't break inside a character.
*/
if ((c & 0xc0) == 0x80)
{
/* The next byte is a continuation byte, so we must be
* inside a utf-8 codepoint and need to step back out
* of it.
*/
do
{
encoded -= 3;
c = ptr[--index];
}
while ((c & 0xc0) == 0x80);
} }
encoded += add;
} }
*quotedLength = encoded; *quotedLength = encoded;
return index; return index;
@ -4062,8 +4090,13 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
NSString *cset = selectCharacterSet(str, &d); NSString *cset = selectCharacterSet(str, &d);
const uint8_t *ptr = (const uint8_t*)[d bytes]; const uint8_t *ptr = (const uint8_t*)[d bytes];
NSUInteger len = [d length]; NSUInteger len = [d length];
BOOL utf8 = NO;
if ([cset isEqualToString: @"us-ascii"]) if ([cset isEqualToString: @"utf-8"])
{
utf8 = YES;
}
else if ([cset isEqualToString: @"us-ascii"])
{ {
if (0 == fold) if (0 == fold)
{ {
@ -4152,7 +4185,7 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
uint8_t *buffer; uint8_t *buffer;
NSUInteger existingLength; NSUInteger existingLength;
NSUInteger quotedLength; NSUInteger quotedLength;
NSUInteger charLength; NSUInteger byteLength;
uint8_t style = 'Q'; uint8_t style = 'Q';
/* Calculate the number of encoded characters we can /* Calculate the number of encoded characters we can
@ -4171,23 +4204,34 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
offset = 1; offset = 1;
} }
charLength = quotableLength(ptr + pos, len - pos, byteLength = quotableLength(ptr + pos, len - pos,
fold - offset - overhead, &quotedLength); fold - offset - overhead, &quotedLength, utf8);
if (quotedLength > (charLength * 4) / 3) if (quotedLength > (byteLength * 4) / 3)
{ {
/* Using base64 is more compact than using quoted /* Using base64 is more compact than using quoted
* text, so lets do that. * text, so lets do that.
*/ */
style = 'B'; style = 'B';
charLength = ((fold - offset - overhead) / 4) * 3; byteLength = ((fold - offset - overhead) / 4) * 3;
if (charLength >= len - pos) if (byteLength >= len - pos)
{ {
/* If we have less text than we can fit, /* If we have less text than we can fit,
* just encode all of it. * just encode all of it.
*/ */
charLength = len - pos; byteLength = len - pos;
} }
quotedLength = 4 * ((charLength + 2) / 3); else if (YES == utf8
&& (ptr[pos + byteLength] % 0xc0) == 0x80)
{
/* The byte after the end of the data we propose
* to encode is a utf8 continuation byte
* so step back to the character boundary.
*/
do {
byteLength--;
} while ((ptr[pos + byteLength] % 0xc0) == 0x80);
}
quotedLength = 4 * ((byteLength + 2) / 3);
} }
/* make sure we have enough space in the output buffer. /* make sure we have enough space in the output buffer.
@ -4207,21 +4251,31 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
*buffer++ = '?'; *buffer++ = '?';
if ('Q' == style) if ('Q' == style)
{ {
quotedWord(ptr + pos, charLength, buffer); quotedWord(ptr + pos, byteLength, buffer);
} }
else else
{ {
GSPrivateEncodeBase64(ptr + pos, charLength, buffer); GSPrivateEncodeBase64(ptr + pos, byteLength, buffer);
} }
buffer[quotedLength] = '?'; buffer[quotedLength] = '?';
buffer[quotedLength + 1] = '='; buffer[quotedLength + 1] = '=';
offset += quotedLength + overhead; offset += quotedLength + overhead;
pos += charLength; pos += byteLength;
} }
} }
return offset; return offset;
} }
} }
/* For testing
+ (NSUInteger) appendString: (NSString*)str
to: (NSMutableData*)m
at: (NSUInteger)offset
fold: (NSUInteger)fold
ok: (BOOL*)ok
{
return appendString(m, offset, fold, str, ok);
}
*/
/** /**
* Returns the full text of the header, built from its component parts, * Returns the full text of the header, built from its component parts,

View file

@ -3,6 +3,14 @@
#import <GNUstepBase/GSMime.h> #import <GNUstepBase/GSMime.h>
#import "Testing.h" #import "Testing.h"
@interface GSMimeHeader (Testing)
+ (NSUInteger) appendString: (NSString*)str
to: (NSMutableData*)m
at: (NSUInteger)offset
fold: (NSUInteger)fold
ok: (BOOL*)ok;
@end
static GSMimeDocument * static GSMimeDocument *
parse(GSMimeParser **parserPointer, NSData *data) parse(GSMimeParser **parserPointer, NSData *data)
{ {
@ -239,18 +247,27 @@ int main()
PASS_EQUAL(idoc, doc, "mime12.dat documents are the same"); PASS_EQUAL(idoc, doc, "mime12.dat documents are the same");
data = [idoc rawMimeData]; data = [idoc rawMimeData];
doc = [GSMimeParser documentFromData: data]; doc = [GSMimeParser documentFromData: data];
PASS_EQUAL(idoc, doc, "rawMimeData reproduces document with 'q' header"); PASS_EQUAL(idoc, doc, "rawMimeData reproduces document with 'Q' header");
NSLog(@"Made\n%@\nOrig\n%@", data, orig); // NSLog(@"Made\n%@\nOrig\n%@", data, orig);
if (NO == oldStyleFolding) if (NO == oldStyleFolding)
{ {
[idoc setHeader: @"Subject" value: @"==répà==" parameters: nil]; [idoc setHeader: @"Subject" value: @"==répà==" parameters: nil];
data = [idoc rawMimeData]; data = [idoc rawMimeData];
NSLog(@"Made\n%@", data); // NSLog(@"Made\n%@", data);
doc = [GSMimeParser documentFromData: data]; doc = [GSMimeParser documentFromData: data];
PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with 'b' header"); PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with 'B' header");
/* Header where Euro character would cross folding boundary */
[idoc setHeader: @"Subject" value:
@"Benefit from 6.1% APR rate 111111on los over €20,000"
parameters: nil];
data = [idoc rawMimeData];
// NSLog(@"Made\n%@", data);
doc = [GSMimeParser documentFromData: data];
PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with long header");
} }
[arp release]; arp = nil; [arp release]; arp = nil;
return 0; return 0;
} }