header line folding fix

2025-05-30 08:21:25 +00:00 · 2019-02-15 14:13:14 +00:00 · 2019-02-15 14:13:14 +00:00 · 06019034b0
commit 06019034b0
parent 911c5cdd66
3 changed files with 101 additions and 23 deletions
--- a/7
+++ b/7
@ -1,3 +1,10 @@
 2019-02-15  Richard Frith-Macdonald <rfm@gnu.org>
 	* Source/GSMime.m: When encoding quoted words in a header, ensure
 	we don't split multibyte utf-8 characters into separate words.
 	* Tests/base/GSMime/general.m: Add test for encode/decode folded
 	subject line with a euro symbol at the fold point.
 2019-02-14  Richard Frith-Macdonald <rfm@gnu.org>
 	* Source/GSPrivate.h:
--- a/Source/Additions/GSMime.m
+++ b/Source/Additions/GSMime.m
@ -3808,7 +3808,7 @@ static char* _charsToEncode = "()<>@,;:_\"/[]?.=";
 static NSUInteger
 quotableLength(const uint8_t *ptr, NSUInteger size, NSUInteger max,
-  NSUInteger *quotedLength)
+  NSUInteger *quotedLength, BOOL utf8)
 {
  NSUInteger    encoded;
  NSUInteger    index;
@ -3816,17 +3816,45 @@ quotableLength(const uint8_t *ptr, NSUInteger size, NSUInteger max,
  for (encoded = index = 0; index < size; index++)
    {
      uint8_t   c = ptr[index];
      int       add = 1;
      if (c < 32 || c >= 127 || strchr(_charsToEncode, c))
        {
-          add += 2;
+          if (encoded + 3 > max)
            {
              break;
            }
          encoded += 3;
        }
-      if (encoded + add > max)
+      else
        {
-          break;
+          if (encoded >= max)
            {
              break;
            }
          encoded++;
        }
    }
  if (YES == utf8 && index < size)
    {
      uint8_t   c = ptr[index];
      /* We are breaking up a utf-8 string, so we must make sure
       * we don't break inside a character.
       */
      if ((c & 0xc0) == 0x80)
        {
          /* The next byte is a continuation byte, so we must be
           * inside a utf-8 codepoint and need to step back out
           * of it.
           */
          do
            {
              encoded -= 3;
              c = ptr[--index];
            }
          while ((c & 0xc0) == 0x80);
        }
      encoded += add;
    }
  *quotedLength = encoded;
  return index;
@ -4062,8 +4090,13 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
      NSString          *cset = selectCharacterSet(str, &d);
      const uint8_t     *ptr = (const uint8_t*)[d bytes];
      NSUInteger        len = [d length];
      BOOL              utf8 = NO;
-      if ([cset isEqualToString: @"us-ascii"])
+      if ([cset isEqualToString: @"utf-8"])
        {
          utf8 = YES;
        }
      else if ([cset isEqualToString: @"us-ascii"])
        {
          if (0 == fold)
            {
@ -4152,7 +4185,7 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
              uint8_t           *buffer;
              NSUInteger        existingLength;
              NSUInteger        quotedLength;
-              NSUInteger        charLength;
+              NSUInteger        byteLength;
              uint8_t           style = 'Q';
              /* Calculate the number of encoded characters we can
@ -4171,23 +4204,34 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
                  offset = 1;
                }
-              charLength = quotableLength(ptr + pos, len - pos,
+              byteLength = quotableLength(ptr + pos, len - pos,
-                fold - offset - overhead, &quotedLength);
+                fold - offset - overhead, &quotedLength, utf8);
-              if (quotedLength > (charLength * 4) / 3)
+              if (quotedLength > (byteLength * 4) / 3)
                {
                  /* Using base64 is more compact than using quoted
                   * text, so lets do that.
                   */
                  style = 'B';
-                  charLength = ((fold - offset - overhead) / 4) * 3;
+                  byteLength = ((fold - offset - overhead) / 4) * 3;
-                  if (charLength >= len - pos)
+                  if (byteLength >= len - pos)
                    {
                      /* If we have less text than we can fit,
                       * just encode all of it.
                       */
-                      charLength = len - pos;
+                      byteLength = len - pos;
                    }
-                  quotedLength = 4 * ((charLength + 2) / 3);
+                  else if (YES == utf8
                    && (ptr[pos + byteLength] & 0xc0) == 0x80)
                    {
                      /* The byte after the end of the data we propose
                       * to encode is a utf8 continuation byte
                       * so step back to the character boundary.
                       * Continuation bytes have the bit pattern 10xxxxxx
                       * so they must be detected by masking with 0xc0;
                       * a modulo would only ever match the byte 0x80.
                       */
                      do {
                        byteLength--;
                      } while ((ptr[pos + byteLength] & 0xc0) == 0x80);
                    }
                  quotedLength = 4 * ((byteLength + 2) / 3);
                }
              /* make sure we have enough space in the output buffer.
@ -4207,21 +4251,31 @@ appendString(NSMutableData *m, NSUInteger offset, NSUInteger fold,
              *buffer++ = '?';
              if ('Q' == style)
                {
-                  quotedWord(ptr + pos, charLength, buffer);
+                  quotedWord(ptr + pos, byteLength, buffer);
                }
              else
                {
-                  GSPrivateEncodeBase64(ptr + pos, charLength, buffer);
+                  GSPrivateEncodeBase64(ptr + pos, byteLength, buffer);
                }
              buffer[quotedLength] = '?';
              buffer[quotedLength + 1] = '=';
              offset += quotedLength + overhead;
-              pos += charLength;
+              pos += byteLength;
            }
        }
      return offset;
    }
 }
 /* For testing
 + (NSUInteger) appendString: (NSString*)str
                         to: (NSMutableData*)m
                         at: (NSUInteger)offset
                       fold: (NSUInteger)fold
                         ok: (BOOL*)ok
 {
  return appendString(m, offset, fold, str, ok);
 }
 */
 /**
 * Returns the full text of the header, built from its component parts,
--- a/Tests/base/GSMime/general.m
+++ b/Tests/base/GSMime/general.m
@ -3,6 +3,14 @@
 #import <GNUstepBase/GSMime.h>
 #import "Testing.h"
/* Declares the class method +appendString:to:at:fold:ok: on GSMimeHeader
 * in a category named Testing, so that test code can refer to it.
 * NOTE(review): in the Source/Additions/GSMime.m part of this change the
 * method of the same name is shown inside a "For testing" comment, so it
 * looks like it is not compiled by default - confirm before calling it.
 */
@interface GSMimeHeader (Testing)
 + (NSUInteger) appendString: (NSString*)str
                         to: (NSMutableData*)m
                         at: (NSUInteger)offset
                       fold: (NSUInteger)fold
                         ok: (BOOL*)ok;
@end
 static GSMimeDocument *
 parse(GSMimeParser **parserPointer, NSData *data)
 {
@ -239,18 +247,27 @@ int main()
  PASS_EQUAL(idoc, doc, "mime12.dat documents are the same");
  data = [idoc rawMimeData];
  doc = [GSMimeParser documentFromData: data];
-  PASS_EQUAL(idoc, doc, "rawMimeData reproduces document with 'q' header");
+  PASS_EQUAL(idoc, doc, "rawMimeData reproduces document with 'Q' header");
-  NSLog(@"Made\n%@\nOrig\n%@", data, orig);
+//  NSLog(@"Made\n%@\nOrig\n%@", data, orig);
  if (NO == oldStyleFolding)
    {
      [idoc setHeader: @"Subject" value: @"==répà==" parameters: nil];
      data = [idoc rawMimeData];
-      NSLog(@"Made\n%@", data);
+//      NSLog(@"Made\n%@", data);
      doc = [GSMimeParser documentFromData: data];
-      PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with 'b' header");
+      PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with 'B' header");
      /* Header where Euro character would cross folding boundary */
      [idoc setHeader: @"Subject" value:
        @"Benefit from 6.1% APR rate 111111on los over €20,000"
        parameters: nil];
      data = [idoc rawMimeData];
 //      NSLog(@"Made\n%@", data);
      doc = [GSMimeParser documentFromData: data];
      PASS_EQUAL(doc, idoc, "rawMimeData reproduces document with long header");
    }
-  
+
  [arp release]; arp = nil;
  return 0;
 }