diff --git a/ChangeLog b/ChangeLog index 5d12b1c15..34e3d9930 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2000-11-17 Richard Frith-Macdonald + + * Headers/Foundation/GSMime.h: Added GSMimeEncodingContext etc + * Source/GSMime.m: Update to add method for general decoding of + different transfer encoding types including chunked (for http1.1). + * Documentation/gsdoc/GSMime.gsdoc: updated + * Documentation/gsdoc/GSMimeDocument.gsdoc: updated + * Documentation/gsdoc/GSMimeParser.gsdoc: updated + 2000-11-16 Richard Frith-Macdonald * Source/NSUserDefaults.m: ([-userLanguages]) updated to ensure we diff --git a/Documentation/gsdoc/GSMime.gsdoc b/Documentation/gsdoc/GSMime.gsdoc index f73259177..7fa607a4d 100644 --- a/Documentation/gsdoc/GSMime.gsdoc +++ b/Documentation/gsdoc/GSMime.gsdoc @@ -7,8 +7,8 @@ - 0.2 - 16 November, 2000 + 0.3 + 17 November, 2000 @@ -18,6 +18,27 @@ for representing MIME (and HTTP) documents and managing conversions to and from convenient internal formats.

+

+ Eventually the goal is to center round three classes - +

+ + document + + A container for the actual data (and headers) of a mime/http document. + + parser + + An object that can be fed data and will parse it into a document. + This object also provides various utility methods and an API + that permits overriding in order to extend the functionality to + cope with new document types. + + unparser + + An object to take a mime/http document and produce a data object + suitable for transmission. + +
The classes diff --git a/Documentation/gsdoc/GSMime.html b/Documentation/gsdoc/GSMime.html index 0d76126a6..78f144b7d 100644 --- a/Documentation/gsdoc/GSMime.html +++ b/Documentation/gsdoc/GSMime.html @@ -9,8 +9,8 @@
Richard Frith-Macdonald
-

Version: 0.2

-

Date: 16 November, 2000

+

Version: 0.3

+

Date: 17 November, 2000

Mime Parser

@@ -18,6 +18,28 @@ for representing MIME (and HTTP) documents and managing conversions to and from convenient internal formats.

+

+ + Eventually the goal is to center round three classes - +

+
+
document +
+ A container for the actual data (and headers) of a mime/http document. + +
parser +
+ An object that can be fed data and will parse it into a document. + This object also provides various utility methods and an API + that permits overriding in order to extend the functionality to + cope with new document types. + +
unparser +
+ An object to take a mime/http document and produce a data object + suitable for transmission. + +

The classes

  • GSMimeDocument diff --git a/Documentation/gsdoc/GSMimeDocument.gsdoc b/Documentation/gsdoc/GSMimeDocument.gsdoc index 50fe8fdce..e117ee9e5 100644 --- a/Documentation/gsdoc/GSMimeDocument.gsdoc +++ b/Documentation/gsdoc/GSMimeDocument.gsdoc @@ -48,6 +48,9 @@ Value This is the value of the header (normally lower case). + It may only be a small subset of the information in the header + with other information being split into separate fields + depending on the type of header.

    diff --git a/Documentation/gsdoc/GSMimeDocument.html b/Documentation/gsdoc/GSMimeDocument.html index 9186eb002..cf8d5d1a3 100644 --- a/Documentation/gsdoc/GSMimeDocument.html +++ b/Documentation/gsdoc/GSMimeDocument.html @@ -57,6 +57,9 @@

    Value
    This is the value of the header (normally lower case). + It may only be a small subset of the information in the header + with other information being split into separate fields + depending on the type of header. diff --git a/Documentation/gsdoc/GSMimeParser.gsdoc b/Documentation/gsdoc/GSMimeParser.gsdoc index 8271a4b65..a7c9d7b09 100644 --- a/Documentation/gsdoc/GSMimeParser.gsdoc +++ b/Documentation/gsdoc/GSMimeParser.gsdoc @@ -19,9 +19,8 @@

    This class provides support for parsing MIME messages - into GSMimeDocument objects. It is imtimately related - to the GSMimeDocument class and relys on that class for - aspects of the parsing operation. + into GSMimeDocument objects. Each parser object maintains + an associated document into which data is stored.

    @@ -101,6 +100,24 @@ subclasses override scanHeaders:named:into: to implement custom scanning.

    +

    + As a special case, for HTTP support, this method also parses + lines in the format of HTTP responses as if they were headers + named http. The resulting header info dictionary + contains - +

    + + HttpVersion + The full HTTP protocol version number + HttpMajorVersion + The first part of the version number + HttpMinorVersion + The second part of the version number + HttpStatus + The HTTP status code + Value + The text message (if any) after the status code + @@ -110,7 +127,7 @@ Returns YES if the parser is expecting to read mime headers, Returns NO is the parser has already been passed all the data containing headers, and is now waiting for the body of - trhe mime message (or has been passed all data). + the mime message (or has been passed all data). @@ -140,6 +157,89 @@ You should not call this method directly yourself, but may override it to support parsing of new headers.

    +

    + You should be aware of the parsing that the standard + implementation performs, and that needs to be + done for certain headers in order to permit the parser to + work generally - +

    + + + content-disposition + + + Parameters + + A dictionary containing parameters as key-value pairs + in lowercase + + Value + + The content disposition (excluding parameters) as a + lowercase string. + + + + + content-type + + + Parameters + + A dictionary containing parameters as key-value pairs + in lowercase. + + SubType + The MIME subtype lowercase + Type + The MIME type lowercase + value + The full MIME type (xxx/yyy) in lowercase + + + + content-transfer-encoding + + + Value + The transfer encoding type in lowercase + + + + http + + + HttpVersion + The HTTP protocol version number + HttpMajorVersion + The first component of the version number + HttpMinorVersion + The second component of the version number + HttpStatus + The response status value (numeric code) + Value + The text message (if any) + + + + transfer-encoding + + + Value + The transfer encoding type in lowercase + + + + + + + + scanSpace: + aScanner + + A convenience method to scan past any whitespace in the scanner + in preparation for scanning something more interesting that + comes after it. Returns YES if any space was read, NO otherwise. diff --git a/Documentation/gsdoc/GSMimeParser.html b/Documentation/gsdoc/GSMimeParser.html index ac64104e2..49d68e34b 100644 --- a/Documentation/gsdoc/GSMimeParser.html +++ b/Documentation/gsdoc/GSMimeParser.html @@ -23,9 +23,8 @@

    This class provides support for parsing MIME messages - into GSMimeDocument objects. It is imtimately related - to the GSMimeDocument class and relys on that class for - aspects of the parsing operation. + into GSMimeDocument objects. Each parser object maintains + an associated document into which data is stored.

    @@ -41,8 +40,9 @@
  • -parseHeader:
  • -parsingHeaders
  • -scanHeader:named:inTo: -
  • -scanSpecial: -
  • -scanToken: +
  • -scanPastSpace: +
  • -scanSpecial: +
  • -scanToken:

Class Methods

mimeParser

@@ -121,6 +121,27 @@ implement custom scanning.

+

+ + As a special case, for HTTP support, this method also parses + lines in the format of HTTP responses as if they were headers + named http. The resulting header info dictionary + contains - +

+ +
+
HttpVersion +
The full HTTP protocol version number +
HttpMajorVersion +
The first part of the version number +
HttpMinorVersion +
The second part of the version number +
HttpStatus +
The HTTP status code +
Value +
The text message (if any) after the status code +
+

parsingHeaders

@@ -129,7 +150,7 @@ Returns YES if the parser is expecting to read mime headers, Returns NO is the parser has already been passed all the data containing headers, and is now waiting for the body of - trhe mime message (or has been passed all data). + the mime message (or has been passed all data).

scanHeader:named:inTo:

@@ -161,9 +182,93 @@ override it to support parsing of new headers.

+

+ + You should be aware of the parsing that the standard + implementation performs, and that needs to be + done for certain headers in order to permit the parser to + work generally - +

+ +
+
content-disposition +
+
+
Parameters +
+ A dictionary containing parameters as key-value pairs + in lowercase + +
Value +
+ The content disposition (excluding parameters) as a + lowercase string. + +
+ + +
content-type +
+
+
Parameters +
+ A dictionary containing parameters as key-value pairs + in lowercase. + +
SubType +
The MIME subtype lowercase +
Type +
The MIME type lowercase +
Value +
The full MIME type (xxx/yyy) in lowercase +
+ + +
content-transfer-encoding +
+
+
Value +
The transfer encoding type in lowercase +
+ + +
http +
+
+
HttpVersion +
The HTTP protocol version number +
HttpMajorVersion +
The first component of the version number +
HttpMinorVersion +
The second component of the version number +
HttpStatus +
The response status value (numeric code) +
Value +
The text message (if any) +
+ + +
transfer-encoding +
+
+
Value +
The transfer encoding type in lowercase +
+ + +
+
-

scanSpecial:

+

scanPastSpace:

+- (BOOL) scanPastSpace: (NSScanner*)aScanner;
+ + A convenience method to scan past any whitespace in the scanner + in preparation for scanning something more interesting that + comes after it. Returns YES if any space was read, NO otherwise. + +
+

scanSpecial:

- (NSString*) scanSpecial: (NSScanner*)aScanner;
A convenience method to use a scanner (that is set up to scan a @@ -173,7 +278,7 @@ will contain a single space character.
-

scanToken:

+

scanToken:

- (NSString*) scanToken: (NSScanner*)aScanner;
A convenience method to use a scanner (that is set up to scan a diff --git a/Headers/gnustep/base/GSMime.h b/Headers/gnustep/base/GSMime.h index 1dc087de5..c72f7e958 100644 --- a/Headers/gnustep/base/GSMime.h +++ b/Headers/gnustep/base/GSMime.h @@ -58,6 +58,7 @@ typedef enum { GSMimeEncoding type; /* The encoding type to be used. */ unsigned char buf[8]; /* Temporary data storage area. */ int pos; /* Context position count. */ + BOOL foot; /* Reading footer near end of data. */ BOOL atEnd; /* Flag to say that data has ended. */ } @end @@ -111,6 +112,7 @@ typedef enum { - (BOOL) scanHeader: (NSScanner*)aScanner named: (NSString*)headerName inTo: (NSMutableDictionary*)info; +- (BOOL) scanPastSpace: (NSScanner*)aScanner; - (NSString*) scanSpecial: (NSScanner*)aScanner; - (NSString*) scanToken: (NSScanner*)aScanner; diff --git a/Source/GSMime.m b/Source/GSMime.m index 7cf333af1..43a75b847 100644 --- a/Source/GSMime.m +++ b/Source/GSMime.m @@ -194,7 +194,7 @@ parseCharacterSet(NSString *token) @interface GSMimeParser (Private) -- (BOOL) _decodeBody; +- (BOOL) _decodeBody: (NSData*)data; - (NSString*) _decodeHeader; - (BOOL) _unfoldHeader; @end @@ -330,7 +330,7 @@ parseCharacterSet(NSString *token) decodebase64(dst, ctxt->buf); size += len; } - [dData setLength: dst - beg]; + [dData setLength: size + dst - beg]; break; case GSMimeEncodingQuotedPrintable: @@ -377,12 +377,36 @@ parseCharacterSet(NSString *token) } src++; } - [dData setLength: dst - beg]; + [dData setLength: size + dst - beg]; break; case GSMimeEncodingChunked: while (ctxt->atEnd == NO && src < end) { + /* + * If we are reading a chunk footer, look for a blank line + * that terminates it. + */ + if (ctxt->foot == YES) + { + if (*src == '\r') + { + src++; + } + else if (*src != '\n' || ctxt->buf[0] != '\n') + { + ctxt->buf[0] = *src++; + } + else + { + ctxt->foot = NO; + ctxt->atEnd = YES; + src++; + break; + } + continue; + } + /* * Keep track of chunk size in the context. 
* A negative 'pos' indicates that we are reading the chunk size. @@ -453,7 +477,8 @@ parseCharacterSet(NSString *token) */ if (ctxt->pos == 0) { - ctxt->atEnd = YES; + ctxt->foot = YES; + ctxt->buf[0] = src[-1]; // last char read } } else @@ -482,7 +507,7 @@ parseCharacterSet(NSString *token) } src++; } - [dData setLength: dst - beg]; + [dData setLength: size + dst - beg]; } } break; @@ -495,8 +520,7 @@ parseCharacterSet(NSString *token) [dData setLength: size + (end - src)]; dst = (unsigned char*)[dData mutableBytes]; memcpy(&dst[size], src, (end - src)); - size += (end - src); - [dData setLength: size]; + [dData setLength: size + end - src]; break; } @@ -537,38 +561,45 @@ parseCharacterSet(NSString *token) } if ([d length] > 0) { - [data appendBytes: [d bytes] length: [d length]]; - bytes = (unsigned char*)[data mutableBytes]; - dataEnd = [data length]; - while (inBody == NO) + if (inBody == NO) { - if ([self _unfoldHeader] == NO) - { - return YES; /* Needs more data to fill line. */ - } - if (inBody == NO) - { - NSString *header; + [data appendBytes: [d bytes] length: [d length]]; + bytes = (unsigned char*)[data mutableBytes]; + dataEnd = [data length]; - header = [self _decodeHeader]; - if (header == nil) + while (inBody == NO) + { + if ([self _unfoldHeader] == NO) { - return NO; /* Couldn't handle word encodings. */ + return YES; /* Needs more data to fill line. */ } - if ([self parseHeader: header] == NO) + if (inBody == NO) { - return NO; /* Header was not parsed properly. */ + NSString *header; + + header = [self _decodeHeader]; + if (header == nil) + { + return NO; /* Couldn't handle words. */ + } + if ([self parseHeader: header] == NO) + { + return NO; /* Header not parsed properly. */ + } } } + /* + * All headers have been parsed, so we empty our internal buffer + * (which we will now use to store decoded data) and place unused + * information back in the incoming data object to act as input. 
+ */ + d = AUTORELEASE([data copy]); + [data setLength: 0]; } - /* - * If we have a multipart document, we must feed the data to - * a child parser to decode the subsidiary parts. - */ - if (boundary != nil) + if ([d length] > 0) { - [self _decodeBody]; + [self _decodeBody: d]; } return YES; /* Want more data for body */ } @@ -578,7 +609,7 @@ parseCharacterSet(NSString *token) if (inBody == YES) { - result = [self _decodeBody]; + result = [self _decodeBody: d]; } else { @@ -609,27 +640,35 @@ parseCharacterSet(NSString *token) */ [info setObject: [scanner string] forKey: @"RawHeader"]; + /* + * Special case - permit web response status line to act like a header. + */ + if ([scanner scanString: @"HTTP" intoString: &name] == NO + || [scanner scanString: @"/" intoString: 0] == NO) + { + if ([scanner scanUpToString: @":" intoString: &name] == NO) + { + NSLog(@"Not a valid header (%@)", [scanner string]); + return NO; + } + /* + * Position scanner after colon and any white space. + */ + if ([scanner scanString: @":" intoString: 0] == NO) + { + NSLog(@"No colon terminating name in header (%@)", [scanner string]); + return NO; + } + } + /* * Store the Raw header name and a lowercase version too. */ - if ([scanner scanUpToString: @":" intoString: &name] == NO) - { - NSLog(@"No colon terminated name in header (%@)", [scanner string]); - return NO; - } name = [name stringByTrimmingTailSpaces]; [info setObject: name forKey: @"BaseName"]; name = [name lowercaseString]; [info setObject: name forKey: @"Name"]; - /* - * Position scanner after colon and any white space. 
- */ - if ([scanner scanString: @":" intoString: 0] == NO) - { - NSLog(@"No colon terminating name in header (%@)", [scanner string]); - return NO; - } skip = RETAIN([scanner charactersToBeSkipped]); [scanner setCharactersToBeSkipped: nil]; [scanner scanCharactersFromSet: skip intoString: 0]; @@ -658,7 +697,7 @@ parseCharacterSet(NSString *token) int majv = 0; int minv = 0; - value = [info objectForKey: @"Value"]; + value = [info objectForKey: @"BaseValue"]; if ([value length] == 0) { NSLog(@"Missing value for mime-version header"); @@ -819,16 +858,45 @@ parseCharacterSet(NSString *token) /* * Now see if we are interested in any of it. */ - if ([name isEqualToString: @"mime-version"] == YES) + if ([name isEqualToString: @"http"] == YES) { - value = [self scanToken: scanner]; - if ([value length] == 0) + int major; + int minor; + int status; + + if ([scanner scanInt: &major] == NO || major < 0) { - NSLog(@"Bad value for mime-version header"); + NSLog(@"Bad value for http major version"); return NO; } + if ([scanner scanString: @"." 
intoString: 0] == NO) + { + NSLog(@"Bad format for http version"); + return NO; + } + if ([scanner scanInt: &minor] == NO || minor < 0) + { + NSLog(@"Bad value for http minor version"); + return NO; + } + if ([scanner scanInt: &status] == NO || status < 0) + { + NSLog(@"Bad value for http status"); + return NO; + } + [info setObject: [NSString stringWithFormat: @"%d", major] + forKey: @"HttpMajorVersion"]; + [info setObject: [NSString stringWithFormat: @"%d", minor] + forKey: @"HttpMinorVersion"]; + [info setObject: [NSString stringWithFormat: @"%d.%d", major, minor] + forKey: @"HttpVersion"]; + [info setObject: [NSString stringWithFormat: @"%d", status] + forKey: @"HttpStatus"]; + [self scanPastSpace: scanner]; + value = [[scanner string] substringFromIndex: [scanner scanLocation]]; } - else if ([name isEqualToString: @"content-transfer-encoding"] == YES) + else if ([name isEqualToString: @"content-transfer-encoding"] == YES + || [name isEqualToString: @"transfer-encoding"] == YES) { value = [self scanToken: scanner]; if ([value length] == 0) @@ -966,20 +1034,25 @@ parseCharacterSet(NSString *token) return YES; } -- (NSString*) scanSpecial: (NSScanner*)scanner +- (BOOL) scanPastSpace: (NSScanner*)scanner { NSCharacterSet *skip; + BOOL scanned; + + skip = RETAIN([scanner charactersToBeSkipped]); + [scanner setCharactersToBeSkipped: nil]; + scanned = [scanner scanCharactersFromSet: skip intoString: 0]; + [scanner setCharactersToBeSkipped: skip]; + RELEASE(skip); + return scanned; +} + +- (NSString*) scanSpecial: (NSScanner*)scanner +{ unsigned location; unichar c; - /* - * Move past white space. 
- */ - skip = RETAIN([scanner charactersToBeSkipped]); - [scanner setCharactersToBeSkipped: nil]; - [scanner scanCharactersFromSet: skip intoString: 0]; - [scanner setCharactersToBeSkipped: skip]; - RELEASE(skip); + [self scanPastSpace: scanner]; /* * Now return token delimiter (may be whitespace) @@ -1231,7 +1304,7 @@ parseCharacterSet(NSString *token) return hdr; } -- (BOOL) _decodeBody +- (BOOL) _decodeBody: (NSData*)d { if (boundary == nil) { @@ -1247,52 +1320,51 @@ parseCharacterSet(NSString *token) } else { - unsigned length = [data length]; - NSMutableData *decoded = [NSMutableData dataWithCapacity: length]; + if (context->atEnd == YES) + { + if ([d length] > 0) + { + NSLog(@"Additional data ignored after parse complete"); + } + return YES; /* Nothing more to do */ + } - [self decodeData: data - fromRange: NSMakeRange(0, length) - intoData: decoded + [self decodeData: d + fromRange: NSMakeRange(0, [d length]) + intoData: data withContext: context]; - if (context->pos != 0) - { - context->atEnd = YES; - [self decodeData: nil - fromRange: NSMakeRange(0, 0) - intoData: decoded - withContext: context]; - } - /* - * If no content type is supplied, we assume text. - */ - if (type == nil || [type isEqualToString: @"text"] == YES) - { - NSDictionary *params; - NSString *charset; - NSStringEncoding stringEncoding; - NSString *string; - - /* - * Assume that content type is best represented as NSString. - */ - params = [typeInfo objectForKey: @"Parameters"]; - charset = [params objectForKey: @"charset"]; - stringEncoding = parseCharacterSet(charset); - string = [[NSString alloc] initWithData: decoded - encoding: stringEncoding]; - [document setContent: string]; - RELEASE(string); - } - else + if (context->atEnd == YES) { /* - * Assume that any non-text content type is best - * represented as NSData. + * If no content type is supplied, we assume text. 
*/ - decoded = [decoded copy]; /* Ensure it's immutable */ - [document setContent: decoded]; - RELEASE(decoded); + if (type == nil || [type isEqualToString: @"text"] == YES) + { + NSDictionary *params; + NSString *charset; + NSStringEncoding stringEncoding; + NSString *string; + + /* + * Assume that content type is best represented as NSString. + */ + params = [typeInfo objectForKey: @"Parameters"]; + charset = [params objectForKey: @"charset"]; + stringEncoding = parseCharacterSet(charset); + string = [[NSString alloc] initWithData: data + encoding: stringEncoding]; + [document setContent: string]; + RELEASE(string); + } + else + { + /* + * Assume that any non-text content type is best + * represented as NSData. + */ + [document setContent: AUTORELEASE([data copy])]; + } } return YES; } @@ -1304,6 +1376,10 @@ parseCharacterSet(NSString *token) unsigned char bInit = bBytes[0]; BOOL done = NO; + [data appendBytes: [d bytes] length: [d length]]; + bytes = (unsigned char*)[data mutableBytes]; + dataEnd = [data length]; + while (done == NO) { /* @@ -1367,10 +1443,11 @@ parseCharacterSet(NSString *token) if ([child parse: d] == YES && [child parse: nil] == YES) { NSMutableArray *a; + GSMimeDocument *doc; /* * Store the document produced by the child, and - * create anew parser for the next section. + * create a new parser for the next section. */ a = [document content]; if (a == nil) @@ -1379,7 +1456,11 @@ parseCharacterSet(NSString *token) [document setContent: a]; RELEASE(a); } - [a addObject: [child document]]; + doc = [child document]; + if (doc != nil) + { + [a addObject: doc]; + } RELEASE(child); child = [GSMimeParser new]; } @@ -1533,6 +1614,11 @@ parseCharacterSet(NSString *token) return content; } +- (id) copyWithZone: (NSZone*)z +{ + return RETAIN(self); +} + - (void) dealloc { RELEASE(headers);