Improve handling of character sets in multipart/form-data

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@19428 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-Macdonald 2004-05-30 09:05:10 +00:00
parent a1e4d0770c
commit 955690403e
4 changed files with 79 additions and 20 deletions

View file

@ -1,3 +1,11 @@
2004-05-30 Richard Frith-Macdonald <rfm@gnu.org>
* Source/GSMime.m: Add support for setting default characterset for
text elements ... needed for parsing multipart/form-data where the
charset is not set in individual parts of the form.
* Source/GSXML.h: temporarily disable sax2 support ... it seems that it
doesn't work reliably.
2004-05-28 Adam Fedor <fedor@gnu.org>
* configure.ac: Check for kvm access and use it if available.

View file

@ -173,6 +173,7 @@
GSMimeDocument *document;
GSMimeParser *child;
GSMimeCodingContext *context;
NSStringEncoding _defaultEncoding;
}
+ (GSMimeDocument*) documentFromData: (NSData*)mimeData;
@ -198,6 +199,7 @@
- (NSString*) scanSpecial: (NSScanner*)scanner;
- (NSString*) scanToken: (NSScanner*)scanner;
- (void) setBuggyQuotes: (BOOL)flag;
- (void) setDefaultCharset: (NSString*)aName;
- (void) setIsHttp;
@end

View file

@ -1121,6 +1121,7 @@ wordData(NSString *word)
{
data = [[NSMutableData alloc] init];
document = [[GSMimeDocument alloc] init];
_defaultEncoding = NSASCIIStringEncoding;
}
return self;
}
@ -1930,6 +1931,16 @@ NSDebugMLLog(@"GSMime", @"Header parsed - %@", info);
}
}
/**
 * Sets the string encoding this parser falls back on when it decodes
 * text content for which no content-type information is available.<br />
 * The aName argument is a character set name; it is converted to an
 * NSStringEncoding by [GSMimeDocument+encodingFromCharset:] and the
 * result is remembered by the parser.<br />
 * A newly initialised parser uses NSASCIIStringEncoding (us-ascii)
 * until this method is called.
 */
- (void) setDefaultCharset: (NSString*)aName
{
  NSStringEncoding	enc;

  enc = [GSMimeDocument encodingFromCharset: aName];
  _defaultEncoding = enc;
}
/**
* Method to inform the parser that the data it is parsing is an HTTP
* document rather than true MIME. This method is called internally
@ -1943,6 +1954,23 @@ NSDebugMLLog(@"GSMime", @"Header parsed - %@", info);
@end
@implementation GSMimeParser (Private)
/*
 * Throw away any existing child parser and replace it with a fresh one
 * for parsing a subsidiary document.  The replacement is given the same
 * buggy-quotes setting and the same default string encoding as the
 * receiver.
 */
- (void) _child
{
  GSMimeParser	*fresh;

  /* Release the previous child (if any) before building the new one. */
  DESTROY(child);

  fresh = [[GSMimeParser alloc] init];
  if (flags.buggyQuotes == 1)
    {
      [fresh setBuggyQuotes: YES];
    }

  /*
   * Hand our default encoding straight to the new parser, so it is
   * used for any text it decodes without charset information.
   */
  fresh->_defaultEncoding = _defaultEncoding;

  child = fresh;
}
/*
* This method takes the raw data of an unfolded header line, and handles
* Method to inform the parser that the data it is parsing is an HTTP
@ -2208,16 +2236,24 @@ NSDebugMLLog(@"GSMime", @"Header parsed - %@", info);
if ([type isEqualToString: @"text"] == YES)
{
NSString *charset;
NSStringEncoding stringEncoding;
NSString *string;
if (typeInfo == nil)
{
stringEncoding = _defaultEncoding;
}
else
{
NSString *charset;
charset = [typeInfo parameterForKey: @"charset"];
stringEncoding
= [GSMimeDocument encodingFromCharset: charset];
}
/*
* Assume that content type is best represented as NSString.
*/
charset = [typeInfo parameterForKey: @"charset"];
stringEncoding
= [GSMimeDocument encodingFromCharset: charset];
string = [[NSString alloc] initWithData: data
encoding: stringEncoding];
[document setContent: string];
@ -2298,17 +2334,28 @@ NSDebugMLLog(@"GSMime", @"Header parsed - %@", info);
}
else if (child == nil)
{
NSString *cset;
/*
* Found boundary at the start of the first section.
* Set sectionStart to point immediately after boundary.
*/
lineStart += bLength;
sectionStart = lineStart;
child = [GSMimeParser new];
if (flags.buggyQuotes == 1)
/*
* If we have an explicit character set for the multipart
* document, we set it as the default characterset inherited
* by any child documents.
*/
cset = [[document headerNamed: @"content-type"]
parameterForKey: @"charset"];
if (cset != nil)
{
[child setBuggyQuotes: YES];
[self setDefaultCharset: cset];
}
[self _child];
}
else
{
@ -2372,13 +2419,12 @@ NSDebugMLLog(@"GSMime", @"Header parsed - %@", info);
if (doc != nil)
{
[document addContent: doc];
if ([doc headerNamed: @"content-type"] == nil)
{
NSLog(@"Content: %@", [[doc convertToText] dataUsingEncoding: NSUnicodeStringEncoding]);
}
}
RELEASE(child);
child = [GSMimeParser new];
if (flags.buggyQuotes == 1)
{
[child setBuggyQuotes: YES];
}
[self _child];
}
else
{
@ -2386,12 +2432,7 @@ NSDebugMLLog(@"GSMime", @"Header parsed - %@", info);
* Section failed to decode properly!
*/
NSLog(@"Failed to decode section of multipart");
RELEASE(child);
child = [GSMimeParser new];
if (flags.buggyQuotes == 1)
{
[child setBuggyQuotes: YES];
}
[self _child];
}
/*

View file

@ -60,6 +60,8 @@
#include <Foundation/Foundation.h>
#endif
#undef HAVE_LIBXML_SAX2_H
/* libxml headers */
#include <libxml/tree.h>
#include <libxml/entities.h>
@ -128,6 +130,10 @@ setupCache()
if (cacheDone == NO)
{
cacheDone = YES;
#if HAVE_LIBXML_SAX2_H
xmlDefaultSAXHandlerInit();
#endif
NSString_class = [NSString class];
usSel = @selector(stringWithUTF8String:);
usImp = [NSString_class methodForSelector: usSel];
@ -2457,7 +2463,9 @@ static NSString *endMarker = @"At end of incremental parse";
/*
 * Class initialisation: make sure the shared cache has been set up
 * before the class is used.  The setup function is invoked exactly
 * once here, and only while cacheDone is still NO.
 */
+ (void) initialize
{
  if (cacheDone == NO)
    {
      setupCache();
    }
}
/*