mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-22 16:33:29 +00:00
Be aware of unicode BOM in UTF8 data.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@14639 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
5dae3b400b
commit
326694568f
2 changed files with 47 additions and 8 deletions
|
@ -4,7 +4,8 @@
|
|||
is released properly on failure, and we don't generate log messages
|
||||
where a lower level API should be doing it. Added some documentation.
|
||||
* Source/NSDictionary.m: ditto
|
||||
* Source/NSString.m: ditto
|
||||
* Source/NSString.m: ditto. Also add support for understanding the
|
||||
unicode BOM at the start of UTF8 data and stripping it.
|
||||
* Source/NSData.m: Tidied read from and write to file,
|
||||
adding lots of logging information. Also documented quite a bit.
|
||||
Resolved all conflicts found with Adam's change ... generally in
|
||||
|
|
|
@ -1135,6 +1135,14 @@ handle_printf_atsign (FILE *stream,
|
|||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialises the receiver with the supplied data, using the
|
||||
* specified encoding.<br />
|
||||
* For NSUnicodeStringEncoding and NSUTF8StringEncoding, a Byte Order
|
||||
* Marker (if present at the start of the data) is removed automatically.<br />
|
||||
* If the data can not be interpreted using the encoding, the receiver
|
||||
* is released and nil is returned.
|
||||
*/
|
||||
- (id) initWithData: (NSData*)data
|
||||
encoding: (NSStringEncoding)encoding
|
||||
{
|
||||
|
@ -1161,8 +1169,18 @@ handle_printf_atsign (FILE *stream,
|
|||
}
|
||||
else if (encoding == NSUTF8StringEncoding)
|
||||
{
|
||||
const char *bytes = [data bytes];
|
||||
unsigned i = 0;
|
||||
const unsigned char *bytes = [data bytes];
|
||||
unsigned i = 0;
|
||||
|
||||
/*
|
||||
* If the data begins with the UTF8 Byte Order Marker (as a
|
||||
* signature for UTF8 data) we must remove it.
|
||||
*/
|
||||
if (len > 2 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
|
||||
{
|
||||
len -= 3;
|
||||
bytes += 3;
|
||||
}
|
||||
|
||||
if (_ByteEncodingOk)
|
||||
{
|
||||
|
@ -1172,7 +1190,7 @@ handle_printf_atsign (FILE *stream,
|
|||
*/
|
||||
while (i < len)
|
||||
{
|
||||
if (((unsigned char*)bytes)[i] > 127)
|
||||
if ((bytes)[i] > 127)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -1280,12 +1298,22 @@ handle_printf_atsign (FILE *stream,
|
|||
}
|
||||
|
||||
/**
|
||||
* Initialises the receiver with the contents of the file at path.<br />
|
||||
* Invokes [NSData-initWithContentsOfFile:] to read the file, then
|
||||
* <p>Initialises the receiver with the contents of the file at path.
|
||||
* </p>
|
||||
* <p>Invokes [NSData-initWithContentsOfFile:] to read the file, then
|
||||
* examines the data to infer its encoding type, and converts the
|
||||
* data to a string using -initWithData:encoding:<br />
|
||||
* Releases the receiver and returns nil if the file could not be read
|
||||
* data to a string using -initWithData:encoding:
|
||||
* </p>
|
||||
* <p>The encoding to use is determined as follows ... if the data begins
|
||||
* with the 16-bit unicode Byte Order Marker, then it is assumed to be
|
||||
* unicode data in the appropriate ordering and converted as such.<br />
|
||||
* If it begins with a UTF8 representation of the BOM, the UTF8 encoding
|
||||
* is used.<br />
|
||||
* Otherwise, the default C String encoding is used.
|
||||
* </p>
|
||||
* <p>Releases the receiver and returns nil if the file could not be read
|
||||
* and converted to a string.
|
||||
* </p>
|
||||
*/
|
||||
- (id) initWithContentsOfFile: (NSString*)path
|
||||
{
|
||||
|
@ -1303,6 +1331,8 @@ handle_printf_atsign (FILE *stream,
|
|||
len = [d length];
|
||||
if (len == 0)
|
||||
{
|
||||
RELEASE(d);
|
||||
RELEASE(self);
|
||||
return @"";
|
||||
}
|
||||
test = [d bytes];
|
||||
|
@ -1313,6 +1343,10 @@ handle_printf_atsign (FILE *stream,
|
|||
/* somebody set up us the BOM! */
|
||||
enc = NSUnicodeStringEncoding;
|
||||
}
|
||||
else if (len > 2 && test[0] == 0xEF && test[1] == 0xBB && test[2] == 0xBF)
|
||||
{
|
||||
enc = NSUTF8StringEncoding;
|
||||
}
|
||||
}
|
||||
self = [self initWithData: d encoding: enc];
|
||||
RELEASE(d);
|
||||
|
@ -1348,6 +1382,10 @@ handle_printf_atsign (FILE *stream,
|
|||
{
|
||||
enc = NSUnicodeStringEncoding;
|
||||
}
|
||||
else if (len > 2 && test[0] == 0xEF && test[1] == 0xBB && test[2] == 0xBF)
|
||||
{
|
||||
enc = NSUTF8StringEncoding;
|
||||
}
|
||||
}
|
||||
self = [self initWithData: d encoding: enc];
|
||||
if (self == nil)
|
||||
|
|
Loading…
Reference in a new issue