mirror of
https://github.com/gnustep/libs-base.git
synced 2025-05-31 16:50:58 +00:00
Be aware of unicode BOM in UTF8 data.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@14639 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
72d997aa58
commit
89663963e2
2 changed files with 47 additions and 8 deletions
|
@ -4,7 +4,8 @@
|
||||||
is released properly on failure, and we don't generate log messages
|
is released properly on failure, and we don't generate log messages
|
||||||
where a lower level API should be doing it. Added some documentation.
|
where a lower level API should be doing it. Added some documentation.
|
||||||
* Source/NSDictionary.m: ditto
|
* Source/NSDictionary.m: ditto
|
||||||
* Source/NSString.m: ditto
|
* Source/NSString.m: ditto. Also add support for understanding the
|
||||||
|
unicode BOM at the start of UTF8 data and stripping it.
|
||||||
* Source/NSData.m: Tidied read from and write to file,
|
* Source/NSData.m: Tidied read from and write to file,
|
||||||
adding lots of logging information. Also documented quite a bit.
|
adding lots of logging information. Also documented quite a bit.
|
||||||
Resolved all conflicts found with Adam's change ... generally in
|
Resolved all conflicts found with Adam's change ... generally in
|
||||||
|
|
|
@ -1135,6 +1135,14 @@ handle_printf_atsign (FILE *stream,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialises the receiver with the supplied data, using the
|
||||||
|
* specified encoding.<br />
|
||||||
|
* For NSUnicodeStringEncoding and NSUTF8StringEncoding, a Byte Order
|
||||||
|
* Marker (if present at the start of the data) is removed automatically.<br />
|
||||||
|
* If the data cannot be interpreted using the encoding, the receiver
|
||||||
|
* is released and nil is returned.
|
||||||
|
*/
|
||||||
- (id) initWithData: (NSData*)data
|
- (id) initWithData: (NSData*)data
|
||||||
encoding: (NSStringEncoding)encoding
|
encoding: (NSStringEncoding)encoding
|
||||||
{
|
{
|
||||||
|
@ -1161,8 +1169,18 @@ handle_printf_atsign (FILE *stream,
|
||||||
}
|
}
|
||||||
else if (encoding == NSUTF8StringEncoding)
|
else if (encoding == NSUTF8StringEncoding)
|
||||||
{
|
{
|
||||||
const char *bytes = [data bytes];
|
const unsigned char *bytes = [data bytes];
|
||||||
unsigned i = 0;
|
unsigned i = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the data begins with the UTF8 Byte Order Marker (as a
|
||||||
|
* signature for UTF8 data) we must remove it.
|
||||||
|
*/
|
||||||
|
if (len > 2 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
|
||||||
|
{
|
||||||
|
len -= 3;
|
||||||
|
bytes += 3;
|
||||||
|
}
|
||||||
|
|
||||||
if (_ByteEncodingOk)
|
if (_ByteEncodingOk)
|
||||||
{
|
{
|
||||||
|
@ -1172,7 +1190,7 @@ handle_printf_atsign (FILE *stream,
|
||||||
*/
|
*/
|
||||||
while (i < len)
|
while (i < len)
|
||||||
{
|
{
|
||||||
if (((unsigned char*)bytes)[i] > 127)
|
if ((bytes)[i] > 127)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1280,12 +1298,22 @@ handle_printf_atsign (FILE *stream,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialises the receiver with the contents of the file at path.<br />
|
* <p>Initialises the receiver with the contents of the file at path.
|
||||||
* Invokes [NSData-initWithContentsOfFile:] to read the file, then
|
* </p>
|
||||||
|
* <p>Invokes [NSData-initWithContentsOfFile:] to read the file, then
|
||||||
* examines the data to infer its encoding type, and converts the
|
* examines the data to infer its encoding type, and converts the
|
||||||
* data to a string using -initWithData:encoding:<br />
|
* data to a string using -initWithData:encoding:
|
||||||
* Releases the receiver and returns nil if the file could not be read
|
* </p>
|
||||||
|
* <p>The encoding to use is determined as follows ... if the data begins
|
||||||
|
* with the 16-bit unicode Byte Order Marker, then it is assumed to be
|
||||||
|
* unicode data in the appropriate ordering and converted as such.<br />
|
||||||
|
* If it begins with a UTF8 representation of the BOM, the UTF8 encoding
|
||||||
|
* is used.<br />
|
||||||
|
* Otherwise, the default C String encoding is used.
|
||||||
|
* </p>
|
||||||
|
* <p>Releases the receiver and returns nil if the file could not be read
|
||||||
* and converted to a string.
|
* and converted to a string.
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
- (id) initWithContentsOfFile: (NSString*)path
|
- (id) initWithContentsOfFile: (NSString*)path
|
||||||
{
|
{
|
||||||
|
@ -1303,6 +1331,8 @@ handle_printf_atsign (FILE *stream,
|
||||||
len = [d length];
|
len = [d length];
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
{
|
{
|
||||||
|
RELEASE(d);
|
||||||
|
RELEASE(self);
|
||||||
return @"";
|
return @"";
|
||||||
}
|
}
|
||||||
test = [d bytes];
|
test = [d bytes];
|
||||||
|
@ -1313,6 +1343,10 @@ handle_printf_atsign (FILE *stream,
|
||||||
/* somebody set up us the BOM! */
|
/* somebody set up us the BOM! */
|
||||||
enc = NSUnicodeStringEncoding;
|
enc = NSUnicodeStringEncoding;
|
||||||
}
|
}
|
||||||
|
else if (len > 2 && test[0] == 0xEF && test[1] == 0xBB && test[2] == 0xBF)
|
||||||
|
{
|
||||||
|
enc = NSUTF8StringEncoding;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
self = [self initWithData: d encoding: enc];
|
self = [self initWithData: d encoding: enc];
|
||||||
RELEASE(d);
|
RELEASE(d);
|
||||||
|
@ -1348,6 +1382,10 @@ handle_printf_atsign (FILE *stream,
|
||||||
{
|
{
|
||||||
enc = NSUnicodeStringEncoding;
|
enc = NSUnicodeStringEncoding;
|
||||||
}
|
}
|
||||||
|
else if (len > 2 && test[0] == 0xEF && test[1] == 0xBB && test[2] == 0xBF)
|
||||||
|
{
|
||||||
|
enc = NSUTF8StringEncoding;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
self = [self initWithData: d encoding: enc];
|
self = [self initWithData: d encoding: enc];
|
||||||
if (self == nil)
|
if (self == nil)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue