mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-22 16:33:29 +00:00
Simplify the conversion of a string to data (use the base class code) and fix it to insert a BOM consistently (as on OSX) for encodings where the byte order matters but is not specified by the encoding.
This commit is contained in:
parent
9aa17b452c
commit
bd5f2909e6
3 changed files with 31 additions and 171 deletions
|
@ -482,6 +482,10 @@ GS_EXPORT_CLASS
|
|||
// Working With Encodings
|
||||
- (BOOL) canBeConvertedToEncoding: (NSStringEncoding)encoding;
|
||||
- (NSData*) dataUsingEncoding: (NSStringEncoding)encoding;
|
||||
/** Conversion to an encoding where byte order matters but is not specified
|
||||
* (NSUnicodeStringEncoding, NSUTF16StringEncoding, NSUTF32StringEncoding)
|
||||
* produces data with a byte order mark (BOM) at the start of the data.
|
||||
*/
|
||||
- (NSData*) dataUsingEncoding: (NSStringEncoding)encoding
|
||||
allowLossyConversion: (BOOL)flag;
|
||||
+ (NSStringEncoding) defaultCStringEncoding;
|
||||
|
|
|
@ -2379,133 +2379,6 @@ cStringLength_u(GSStr self, NSStringEncoding enc)
|
|||
}
|
||||
}
|
||||
|
||||
/* Builds an NSData object holding the 8-bit characters of self converted
 * to the requested encoding.
 *
 * self     - string whose _contents.c buffer holds _count bytes in
 *            internalEncoding.
 * encoding - the encoding wanted for the returned data.
 * lossy    - when NO, GSUniStrict is passed to the conversion routines.
 *
 * Returns an empty data object for an empty string, nil when a conversion
 * routine reports failure, and otherwise a data object wrapping a freshly
 * allocated buffer.  Raises NSCharacterConversionException if the contents
 * cannot be turned into Unicode on the generic (two step) path.
 */
static inline NSData*
dataUsingEncoding_c(GSStr self, NSStringEncoding encoding, BOOL lossy)
{
  unsigned	count = self->_count;
  BOOL		sameBytes;

  if (0 == count)
    {
      return [NSDataClass data];
    }

  /* The stored bytes can be used unchanged when the target is the internal
   * encoding, or when the internal encoding is ASCII and the target is
   * UTF-8 or any byte encoding.
   */
  sameBytes = (encoding == internalEncoding) ? YES : NO;
  if (NO == sameBytes && NSASCIIStringEncoding == internalEncoding)
    {
      if (NSUTF8StringEncoding == encoding || isByteEncoding(encoding))
	{
	  sameBytes = YES;
	}
    }

  if (YES == sameBytes)
    {
      unsigned char	*copy;

      copy = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), count);
      memcpy(copy, self->_contents.c, count);
      return [NSDataClass dataWithBytesNoCopy: copy length: count];
    }

  if (NSUnicodeStringEncoding == encoding)
    {
      /* Single step conversion to unichars.  GSUniBOM is always passed
       * (presumably so the output starts with a byte order mark - confirm
       * against GSToUnicode); GSUniStrict is added for lossless requests.
       */
      unichar		*wide = 0;
      unsigned int	wideLen = 0;
      unsigned int	opts = (lossy == NO) ? (GSUniBOM | GSUniStrict) : GSUniBOM;

      if (GSToUnicode(&wide, &wideLen, self->_contents.c, self->_count,
	internalEncoding, NSDefaultMallocZone(), opts) == NO)
	{
	  return nil;
	}
      return [NSDataClass dataWithBytesNoCopy: wide
				       length: wideLen * sizeof(unichar)];
    }
  else
    {
      /* Generic path: go via a temporary Unicode buffer and then on to
       * the target encoding.
       */
      unichar		*wide = 0;
      unsigned		wideLen = 0;
      unsigned char	*bytes = 0;
      unsigned		size = 0;
      BOOL		converted;

      if (GSToUnicode(&wide, &wideLen, self->_contents.c, self->_count,
	internalEncoding, NSDefaultMallocZone(), 0) == NO)
	{
	  [NSException raise: NSCharacterConversionException
		      format: @"Can't convert to Unicode string."];
	}

      converted = GSFromUnicode(&bytes, &size, wide, wideLen, encoding,
	NSDefaultMallocZone(), (lossy == NO) ? GSUniStrict : 0);

      /* The intermediate buffer is released whether or not the second
       * step succeeded.
       */
      NSZoneFree(NSDefaultMallocZone(), wide);
      if (NO == converted)
	{
	  return nil;
	}
      return [NSDataClass dataWithBytesNoCopy: bytes length: size];
    }
}
|
||||
|
||||
/* Builds an NSData object holding the 16-bit characters of self converted
 * to the requested encoding.
 *
 * self     - string whose _contents.u buffer holds _count unichars.
 * encoding - the encoding wanted for the returned data.
 * lossy    - when NO, invalid content makes the conversion return nil
 *            (Unicode target) or GSUniStrict is passed to GSFromUnicode
 *            (any other target).
 *
 * Returns an empty data object for an empty string, nil on a failed
 * conversion, and otherwise a data object wrapping a freshly allocated
 * buffer.  For NSUnicodeStringEncoding the buffer starts with 0xFEFF.
 */
static inline NSData*
dataUsingEncoding_u(GSStr self, NSStringEncoding encoding, BOOL lossy)
{
  unsigned	remaining = self->_count;
  unichar	*out;
  unsigned	valid;
  unsigned	src = 0;
  unsigned	dst = 1;	// Slot zero is reserved for the BOM.

  if (0 == remaining)
    {
      return [NSDataClass data];
    }

  if (encoding != NSUnicodeStringEncoding)
    {
      unsigned char	*bytes = 0;
      unsigned int	size = 0;

      if (GSFromUnicode(&bytes, &size, self->_contents.u, self->_count,
	encoding, NSDefaultMallocZone(),
	(lossy == NO) ? GSUniStrict : 0) == NO)
	{
	  return nil;
	}
      return [NSDataClass dataWithBytesNoCopy: bytes length: size];
    }

  /* GSUnicode() gives the length of the leading run accepted as valid;
   * anything shorter than the whole string means bad content, which is
   * only tolerated for a lossy conversion.
   */
  valid = GSUnicode(self->_contents.u, remaining, 0, 0);
  if (valid != remaining && lossy == NO)
    {
      return nil;
    }

  out = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
    sizeof(unichar) * (remaining + 1));
  out[0] = 0xFEFF;

  while (remaining > 0)
    {
      if (valid > 0)
	{
	  // Copy the current run of good characters.
	  memcpy(out + dst, self->_contents.u + src, sizeof(unichar) * valid);
	  src += valid;
	  dst += valid;
	  remaining -= valid;
	}
      if (remaining > 0)
	{
	  // The next character is bad ... drop it from the output.
	  remaining--;
	  if (remaining > 0)
	    {
	      // More input follows ... measure the next good run.
	      src++;
	      valid = GSUnicode(self->_contents.u + src, remaining, 0, 0);
	    }
	}
    }

  // dst counts the BOM plus every character actually copied.
  return [NSDataClass dataWithBytesNoCopy: out
				   length: sizeof(unichar) * dst];
}
|
||||
|
||||
static inline void
|
||||
fillHole(GSStr self, unsigned index, unsigned size)
|
||||
{
|
||||
|
@ -3830,12 +3703,6 @@ agree, create a new GSCInlineString otherwise.
|
|||
return cStringLength_c((GSStr)self, externalEncoding);
|
||||
}
|
||||
|
||||
/* Returns the receiver's contents as data in the given encoding by
 * handing the receiver to the 8-bit conversion helper.
 */
- (NSData*) dataUsingEncoding: (NSStringEncoding)encoding
	 allowLossyConversion: (BOOL)flag
{
  GSStr	me = (GSStr)self;
  NSData	*result = dataUsingEncoding_c(me, encoding, flag);

  return result;
}
|
||||
|
||||
- (void) encodeWithCoder: (NSCoder*)aCoder
|
||||
{
|
||||
if ([aCoder allowsKeyedCoding])
|
||||
|
@ -4196,12 +4063,6 @@ agree, create a new GSCInlineString otherwise.
|
|||
return cStringLength_u((GSStr)self, externalEncoding);
|
||||
}
|
||||
|
||||
/* Returns the receiver's contents as data in the given encoding by
 * handing the receiver to the 16-bit conversion helper.
 */
- (NSData*) dataUsingEncoding: (NSStringEncoding)encoding
	 allowLossyConversion: (BOOL)flag
{
  GSStr	me = (GSStr)self;
  NSData	*result = dataUsingEncoding_u(me, encoding, flag);

  return result;
}
|
||||
|
||||
- (void) encodeWithCoder: (NSCoder*)aCoder
|
||||
{
|
||||
if ([aCoder allowsKeyedCoding])
|
||||
|
@ -4690,15 +4551,6 @@ agree, create a new GSUInlineString otherwise.
|
|||
return cStringLength_c((GSStr)self, externalEncoding);
|
||||
}
|
||||
|
||||
/* Returns the receiver's contents as data in the given encoding.
 * The receiver may hold either 8-bit or 16-bit characters, so the
 * _flags.wide field selects which conversion helper does the work.
 */
- (NSData*) dataUsingEncoding: (NSStringEncoding)encoding
	 allowLossyConversion: (BOOL)flag
{
  if (_flags.wide != 1)
    {
      // Not flagged as wide ... use the 8-bit helper.
      return dataUsingEncoding_c((GSStr)self, encoding, flag);
    }
  return dataUsingEncoding_u((GSStr)self, encoding, flag);
}
|
||||
|
||||
- (void) dealloc
|
||||
{
|
||||
if (_contents.c != 0)
|
||||
|
@ -5829,11 +5681,6 @@ literalIsEqual(NXConstantString *self, id anObject)
|
|||
BOOL latin1;
|
||||
unsigned length;
|
||||
|
||||
if (0 == nxcslen)
|
||||
{
|
||||
return [NSDataClass data];
|
||||
}
|
||||
|
||||
/* Check what is actually in this string ... if it's corrupt an exception
|
||||
* is raised.
|
||||
*/
|
||||
|
|
|
@ -4126,30 +4126,22 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
|
|||
unsigned len = [self length];
|
||||
NSData *d;
|
||||
|
||||
if (len == 0)
|
||||
{
|
||||
d = [NSDataClass data];
|
||||
}
|
||||
else if (encoding == NSUnicodeStringEncoding)
|
||||
if (NSUnicodeStringEncoding == encoding)
|
||||
{
|
||||
unichar *u;
|
||||
unsigned l;
|
||||
|
||||
/* Fast path for Unicode (UTF16) without a specific byte order,
|
||||
* where we must prepend a byte order mark.
|
||||
* The case for UTF32 is handled in the slower branch.
|
||||
*/
|
||||
u = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
|
||||
(len + 1) * sizeof(unichar));
|
||||
*u = byteOrderMark;
|
||||
[self getCharacters: u + 1];
|
||||
l = GSUnicode(u, len, 0, 0);
|
||||
if (l == len || flag == YES)
|
||||
{
|
||||
d = [NSDataClass dataWithBytesNoCopy: u
|
||||
length: (l + 1) * sizeof(unichar)];
|
||||
}
|
||||
else
|
||||
{
|
||||
d = nil;
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
}
|
||||
d = [NSDataClass dataWithBytesNoCopy: u
|
||||
length: (l + 1) * sizeof(unichar)];
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -4164,11 +4156,28 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
|
|||
* We can then use our concrete subclass implementation to do the
|
||||
* work of converting to the desired encoding.
|
||||
*/
|
||||
if (len >= 4096)
|
||||
if (NSUTF32StringEncoding == encoding)
|
||||
{
|
||||
u = NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar));
|
||||
/* For UTF32 without byte order specified, we must include a
|
||||
* BOM at the start of the data.
|
||||
*/
|
||||
len++;
|
||||
if (len >= 4096)
|
||||
{
|
||||
u = NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar));
|
||||
}
|
||||
*u = byteOrderMark;
|
||||
[self getCharacters: u+1];
|
||||
}
|
||||
[self getCharacters: u];
|
||||
else
|
||||
{
|
||||
if (len >= 4096)
|
||||
{
|
||||
u = NSZoneMalloc(NSDefaultMallocZone(), len * sizeof(unichar));
|
||||
}
|
||||
[self getCharacters: u];
|
||||
}
|
||||
|
||||
if (flag == NO)
|
||||
{
|
||||
options = GSUniStrict;
|
||||
|
|
Loading…
Reference in a new issue