Low level character encoding rewrite.

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@13133 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-Macdonald 2002-03-16 09:54:50 +00:00
parent 4d9788c3f6
commit 3ee7db1018
7 changed files with 819 additions and 582 deletions

View file

@ -1,3 +1,25 @@
2002-03-16 Richard Frith-Macdonald <rfm@gnu.org>
* Headers/gnustep/base/Unicode.h: Add more options for character
encoding conversion routines.
* Source/GSPrivate.h: Add a couple of private functions for character
encoding management.
* Source/GSString.m: Convert throughout to use new functions for
converting from one string encoding to another. Make changes to
handle setting of default C string encoding to be an encoding which
is incompatible with internal 8-bit string objects.
* Source/NSString.m: ditto
* Source/Unicode.m: new string encoding conversion functions extended
with a few new options. Also fixed some memory allocation bugs to
cure memory leaks.
Rewritten low level support for different character encodings ...
should provide more efficient and maintainable conversion between
encodings and permit use of wide character encodings and encodings
with multibyte sequences as the default C string encoding.
Testing ... minimal ... we could do with decent tests for this stuff.
So this version must be viewed as possibly very unstable!
2002-03-14 Adam Fedor <fedor@gnu.org>
* Version: 1.3.0

View file

@ -63,6 +63,8 @@ GS_EXPORT unichar *uni_is_decomp(unichar u);
#define GSUniTerminate 0x01
#define GSUniTemporary 0x02
#define GSUniStrict 0x04
#define GSUniBOM 0x08
#define GSUniShortOk 0x10
GS_EXPORT BOOL GSFromUnicode(unsigned char **dst, unsigned int *size,
const unichar *src, unsigned int slen, NSStringEncoding enc, NSZone *zone,

View file

@ -24,6 +24,17 @@
#define __GSPrivate_h_
/*
* Function to get the name of a string encoding as an NSString.
*/
GS_EXPORT NSString *GSEncodingName(NSStringEncoding encoding);
/*
* Function to determine whether data in a particular encoding can
* generally be represented as 8-bit characters including ascii.
*/
GS_EXPORT BOOL GSIsByteEncoding(NSStringEncoding encoding);
/*
* Private concrete string classes.
* NB. All these concrete string classes MUST have the same initial ivar
@ -35,8 +46,8 @@
@interface GSString : NSString
{
union {
unichar *u;
unsigned char *c;
unichar *u; // 16-bit unicode characters.
unsigned char *c; // 8-bit characters.
} _contents;
unsigned int _count;
struct {

View file

@ -224,6 +224,7 @@ static SEL hashSel;
static unsigned (*hashImp)(id, SEL);
static NSStringEncoding defEnc = 0;
static NSStringEncoding intEnc = NSISOLatin1StringEncoding;
/*
* The setup() function is called when any concrete string class is
@ -277,9 +278,14 @@ setup()
ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);
/*
* Cache the default string encoding.
* Cache the default string encoding, and set the internal encoding
* used by 8-bit character strings to match if possible.
*/
defEnc = GetDefEncoding();
if (GSIsByteEncoding(defEnc) == YES)
{
intEnc = defEnc;
}
}
}
@ -521,12 +527,12 @@ boolValue_u(ivars self)
}
else
{
unsigned len = self->_count < 10 ? self->_count : 9;
char buf[len+1];
unsigned int l = self->_count < 10 ? self->_count : 9;
unsigned char buf[l+1];
unsigned char *b = buf;
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO);
buf[len] = '\0';
if (len == 3
GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
if (l == 3
&& (buf[0] == 'Y' || buf[0] == 'y')
&& (buf[1] == 'E' || buf[1] == 'e')
&& (buf[2] == 'S' || buf[2] == 's'))
@ -543,8 +549,10 @@ boolValue_u(ivars self)
static inline BOOL
canBeConvertedToEncoding_c(ivars self, NSStringEncoding enc)
{
if (enc == defEnc)
return YES;
if (enc == intEnc)
{
return YES;
}
else
{
BOOL result = (*convertImp)((id)self, convertSel, enc);
@ -571,7 +579,7 @@ characterAtIndex_c(ivars self, unsigned index)
c = self->_contents.c[index];
if (c > 127)
{
c = encode_chartouni(c, defEnc);
c = encode_chartouni(c, intEnc);
}
return c;
}
@ -631,13 +639,48 @@ compare_u(ivars self, NSString *aString, unsigned mask, NSRange aRange)
static inline char*
cString_c(ivars self)
{
char *r = (char*)_fastMallocBuffer(self->_count+1);
char *r;
if (self->_count > 0)
if (self->_count == 0)
{
memcpy(r, self->_contents.c, self->_count);
return "";
}
if (defEnc == intEnc)
{
r = (char*)_fastMallocBuffer(self->_count+1);
if (self->_count > 0)
{
memcpy(r, self->_contents.c, self->_count);
}
r[self->_count] = '\0';
}
else
{
unichar *u = 0;
unsigned l = 0;
unsigned s = 0;
/*
* The external C string encoding is not compatible with the internal
* C strings ... we must convert from internal format to unicode and
* then to the external C string encoding.
*/
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
NSDefaultMallocZone(), 0) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't convert to/from Unicode string."];
}
if (GSFromUnicode((unsigned char**)&r, &s, u, l, defEnc,
NSDefaultMallocZone(), GSUniTerminate|GSUniTemporary|GSUniStrict) == NO)
{
NSZoneFree(NSDefaultMallocZone(), u);
[NSException raise: NSCharacterConversionException
format: @"Can't convert to/from Unicode string."];
}
NSZoneFree(NSDefaultMallocZone(), u);
}
r[self->_count] = '\0';
return r;
}
@ -645,58 +688,90 @@ cString_c(ivars self)
static inline char*
cString_u(ivars self)
{
int l = self->_count;
char *r = (char*)_fastMallocBuffer(l*2 + 1);
unsigned limit = 0;
unsigned c = self->_count;
if (l > 0)
if (c == 0)
{
limit = encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, YES);
if (limit == 0)
return "";
}
else
{
unsigned int l = 0;
unsigned char *r = 0;
if (GSFromUnicode(&r, &l, self->_contents.u, c, defEnc,
NSDefaultMallocZone(), GSUniTerminate|GSUniTemporary|GSUniStrict) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't get cString from Unicode string."];
}
return r;
}
r[limit] = '\0';
return r;
}
static inline unsigned int
cStringLength_c(ivars self)
{
return self->_count;
if (defEnc == intEnc)
{
return self->_count;
}
else
{
/*
* The external C string encoding is not compatible with the internal
* C strings ... we must convert from internal format to unicode and
* then to the external C string encoding.
*/
if (self->_count == 0)
{
return 0;
}
else
{
unichar *u = 0;
unsigned l = 0;
unsigned s = 0;
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
NSDefaultMallocZone(), 0) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't convert to/from Unicode string."];
}
if (GSFromUnicode(0, &s, u, l, defEnc, 0, GSUniStrict) == NO)
{
NSZoneFree(NSDefaultMallocZone(), u);
[NSException raise: NSCharacterConversionException
format: @"Can't get cStringLength from string."];
}
NSZoneFree(NSDefaultMallocZone(), u);
return s;
}
}
}
static inline unsigned int
cStringLength_u(ivars self)
{
unsigned c;
unsigned l = self->_count;
unsigned limit = 0;
unsigned c = self->_count;
if (l > 0)
if (c == 0)
{
char *r;
r = (char*)NSZoneMalloc(NSDefaultMallocZone(), l*2 + 1);
limit = encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, NO);
if (limit == 0)
{
NSZoneFree(NSDefaultMallocZone(), r);
[NSException raise: NSCharacterConversionException
format: @"Can't get cStringLength from Unicode string."];
}
r[limit] = '\0';
c = strlen(r);
NSZoneFree(NSDefaultMallocZone(), r);
return 0;
}
else
{
c = 0;
unsigned l = 0;
if (GSFromUnicode(0, &l, self->_contents.u, c, defEnc, 0, GSUniStrict)
== NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't get cStringLength from Unicode string."];
}
return l;
}
return c;
}
static inline NSData*
@ -709,8 +784,8 @@ dataUsingEncoding_c(ivars self, NSStringEncoding encoding, BOOL flag)
return [NSDataClass data];
}
if ((encoding == defEnc)
|| ((defEnc == NSASCIIStringEncoding)
if ((encoding == intEnc)
|| ((intEnc == NSASCIIStringEncoding)
&& ((encoding == NSISOLatin1StringEncoding)
|| (encoding == NSISOLatin2StringEncoding)
|| (encoding == NSNEXTSTEPStringEncoding)
@ -724,51 +799,43 @@ dataUsingEncoding_c(ivars self, NSStringEncoding encoding, BOOL flag)
}
else if (encoding == NSUnicodeStringEncoding)
{
int t;
unichar *buff;
unsigned int l = 0;
unichar *r = 0;
unsigned int options = GSUniBOM;
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
sizeof(unichar)*(len+1));
buff[0] = 0xFEFF;
t = encode_cstrtoustr(buff+1, len, self->_contents.c, len, defEnc);
return [NSDataClass dataWithBytesNoCopy: buff
length: sizeof(unichar)*(t+1)];
if (flag == NO)
{
options |= GSUniStrict;
}
if (GSToUnicode(&r, &l, self->_contents.c, self->_count, intEnc,
NSDefaultMallocZone(), options) == NO)
{
return nil;
}
return [NSDataClass dataWithBytesNoCopy: r length: l];
}
else
{
int t;
int bsiz;
unichar *ubuff;
unsigned char *buff;
unichar *u = 0;
unsigned l = 0;
unsigned char *r = 0;
unsigned s = 0;
ubuff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
sizeof(unichar)*len);
t = encode_cstrtoustr(ubuff, len, self->_contents.c, len, defEnc);
if (encoding == NSUTF8StringEncoding)
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
NSDefaultMallocZone(), 0) == NO)
{
bsiz = t*4;
[NSException raise: NSCharacterConversionException
format: @"Can't convert to Unicode string."];
}
else
if (GSFromUnicode(&r, &s, u, l, encoding, NSDefaultMallocZone(),
(flag == NO) ? GSUniStrict : 0) == NO)
{
bsiz = t;
}
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
flag = (flag == YES) ? NO : YES;
t = encode_ustrtocstr(buff, bsiz, ubuff, t, encoding, flag);
NSZoneFree(NSDefaultMallocZone(), ubuff);
if (t == 0)
{
NSZoneFree(NSDefaultMallocZone(), buff);
NSZoneFree(NSDefaultMallocZone(), u);
return nil;
}
else
{
if (t != bsiz)
{
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
}
return [NSDataClass dataWithBytesNoCopy: buff length: t];
}
NSZoneFree(NSDefaultMallocZone(), u);
return [NSDataClass dataWithBytesNoCopy: r length: s];
}
}
@ -795,34 +862,15 @@ dataUsingEncoding_u(ivars self, NSStringEncoding encoding, BOOL flag)
}
else
{
int t;
int bsiz;
unsigned char *buff;
unsigned char *r = 0;
unsigned int l = 0;
if (encoding == NSUTF8StringEncoding)
if (GSFromUnicode(&r, &l, self->_contents.u, self->_count, encoding,
NSDefaultMallocZone(), (flag == NO) ? GSUniStrict : 0) == NO)
{
bsiz = len*4;
}
else
{
bsiz = len;
}
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
flag = (flag == YES) ? NO : YES;
t = encode_ustrtocstr(buff, bsiz, self->_contents.u, len, encoding, flag);
if (t == 0)
{
NSZoneFree(NSDefaultMallocZone(), buff);
return nil;
}
else
{
if (t != bsiz)
{
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
}
return [NSDataClass dataWithBytesNoCopy: buff length: t];
}
return [NSDataClass dataWithBytesNoCopy: r length: l];
}
}
@ -853,11 +901,11 @@ doubleValue_u(ivars self)
}
else
{
unsigned len = self->_count < 32 ? self->_count : 31;
char buf[len+1];
unsigned int l = self->_count < 10 ? self->_count : 9;
unsigned char buf[l+1];
unsigned char *b = buf;
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO);
buf[len] = '\0';
GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
return atof(buf);
}
}
@ -907,8 +955,10 @@ fillHole(ivars self, unsigned index, unsigned size)
static inline void
getCharacters_c(ivars self, unichar *buffer, NSRange aRange)
{
encode_cstrtoustr(buffer, aRange.length, self->_contents.c + aRange.location,
aRange.length, defEnc);
unsigned len = aRange.length;
GSToUnicode(&buffer, &len, self->_contents.c + aRange.location,
aRange.length, intEnc, 0, 0);
}
static inline void
@ -955,8 +1005,7 @@ static inline void
getCString_u(ivars self, char *buffer, unsigned int maxLength,
NSRange aRange, NSRange *leftoverRange)
{
int len;
int result;
unsigned int len;
if (maxLength > self->_count)
{
@ -981,9 +1030,8 @@ getCString_u(ivars self, char *buffer, unsigned int maxLength,
}
}
result = encode_ustrtocstr(buffer, len, &self->_contents.u[aRange.location],
len, defEnc, YES);
if (result != len)
if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len,
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't get cString from Unicode string."];
@ -1018,11 +1066,11 @@ intValue_u(ivars self)
}
else
{
unsigned len = self->_count < 32 ? self->_count : 31;
char buf[len+1];
unsigned int l = self->_count < 10 ? self->_count : 9;
unsigned char buf[l+1];
unsigned char *b = buf;
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO);
buf[len] = '\0';
GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
return atol(buf);
}
}
@ -1177,8 +1225,7 @@ lossyCString_u(ivars self)
unsigned l = self->_count;
unsigned char *r = (unsigned char*)_fastMallocBuffer(l + 1);
encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, NO);
r[l] = '\0';
GSFromUnicode(&r, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
return (const char*)r;
}
@ -1357,7 +1404,7 @@ rangeOfCharacter_c(ivars self, NSCharacterSet *aSet, unsigned mask,
if (letter > 127)
{
letter = encode_chartouni(letter, defEnc);
letter = encode_chartouni(letter, intEnc);
}
if ((*mImp)(aSet, cMemberSel, letter))
{
@ -1534,7 +1581,7 @@ transmute(ivars self, NSString *aString)
*/
transmute = NO;
}
else if ([aString canBeConvertedToEncoding: defEnc] == YES)
else if ([aString canBeConvertedToEncoding: intEnc] == YES)
{
/*
* This is a C string, but the other string can be converted to
@ -1567,11 +1614,11 @@ transmute(ivars self, NSString *aString)
if (transmute == YES)
{
unichar *tmp;
int len = self->_count;
unichar *tmp = 0;
int len = 0;
tmp = NSZoneMalloc(self->_zone, self->_capacity * sizeof(unichar));
len = encode_cstrtoustr(tmp, len, self->_contents.c, len, defEnc);
GSToUnicode(&tmp, &len, self->_contents.c, self->_count, intEnc,
self->_zone, 0);
if (self->_flags.free == 1)
{
NSZoneFree(self->_zone, self->_contents.c);
@ -1761,7 +1808,7 @@ transmute(ivars self, NSString *aString)
[aCoder encodeValueOfObjCType: @encode(unsigned) at: &_count];
if (_count > 0)
{
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &defEnc];
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &intEnc];
[aCoder encodeArrayOfObjCType: @encode(unsigned char)
count: _count
at: _contents.c];
@ -1770,7 +1817,7 @@ transmute(ivars self, NSString *aString)
- (NSStringEncoding) fastestEncoding
{
return defEnc;
return intEnc;
}
- (float) floatValue
@ -1885,7 +1932,7 @@ transmute(ivars self, NSString *aString)
- (NSStringEncoding) smallestEncoding
{
return defEnc;
return intEnc;
}
- (NSString*) substringFromRange: (NSRange)aRange
@ -2487,7 +2534,7 @@ transmute(ivars self, NSString *aString)
}
else
{
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &defEnc];
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &intEnc];
[aCoder encodeArrayOfObjCType: @encode(unsigned char)
count: _count
at: _contents.c];
@ -2500,7 +2547,7 @@ transmute(ivars self, NSString *aString)
if (_flags.wide == 1)
return NSUnicodeStringEncoding;
else
return defEnc;
return intEnc;
}
- (float) floatValue
@ -2817,7 +2864,7 @@ transmute(ivars self, NSString *aString)
maxLength: l];
}
_contents.c[aRange.location + l]
= encode_unitochar([aString characterAtIndex: l], defEnc);
= encode_unitochar([aString characterAtIndex: l], intEnc);
}
else
{
@ -2880,7 +2927,7 @@ transmute(ivars self, NSString *aString)
[aString getCString: _contents.c maxLength: l];
}
_contents.c[l]
= encode_unitochar([aString characterAtIndex: l], defEnc);
= encode_unitochar([aString characterAtIndex: l], intEnc);
}
else
{
@ -2896,7 +2943,7 @@ transmute(ivars self, NSString *aString)
return NSUnicodeStringEncoding;
}
else
return defEnc;
return intEnc;
}
- (NSString*) substringFromRange: (NSRange)aRange
@ -3209,7 +3256,7 @@ transmute(ivars self, NSString *aString)
if (((ivars)_parent)->_flags.wide == 1)
return NSUnicodeStringEncoding;
else
return defEnc;
return intEnc;
}
- (void) getCharacters: (unichar*)buffer
@ -3313,7 +3360,7 @@ transmute(ivars self, NSString *aString)
return NSUnicodeStringEncoding;
}
else
return defEnc;
return intEnc;
}
@end
@ -3436,7 +3483,7 @@ transmute(ivars self, NSString *aString)
if (c > 127)
{
c = encode_chartouni(c, defEnc);
c = encode_chartouni(c, intEnc);
}
ret = (ret << 5) + ret + c;
}

View file

@ -259,6 +259,7 @@ surrogatePairValue(unichar high, unichar low)
@implementation NSString
static NSStringEncoding _DefaultStringEncoding;
static BOOL _ByteEncodingOk;
static const unichar byteOrderMark = 0xFEFF;
static const unichar byteOrderMarkSwapped = 0xFFFE;
@ -344,6 +345,8 @@ handle_printf_atsign (FILE *stream,
ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);
_DefaultStringEncoding = GetDefEncoding();
_ByteEncodingOk = GSIsByteEncoding(_DefaultStringEncoding);
NSStringClass = self;
[self setVersion: 1];
NSMutableStringClass = [NSMutableString class];
@ -606,16 +609,22 @@ handle_printf_atsign (FILE *stream,
length: (unsigned int)length
freeWhenDone: (BOOL)flag
{
unichar *buf;
unichar *buf = 0;
unsigned int l = 0;
buf = (unichar*)NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*length);
length = encode_cstrtoustr(buf, length, byteString, length,
_DefaultStringEncoding);
if (flag == YES && byteString != 0)
if (GSToUnicode(&buf, &l, byteString, length, _DefaultStringEncoding,
[self zone], 0) == NO)
{
NSZoneFree(NSZoneFromPointer(byteString), byteString);
DESTROY(self);
}
else
{
if (flag == YES && byteString != 0)
{
NSZoneFree(NSZoneFromPointer(byteString), byteString);
}
self = [self initWithCharactersNoCopy: buf length: l freeWhenDone: YES];
}
self = [self initWithCharactersNoCopy: buf length: length freeWhenDone: YES];
return self;
}
@ -682,32 +691,44 @@ handle_printf_atsign (FILE *stream,
if (length > 0)
{
unsigned i;
unsigned i = 0;
/*
* Check to see if we have in fact got an ascii string
*/
for (i = 0; i < length; i++)
if (_ByteEncodingOk)
{
if (((unsigned char*)bytes)[i] > 127)
/*
* If it's ok to store ascii strings as internal C strings,
* check to see if we have in fact got an ascii string.
*/
while (i < length)
{
break;
if (((unsigned char*)bytes)[i] > 127)
{
break;
}
i++;
}
}
if (i == length)
{
self = [self initWithCString: bytes length: length];
}
else
{
unichar *s;
unichar *u = 0;
unsigned int l = 0;
s = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*length);
length = encode_cstrtoustr(s, length, bytes, length,
NSUTF8StringEncoding);
self = [self initWithCharactersNoCopy: s
length: length
freeWhenDone: YES];
if (GSToUnicode(&u, &l, bytes, length, NSUTF8StringEncoding,
GSObjCZone(self), 0) == NO)
{
DESTROY(self);
}
else
{
self = [self initWithCharactersNoCopy: u
length: l
freeWhenDone: YES];
}
}
}
else
@ -1071,96 +1092,139 @@ handle_printf_atsign (FILE *stream,
if (len == 0)
{
self = [self initWithCStringNoCopy: "" length: 0 freeWhenDone: NO];
self = [self initWithCharactersNoCopy: (unichar*)""
length: 0
freeWhenDone: NO];
}
else if (encoding == NSASCIIStringEncoding
|| encoding == _DefaultStringEncoding)
else if (_ByteEncodingOk == YES
&& (encoding==_DefaultStringEncoding || encoding==NSASCIIStringEncoding))
{
char *s = NSZoneMalloc(GSObjCZone(self), len);
char *s;
/*
* We can only create an internal C string if the default C string
* encoding is Ok, and the specified encoding matches it.
*/
s = NSZoneMalloc(GSObjCZone(self), len);
[data getBytes: s];
self = [self initWithCStringNoCopy: s length: len freeWhenDone: YES];
}
else if (encoding == NSUTF8StringEncoding)
{
const char *bytes = [data bytes];
unsigned i;
unsigned i = 0;
/*
* Check to see if we have in fact got an ascii string
*/
for (i = 0; i < len; i++)
if (_ByteEncodingOk)
{
if (((unsigned char*)bytes)[i] > 127)
/*
* If it's ok to store ascii strings as internal C strings,
* check to see if we have in fact got an ascii string.
*/
while (i < len)
{
break;
if (((unsigned char*)bytes)[i] > 127)
{
break;
}
i++;
}
}
if (i == len)
{
self = [self initWithCString: bytes length: len];
}
else
{
unichar *u;
unichar *u = 0;
unsigned int l = 0;
u = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*len);
len = encode_cstrtoustr(u, len, bytes, len,
NSUTF8StringEncoding);
if (len > 0)
if (GSToUnicode(&u, &l, bytes, len, NSUTF8StringEncoding,
GSObjCZone(self), 0) == NO)
{
self = [self initWithCharactersNoCopy: u
length: len
freeWhenDone: YES];
DESTROY(self);
}
else
{
DESTROY(self);
self = [self initWithCharactersNoCopy: u
length: l
freeWhenDone: YES];
}
}
}
else if (encoding == NSUnicodeStringEncoding)
{
if (len%2 != 0)
{
DESTROY(self); // Not valid unicode data.
}
else
{
BOOL swapped = NO;
unsigned char *b;
unichar *uptr;
b = (unsigned char*)[data bytes];
uptr = (unichar*)b;
if (*uptr == 0xFFFE)
{
b = (unsigned char*)++uptr;
len -= sizeof(unichar);
}
else if (*uptr == 0xFEFF)
{
b = (unsigned char*)++uptr;
len -= sizeof(unichar);
swapped = YES;
}
if (len == 0)
{
self = [self initWithCharactersNoCopy: (unichar*)""
length: 0
freeWhenDone: NO];
}
else
{
unsigned char *u;
u = (unsigned char*)NSZoneMalloc(GSObjCZone(self), len);
if (swapped == YES)
{
unsigned i;
for (i = 0; i < len; i += 2)
{
u[i] = b[i + 1];
u[i + 1] = b[i];
}
}
else
{
memcpy(u, b, len);
}
self = [self initWithCharactersNoCopy: (unichar*)u
length: len/2
freeWhenDone: YES];
}
}
}
else
{
unichar *u;
unsigned count;
const unsigned char *b;
unsigned char *b;
unichar *u = 0;
unsigned l = 0;
if (len < 1 || (len < 2 && encoding == NSUnicodeStringEncoding))
b = (unsigned char*)[data bytes];
if (GSToUnicode(&u, &l, b, len, NSUTF8StringEncoding, GSObjCZone(self),
0) == NO)
{
return [self initWithCStringNoCopy: "" length: 0 freeWhenDone: NO];
}
b = [data bytes];
u = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*(len+1));
if (encoding == NSUnicodeStringEncoding)
{
if ((b[0]==0xFE) & (b[1]==0xFF))
{
b = &b[2];
count -= 2;
}
for (count = 0; count < (len - 1); count += 2)
{
u[count/2 - 1] = 256*b[count + 1] + b[count];
}
count = count/2;
self = [self initWithCharactersNoCopy: u
length: count
freeWhenDone: YES];
DESTROY(self);
}
else
{
count = encode_cstrtoustr(u, len, b, len, encoding);
if (count < 1)
{
DESTROY(self);
}
else
{
self = [self initWithCharactersNoCopy: u
length: count
freeWhenDone: YES];
}
self = [self initWithCharactersNoCopy: u
length: l
freeWhenDone: YES];
}
}
return self;
@ -2314,47 +2378,27 @@ handle_printf_atsign (FILE *stream,
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
sizeof(unichar)*(len+1));
buff[0] = 0xFEFF;
for (count = 0; count < len; count++)
{
buff[count+1] = (*caiImp)(self, caiSel, count);
}
[self getCharacters: &buff[1]];
return [NSDataClass dataWithBytesNoCopy: buff
length: sizeof(unichar)*(len+1)];
}
else
{
int t;
int bsiz;
unsigned char *b = 0;
int l = 0;
unichar *u;
unsigned char *buff;
u = (unichar*)NSZoneMalloc(NSDefaultMallocZone(), len*sizeof(unichar));
[self getCharacters: u];
if (encoding == NSUTF8StringEncoding)
if (GSFromUnicode(&b, &l, u, len, encoding, NSDefaultMallocZone(),
(flag == NO) ? GSUniStrict : 0)
== NO)
{
bsiz = len * 4;
}
else
{
bsiz = len;
}
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
flag = (flag == YES) ? NO : YES;
t = encode_ustrtocstr(buff, bsiz, u, len, encoding, flag);
NSZoneFree(NSDefaultMallocZone(), u);
if (t == 0)
{
NSZoneFree(NSDefaultMallocZone(), buff);
NSZoneFree(NSDefaultMallocZone(), u);
return nil;
}
else
{
if (bsiz != t)
{
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
}
return [NSDataClass dataWithBytesNoCopy: buff length: t];
}
NSZoneFree(NSDefaultMallocZone(), u);
return [NSDataClass dataWithBytesNoCopy: b length: l];
}
return nil;
}

View file

@ -28,6 +28,7 @@
#include <config.h>
#include <Foundation/NSString.h>
#include <Foundation/NSLock.h>
#include <base/Unicode.h>
#include <stdio.h>
#include <stdlib.h>
@ -50,8 +51,10 @@ typedef struct {unichar from; char to;} _ucc_;
#endif
#include <errno.h>
// The rest of the GNUstep code stores UNICODE in internal byte order,
// so we do the same. This should be UCS-2-INTERNAL for libiconv
/*
* The whole of the GNUstep code stores UNICODE in internal byte order,
* so we do the same. This should be UCS-2-INTERNAL for libiconv
*/
#ifdef WORDS_BIGENDIAN
#define UNICODE_INT "UNICODEBIG"
#else
@ -62,236 +65,6 @@ typedef struct {unichar from; char to;} _ucc_;
static const char *unicode_enc = NULL;
#endif
/* Shorthand name for an unsigned byte. */
typedef unsigned char unc;
/*
 * Cached default C string encoding.  It holds GSUndefinedEncoding until
 * GetDefEncoding() has determined the real value.
 */
static NSStringEncoding defEnc = GSUndefinedEncoding;
/*
 * Zero-terminated list of the string encodings treated as available.
 * One of two lists is compiled in, depending on whether HAVE_ICONV is
 * defined.  Entries that are commented out are not offered.
 */
#ifdef HAVE_ICONV
/*
* FIXME: We should check dynamically which encodings are found on this
* computer as different implementations of iconv will support different
* encodings.
*/
static NSStringEncoding _availableEncodings[] = {
NSASCIIStringEncoding,
NSNEXTSTEPStringEncoding,
NSJapaneseEUCStringEncoding,
NSUTF8StringEncoding,
NSISOLatin1StringEncoding,
// NSSymbolStringEncoding,
// NSNonLossyASCIIStringEncoding,
NSShiftJISStringEncoding,
NSISOLatin2StringEncoding,
NSUnicodeStringEncoding,
NSWindowsCP1251StringEncoding,
NSWindowsCP1252StringEncoding,
NSWindowsCP1253StringEncoding,
NSWindowsCP1254StringEncoding,
NSWindowsCP1250StringEncoding,
NSISO2022JPStringEncoding,
NSMacOSRomanStringEncoding,
// NSProprietaryStringEncoding,
// GNUstep additions
NSISOCyrillicStringEncoding,
NSKOI8RStringEncoding,
NSISOLatin3StringEncoding,
NSISOLatin4StringEncoding,
NSISOArabicStringEncoding,
NSISOGreekStringEncoding,
NSISOHebrewStringEncoding,
NSGB2312StringEncoding,
NSGSM0338StringEncoding,
NSBIG5StringEncoding,
0	// End-of-list marker.
};
#else
// List used when HAVE_ICONV is not defined.
// Uncomment an entry once it is implemented.
static NSStringEncoding _availableEncodings[] = {
NSASCIIStringEncoding,
NSNEXTSTEPStringEncoding,
// NSJapaneseEUCStringEncoding,
// NSUTF8StringEncoding,
NSISOLatin1StringEncoding,
// NSSymbolStringEncoding,
// NSNonLossyASCIIStringEncoding,
// NSShiftJISStringEncoding,
NSISOLatin2StringEncoding,
NSUnicodeStringEncoding,
// NSWindowsCP1251StringEncoding,
// NSWindowsCP1252StringEncoding,
// NSWindowsCP1253StringEncoding,
// NSWindowsCP1254StringEncoding,
// NSWindowsCP1250StringEncoding,
// NSISO2022JPStringEncoding,
// NSMacOSRomanStringEncoding,
// NSProprietaryStringEncoding,
// GNUstep additions
NSISOCyrillicStringEncoding,
// NSKOI8RStringEncoding,
// NSISOLatin3StringEncoding,
// NSISOLatin4StringEncoding,
// NSISOArabicStringEncoding,
// NSISOGreekStringEncoding,
// NSISOHebrewStringEncoding,
// NSGB2312StringEncoding,
NSGSM0338StringEncoding,
NSBIG5StringEncoding,
0	// End-of-list marker.
};
#endif
/*
 * Pairs a string encoding constant with its printable name.
 */
struct _strenc_ {NSStringEncoding enc; char *ename;};
/*
 * Table mapping every known string encoding to its name.
 * The names are compared exactly (strcmp) against the value of the
 * GNUSTEP_STRING_ENCODING environment variable and are also handed back
 * to callers asking for an encoding name, so they must not contain any
 * stray whitespace.
 * The table is terminated by an entry whose encoding is zero; the name
 * of that entry is what gets reported for an unrecognised encoding.
 */
const struct _strenc_ str_encoding_table[]=
{
  {NSASCIIStringEncoding,"NSASCIIStringEncoding"},
  {NSNEXTSTEPStringEncoding,"NSNEXTSTEPStringEncoding"},
  {NSJapaneseEUCStringEncoding, "NSJapaneseEUCStringEncoding"},
  {NSUTF8StringEncoding,"NSUTF8StringEncoding"},
  {NSISOLatin1StringEncoding,"NSISOLatin1StringEncoding"},
  {NSSymbolStringEncoding,"NSSymbolStringEncoding"},
  {NSNonLossyASCIIStringEncoding,"NSNonLossyASCIIStringEncoding"},
  {NSShiftJISStringEncoding,"NSShiftJISStringEncoding"},
  {NSISOLatin2StringEncoding,"NSISOLatin2StringEncoding"},
  {NSUnicodeStringEncoding, "NSUnicodeStringEncoding"},
  {NSWindowsCP1251StringEncoding,"NSWindowsCP1251StringEncoding"},
  {NSWindowsCP1252StringEncoding,"NSWindowsCP1252StringEncoding"},
  {NSWindowsCP1253StringEncoding,"NSWindowsCP1253StringEncoding"},
  {NSWindowsCP1254StringEncoding,"NSWindowsCP1254StringEncoding"},
  {NSWindowsCP1250StringEncoding,"NSWindowsCP1250StringEncoding"},
  /* No trailing blank in the name, or an exact match can never succeed. */
  {NSISO2022JPStringEncoding,"NSISO2022JPStringEncoding"},
  {NSMacOSRomanStringEncoding, "NSMacOSRomanStringEncoding"},
  {NSProprietaryStringEncoding, "NSProprietaryStringEncoding"},
  // GNUstep additions
  {NSISOCyrillicStringEncoding,"NSISOCyrillicStringEncoding"},
  {NSKOI8RStringEncoding, "NSKOI8RStringEncoding"},
  {NSISOLatin3StringEncoding, "NSISOLatin3StringEncoding"},
  {NSISOLatin4StringEncoding, "NSISOLatin4StringEncoding"},
  {NSISOArabicStringEncoding, "NSISOArabicStringEncoding"},
  {NSISOGreekStringEncoding, "NSISOGreekStringEncoding"},
  {NSISOHebrewStringEncoding, "NSISOHebrewStringEncoding"},
  {NSISOLatin5StringEncoding, "NSISOLatin5StringEncoding"},
  {NSISOLatin6StringEncoding, "NSISOLatin6StringEncoding"},
  {NSISOLatin7StringEncoding, "NSISOLatin7StringEncoding"},
  {NSISOLatin8StringEncoding, "NSISOLatin8StringEncoding"},
  {NSISOLatin9StringEncoding, "NSISOLatin9StringEncoding"},
  {NSUTF7StringEncoding, "NSUTF7StringEncoding"},
  {NSGB2312StringEncoding, "NSGB2312StringEncoding"},
  {NSGSM0338StringEncoding, "NSGSM0338StringEncoding"},
  {NSBIG5StringEncoding, "NSBIG5StringEncoding"},
  {0, "Unknown encoding"}
};
/*
 * Hands back the static, zero-terminated list of string encodings that
 * are regarded as available.
 * FIXME: This should check which iconv definitions are available and
 * add them to the available encodings.
 */
NSStringEncoding *GetAvailableEncodings()
{
  NSStringEncoding	*list = _availableEncodings;

  return list;
}
/*
 * Returns the default C string encoding, determining it on the first call
 * and handing back the cached value afterwards.
 *
 * The encoding is named by the GNUSTEP_STRING_ENCODING environment
 * variable.  NSISOLatin1StringEncoding is used instead (with a warning on
 * stderr) when the name is unknown, when it names Unicode, UTF8 or the
 * Symbol encoding, or when the encoding is not in the list of available
 * encodings.  When the variable is not set at all, NSISOLatin1StringEncoding
 * is used silently.
 */
NSStringEncoding
GetDefEncoding()
{
  NSStringEncoding	*known;
  char			*wanted;
  unsigned int		pos;
  unsigned int		usable;

  if (defEnc != GSUndefinedEncoding)
    {
      return defEnc;		/* Already worked out by an earlier call. */
    }

  known = GetAvailableEncodings();
  wanted = getenv("GNUSTEP_STRING_ENCODING");
  if (wanted == 0)
    {
      /*
       * No environment setting ... quietly fall back to Latin1.
       * (This really ought to come from the user defaults - asf)
       */
      defEnc = NSISOLatin1StringEncoding;
      return defEnc;
    }

  /* Look the requested name up in the table of encoding names. */
  for (pos = 0; str_encoding_table[pos].enc != 0; pos++)
    {
      if (strcmp(str_encoding_table[pos].ename, wanted) == 0)
	{
	  break;
	}
    }
  if (str_encoding_table[pos].enc == 0)
    {
      /* Reached the terminator ... the name is not one we know. */
      fprintf(stderr,
	"WARNING: %s - encoding not supported.\n", wanted);
      fprintf(stderr, "NSISOLatin1StringEncoding set as default.\n");
      defEnc = NSISOLatin1StringEncoding;
      return defEnc;
    }

  defEnc = str_encoding_table[pos].enc;
  if (defEnc == NSUnicodeStringEncoding
    || defEnc == NSUTF8StringEncoding
    || defEnc == NSSymbolStringEncoding)
    {
      /* These can never be used as the default C string encoding. */
      fprintf(stderr, "WARNING: %s - encoding not supported as "
	"default c string encoding.\n", wanted);
      fprintf(stderr,
	"NSISOLatin1StringEncoding set as default.\n");
      defEnc = NSISOLatin1StringEncoding;
      return defEnc;
    }

  /* The encoding should be supported ... but is it implemented? */
  usable = 0;
  for (pos = 0; known[pos] != 0; pos++)
    {
      if (known[pos] == defEnc)
	{
	  usable = 1;
	  break;
	}
    }
  if (usable == 0 && defEnc != NSISOLatin1StringEncoding)
    {
      fprintf(stderr,
	"WARNING: %s - encoding not yet implemented.\n",
	wanted);
      fprintf(stderr,
	"NSISOLatin1StringEncoding set as default.\n");
      defEnc = NSISOLatin1StringEncoding;
    }
  return defEnc;
}
/*
 * Returns the name of the supplied string encoding as an NSString.
 * The name is taken from str_encoding_table; if the encoding is not
 * listed there, the name held in the terminating entry of the table
 * is returned instead.
 */
NSString*
GetEncodingName(NSStringEncoding encoding)
{
  const struct _strenc_	*entry = str_encoding_table;

  /* Step along the table until we hit a match or the zero terminator. */
  while (entry->enc != 0 && entry->enc != encoding)
    {
      entry++;
    }
  return [NSString stringWithCString: entry->ename];
}
#ifdef HAVE_ICONV
/* Check to see what type of internal unicode format the library supports */
static const char *
internal_unicode_enc()
@ -316,79 +89,302 @@ internal_unicode_enc()
return unicode_enc;
}
static const char *
iconv_stringforencoding(NSStringEncoding enc)
#endif
typedef unsigned char unc;
static NSStringEncoding defEnc = GSUndefinedEncoding;
static NSStringEncoding *_availableEncodings = 0;
/*
 * Describes a single string encoding: the constant identifying it, its
 * printable name, its iconv name, and flags recording what can be done
 * with it.
 */
struct _strenc_ {
NSStringEncoding enc; // Constant representing the encoding.
const char *ename; // ASCII string representation of name.
const char *iconv; /* Iconv name of encoding. If this
* is null, we cannot use iconv to
* perform conversions to/from this
* encoding.
*/
BOOL eightBit; /* Flag to say whether this encoding
* can be stored in a byte array ...
* ie whether the encoding consists
* entirely of single byte characters
* and the first 128 are identical to
* the ASCII character set.
*/
BOOL supported; /* Is this supported? Some encodings
* have builtin conversion to/from
* unicode, but for others we must
* check with iconv to see if it
* supports them on this platform.
*/
};
/*
 * The str_encoding_table is a compact representation of all the string
 * encoding information we might need. It gets modified at runtime.
 *
 * Each initialiser lists the fields of struct _strenc_ in declaration
 * order: { enc, ename, iconv, eightBit, supported }.
 * A zero in the iconv column means there is no iconv name for the encoding.
 * The final entry, whose enc field is zero, terminates the table; code that
 * scans the table by name or value stops when it reaches that entry.
 */
static struct _strenc_ str_encoding_table[] = {
{NSASCIIStringEncoding,"NSASCIIStringEncoding","ASCII",1,1},
{NSNEXTSTEPStringEncoding,"NSNEXTSTEPStringEncoding","NEXTSTEP",1,1},
{NSJapaneseEUCStringEncoding, "NSJapaneseEUCStringEncoding","EUC-JP",0,0},
{NSUTF8StringEncoding,"NSUTF8StringEncoding","UTF-8",0,0},
{NSISOLatin1StringEncoding,"NSISOLatin1StringEncoding","ISO-8859-1",1,1},
{NSSymbolStringEncoding,"NSSymbolStringEncoding",0,0,0},
{NSNonLossyASCIIStringEncoding,"NSNonLossyASCIIStringEncoding",0,1,1},
{NSShiftJISStringEncoding,"NSShiftJISStringEncoding","SHIFT-JIS",0,0},
{NSISOLatin2StringEncoding,"NSISOLatin2StringEncoding","ISO-8859-2",1,1},
{NSUnicodeStringEncoding, "NSUnicodeStringEncoding",0,0,1},
{NSWindowsCP1251StringEncoding,"NSWindowsCP1251StringEncoding","CP1251",0,0},
{NSWindowsCP1252StringEncoding,"NSWindowsCP1252StringEncoding","CP1252",0,0},
{NSWindowsCP1253StringEncoding,"NSWindowsCP1253StringEncoding","CP1253",0,0},
{NSWindowsCP1254StringEncoding,"NSWindowsCP1254StringEncoding","CP1254",0,0},
{NSWindowsCP1250StringEncoding,"NSWindowsCP1250StringEncoding","CP1250",0,0},
{NSISO2022JPStringEncoding,"NSISO2022JPStringEncoding","ISO-2022-JP",0,0},
{NSMacOSRomanStringEncoding, "NSMacOSRomanStringEncoding","MACINTOSH",0,0},
{NSProprietaryStringEncoding, "NSProprietaryStringEncoding",0,0,0},
// GNUstep additions
{NSISOCyrillicStringEncoding,"NSISOCyrillicStringEncoding","ISO-8859-5",0,1},
{NSKOI8RStringEncoding, "NSKOI8RStringEncoding","KOI8-R",0,0},
{NSISOLatin3StringEncoding, "NSISOLatin3StringEncoding","ISO-8859-3",0,0},
{NSISOLatin4StringEncoding, "NSISOLatin4StringEncoding","ISO-8859-4",0,0},
{NSISOArabicStringEncoding, "NSISOArabicStringEncoding","ISO-8859-6",0,0},
{NSISOGreekStringEncoding, "NSISOGreekStringEncoding","ISO-8859-7",0,0},
{NSISOHebrewStringEncoding, "NSISOHebrewStringEncoding","ISO-8859-8",0,0},
{NSISOLatin5StringEncoding, "NSISOLatin5StringEncoding","ISO-8859-9",0,0},
{NSISOLatin6StringEncoding, "NSISOLatin6StringEncoding","ISO-8859-10",0,0},
{NSISOLatin7StringEncoding, "NSISOLatin7StringEncoding","ISO-8859-13",0,0},
{NSISOLatin8StringEncoding, "NSISOLatin8StringEncoding","ISO-8859-14",0,0},
{NSISOLatin9StringEncoding, "NSISOLatin9StringEncoding","ISO-8859-15",0,0},
{NSUTF7StringEncoding, "NSUTF7StringEncoding",0,0,0},
{NSGB2312StringEncoding, "NSGB2312StringEncoding","EUC-CN",0,0},
{NSGSM0338StringEncoding, "NSGSM0338StringEncoding",0,0,1},
{NSBIG5StringEncoding, "NSBIG5StringEncoding","BIG5",0,0},
{0,"Unknown encoding",0,0,0}
};
static struct _strenc_ **encodingTable = 0;
static unsigned encTableSize = 0;
NSStringEncoding *GetAvailableEncodings()
{
switch (enc)
if (_availableEncodings == 0)
{
case NSASCIIStringEncoding:
return "ASCII";
case NSNEXTSTEPStringEncoding:
return "NEXTSTEP";
case NSISOLatin1StringEncoding:
return "ISO-8859-1";
case NSISOLatin2StringEncoding:
return "ISO-8859-2";
case NSUnicodeStringEncoding:
return UNICODE_ENC;
case NSJapaneseEUCStringEncoding:
return "EUC-JP";
case NSUTF8StringEncoding:
return "UTF-8";
case NSShiftJISStringEncoding:
return "SHIFT-JIS";
case NSWindowsCP1250StringEncoding:
return "CP1250";
case NSWindowsCP1251StringEncoding:
return "CP1251";
case NSWindowsCP1252StringEncoding:
return "CP1252";
case NSWindowsCP1253StringEncoding:
return "CP1253";
case NSWindowsCP1254StringEncoding:
return "CP1254";
case NSISO2022JPStringEncoding:
return "ISO-2022-JP";
case NSMacOSRomanStringEncoding:
return "MACINTOSH";
[gnustep_global_lock lock];
if (_availableEncodings == 0)
{
NSStringEncoding *encodings;
unsigned count;
unsigned pos;
unsigned i;
// GNUstep extensions
case NSKOI8RStringEncoding:
return "KOI8-R";
case NSISOLatin3StringEncoding:
return "ISO-8859-3";
case NSISOLatin4StringEncoding:
return "ISO-8859-4";
case NSISOCyrillicStringEncoding:
return "ISO-8859-5";
case NSISOArabicStringEncoding:
return "ISO-8859-6";
case NSISOGreekStringEncoding:
return "ISO-8859-7";
case NSISOHebrewStringEncoding:
return "ISO-8859-8";
/*
* We want to store pointers to our string encoding info in a
* large table so we can do efficient lookup by encoding value.
*/
#define MAX_ENCODING 128
count = sizeof(str_encoding_table) / sizeof(struct _strenc_);
case NSISOLatin5StringEncoding:
return "ISO-8859-9";
case NSISOLatin6StringEncoding:
return "ISO-8859-10";
case NSISOLatin7StringEncoding:
return "ISO-8859-13";
case NSISOLatin8StringEncoding:
return "ISO-8859-14";
case NSISOLatin9StringEncoding:
return "ISO-8859-15";
/*
* First determine the largest encoding value and create a
* large enough table of pointers.
*/
encTableSize = 0;
for (i = 0; i < count; i++)
{
unsigned tmp = str_encoding_table[i].enc;
case NSGB2312StringEncoding:
return "EUC-CN";
if (tmp >= MAX_ENCODING)
{
fprintf(stderr, "ERROR ... illegal NSStringEncoding "
"value in str_encoding_table. Ignored\n");
}
else if (tmp > encTableSize)
{
encTableSize = tmp;
}
}
encodingTable = malloc((encTableSize+1)*sizeof(struct _strenc_ *));
memset(encodingTable, 0, (encTableSize+1)*sizeof(struct _strenc_ *));
case NSBIG5StringEncoding:
return "BIG5";
default:
return "";
/*
* Now set up the pointers at the correct location in the table.
*/
for (i = 0; i < count; i++)
{
unsigned tmp = str_encoding_table[i].enc;
if (tmp < MAX_ENCODING)
{
encodingTable[tmp] = &str_encoding_table[i];
}
}
/*
* Now build up a list of supported encodings ... in the
* format needed to support [NSString+availableStringEncodings]
* Check to see what iconv support we have as we go along.
* This is also the place where we determine the name we use
* for iconv to support unicode.
*/
encodings = objc_malloc(sizeof(NSStringEncoding) * count);
pos = 0;
for (i = 0; i < count; i++)
{
NSStringEncoding enc = str_encoding_table[i].enc;
if (enc == 0 || enc >= MAX_ENCODING)
{
continue;
}
#ifdef HAVE_ICONV
if (enc == NSUnicodeStringEncoding)
{
encodingTable[enc]->iconv = UNICODE_ENC;
encodingTable[enc]->supported = 1;
}
if (encodingTable[enc]->supported == 0)
{
if (encodingTable[enc]->iconv == 0)
{
continue; // Not handled by iconv.
}
else
{
iconv_t c;
c = iconv_open(UNICODE_ENC, encodingTable[enc]->iconv);
if (c == (iconv_t)-1)
{
continue; // Can't convert to unicode
}
iconv_close(c);
c = iconv_open(encodingTable[enc]->iconv, UNICODE_ENC);
if (c == (iconv_t)-1)
{
continue; // Can't convert from unicode
}
iconv_close(c);
encodingTable[enc]->supported = 1;
}
}
#else
if (encodingTable[enc]->supported == 0)
{
continue;
}
#endif
encodings[pos++] = enc;
}
encodings[pos] = 0;
_availableEncodings = encodings;
}
[gnustep_global_lock unlock];
}
return _availableEncodings;
}
/*
 * Returns the default C string encoding, working it out on first use.
 *
 * The value of the GNUSTEP_STRING_ENCODING environment variable is compared
 * with the ename fields of str_encoding_table.  NSISOLatin1StringEncoding is
 * used when the variable is not set, when it names no listed encoding, or
 * when it names an encoding whose 'supported' flag is clear; in the last two
 * cases a warning is written to stderr.
 *
 * The decision is made while holding gnustep_global_lock and defEnc is
 * assigned exactly once, with the final value.  The first test of defEnc is
 * made without the lock, so storing a candidate that is then rejected
 * could let another caller return an unsupported encoding.
 */
NSStringEncoding
GetDefEncoding()
{
  if (defEnc == GSUndefinedEncoding)
    {
      [gnustep_global_lock lock];
      if (defEnc == GSUndefinedEncoding)
        {
          NSStringEncoding      chosen = NSISOLatin1StringEncoding;
          char                  *encoding;

          /*
           * Called only for its side effects ... the result is not needed
           * here, but the encoding tables must be set up before the
           * 'supported' flags are consulted below.
           */
          (void)GetAvailableEncodings();

          encoding = getenv("GNUSTEP_STRING_ENCODING");
          if (encoding != 0)
            {
              unsigned int      count = 0;

              while (str_encoding_table[count].enc
                && strcmp(str_encoding_table[count].ename, encoding))
                {
                  count++;
                }
              if (str_encoding_table[count].enc == 0)
                {
                  /* Reached the table terminator ... name not found. */
                  fprintf(stderr,
                    "WARNING: %s - encoding not supported.\n", encoding);
                  fprintf(stderr,
                    "NSISOLatin1StringEncoding set as default.\n");
                }
              else if (str_encoding_table[count].supported == 0)
                {
                  fprintf(stderr, "WARNING: %s - encoding not implemented as "
                    "default c string encoding.\n", encoding);
                  fprintf(stderr,
                    "NSISOLatin1StringEncoding set as default.\n");
                }
              else
                {
                  chosen = str_encoding_table[count].enc;
                }
            }
          /*
           * If the environment variable was not found we quietly use the
           * fallback.
           * shouldn't be required. It really should be in UserDefaults - asf
           */
          defEnc = chosen;
        }
      [gnustep_global_lock unlock];
    }
  return defEnc;
}
/*
 * Returns the eightBit flag recorded for the encoding, or NO if the
 * encoding is zero or has no entry in encodingTable.
 *
 * encTableSize holds the largest encoding value placed in encodingTable
 * (the table itself has encTableSize + 1 slots), so an encoding equal to
 * encTableSize is a legal index and only larger values are out of range.
 */
BOOL
GSIsByteEncoding(NSStringEncoding encoding)
{
  /* Make sure encodingTable and encTableSize have been set up. */
  GetAvailableEncodings();
  if (encoding == 0 || encoding > encTableSize
    || encodingTable[encoding] == 0)
    {
      return NO;
    }
  return encodingTable[encoding]->eightBit;
}
/*
 * Returns the name of a string encoding as an NSString, or
 * @"Unknown encoding" if the encoding is zero or has no entry in
 * encodingTable.
 *
 * encTableSize holds the largest encoding value placed in encodingTable
 * (the table itself has encTableSize + 1 slots), so an encoding equal to
 * encTableSize is a legal index and only larger values are out of range.
 */
NSString*
GSEncodingName(NSStringEncoding encoding)
{
  /* Make sure encodingTable and encTableSize have been set up. */
  GetAvailableEncodings();
  if (encoding == 0 || encoding > encTableSize
    || encodingTable[encoding] == 0)
    {
      return @"Unknown encoding";
    }
  return [NSString stringWithCString: encodingTable[encoding]->ename];
}
/*
 * Returns the name of a string encoding as an NSString.
 * This simply forwards to GSEncodingName() and returns its result.
 */
NSString*
GetEncodingName(NSStringEncoding encoding)
{
return GSEncodingName(encoding);
}
/*
 * Returns the iconv name recorded for the encoding.
 * An empty string is returned if the encoding is zero, has no entry in
 * encodingTable, or has an entry with no iconv name.  The result is never
 * a null pointer, so it can be handed to iconv_open() or printed with a
 * "%s" format without a separate check.
 *
 * encTableSize holds the largest encoding value placed in encodingTable
 * (the table itself has encTableSize + 1 slots), so an encoding equal to
 * encTableSize is a legal index and only larger values are out of range.
 */
static const char *
iconv_stringforencoding(NSStringEncoding encoding)
{
  const char    *name;

  /* Make sure encodingTable and encTableSize have been set up. */
  GetAvailableEncodings();
  if (encoding == 0 || encoding > encTableSize
    || encodingTable[encoding] == 0)
    {
      return "";
    }
  name = encodingTable[encoding]->iconv;
  return (name == 0) ? "" : name;
}
#ifdef HAVE_ICONV
int
iconv_cstrtoustr(unichar *u2, int size2, const char *s1, int size1,
NSStringEncoding enc)
@ -1368,7 +1364,8 @@ if (dst == 0) \
} \
else if (zone == 0) \
{ \
return NO; /* No buffer growth possible ... fail. */ \
result = NO; /* No buffer growth possible ... fail. */ \
break; \
} \
else \
{ \
@ -1397,7 +1394,8 @@ else \
} \
if (ptr == 0) \
{ \
return NO; /* Not enough memory */ \
result = NO; /* Not enough memory */ \
break; \
} \
bsize = grow / sizeof(unichar); \
}
@ -1441,6 +1439,11 @@ else \
* <item>If GSUniTemporary is set, the function will return the results in
* an autoreleased buffer rather than in a buffer that the caller must
* release.</item>
* <item>If GSUniBOM is set, the function will write the first unicode
* character as a byte order marker.</item>
* <item>If GSUniShortOk is set, the function will return a buffer containing
* any decoded characters even if the whole conversion fails.</item>
* </list>
* <p>On return, the function result is a flag indicating success (YES)
* or failure (NO), and on success, the value stored in size is the number
@ -1464,9 +1467,11 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
unsigned extra = (options & GSUniTerminate) ? sizeof(unichar) : 0;
unichar base = 0;
unichar *table = 0;
BOOL result = YES;
if (slen == 0)
{
*size = 0;
return YES;
}
@ -1484,6 +1489,15 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
bsize = *size;
}
if (options & GSUniBOM)
{
while (dpos >= bsize)
{
GROW();
}
ptr[dpos++] = (unichar)0xFEFF; // Insert byte order marker.
}
switch (enc)
{
case NSNonLossyASCIIStringEncoding:
@ -1577,19 +1591,20 @@ tables:
default:
#ifdef HAVE_ICONV
{
iconv_t cd;
char *inbuf;
char *outbuf;
size_t inbytesleft;
size_t outbytesleft;
size_t result;
size_t rval;
iconv_t cd;
cd = iconv_open(UNICODE_ENC, iconv_stringforencoding(enc));
if (cd == (iconv_t)-1)
{
NSLog(@"No iconv for encoding %@ tried to use %s",
GetEncodingName(enc), iconv_stringforencoding(enc));
return NO;
result = NO;
break;
}
inbuf = (char*)src;
@ -1606,10 +1621,12 @@ tables:
outbuf = (char*)&ptr[dpos];
outbytesleft = (bsize - old) * sizeof(unichar);
}
result = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (result == (size_t)-1 && errno != E2BIG)
rval = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (rval == (size_t)-1 && errno != E2BIG)
{
return NO;
result = NO;
iconv_close(cd);
break;
}
dpos = (bsize * sizeof(unichar) - outbytesleft) / sizeof(unichar);
}
@ -1617,7 +1634,7 @@ tables:
iconv_close(cd);
}
#else
return NO;
result = NO;
#endif
}
@ -1629,7 +1646,7 @@ tables:
ptr[dpos] = (unichar)0;
}
*size = dpos;
if (dst != 0)
if (dst != 0 && (result == YES || (options & GSUniShortOk)))
{
if (options & GSUniTemporary)
{
@ -1670,14 +1687,14 @@ tables:
{
ptr = NSZoneRealloc(zone, ptr, bytes);
}
if (ptr == 0)
{
return NO;
}
}
*dst = ptr;
}
return YES;
else if (ptr != buf && ptr != *dst)
{
NSZoneFree(zone, ptr);
}
return result;
}
#undef GROW
@ -1696,7 +1713,8 @@ if (dst == 0) \
} \
else if (zone == 0) \
{ \
return NO; /* No buffer growth possible ... fail. */ \
result = NO; /* No buffer growth possible ... fail. */ \
break; \
} \
else \
{ \
@ -1724,7 +1742,8 @@ else \
} \
if (ptr == 0) \
{ \
return NO; /* Not enough memory */ \
result = NO; /* Not enough memory */ \
break; \
} \
bsize = grow; \
}
@ -1772,6 +1791,11 @@ else \
* <item>If GSUniTemporary is set, the function will return the results in
* an autoreleased buffer rather than in a buffer that the caller must
* release.</item>
* <item>If GSUniBOM is set, the function will read the first unicode
* character as a byte order marker, and will fail if that character is
* missing or is not a byte order marker.</item>
* <item>If GSUniShortOk is set, the function will return a buffer containing
* any converted characters even if the whole conversion fails.</item>
* </list>
* <p>On return, the function result is a flag indicating success (YES)
* or failure (NO), and on success, the value stored in size is the number
@ -1797,12 +1821,40 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
unichar base = 0;
_ucc_ *table = 0;
unsigned tsize = 0;
BOOL swapped = NO;
BOOL result = YES;
if (options & GSUniBOM)
{
unichar c;
if (slen == 0)
{
*size = 0;
return NO; // Missing byte order marker.
}
c = *src++;
slen--;
if (c != 0xFEFF)
{
if (c == 0xFFFE)
{
swapped = YES;
}
else
{
*size = 0;
return NO; // Illegal byte order marker.
}
}
}
if (slen == 0)
{
*size = 0;
return YES;
}
/*
* Ensure we have an initial buffer set up to decode data into.
*/
@ -1829,12 +1881,17 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
goto bases;
bases:
if (strict == YES)
if (strict == NO)
{
while (spos < slen)
{
unichar u = src[spos++];
if (swapped == YES)
{
/*
 * The byte order marker was read as 0xFFFE, so exchange the two
 * bytes of the character.  The mask must be parenthesised because
 * '>>' binds more tightly than '&'.
 */
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
}
if (dpos >= bsize)
{
GROW();
@ -1855,6 +1912,10 @@ bases:
{
unichar u = src[spos++];
if (swapped == YES)
{
/*
 * The byte order marker was read as 0xFFFE, so exchange the two
 * bytes of the character.  The mask must be parenthesised because
 * '>>' binds more tightly than '&'.
 */
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
}
if (dpos >= bsize)
{
GROW();
@ -1865,7 +1926,8 @@ bases:
}
else
{
return NO;
result = NO;
break;
}
}
}
@ -1904,6 +1966,11 @@ tables:
{
unichar u = src[spos++];
if (swapped == YES)
{
/*
 * The byte order marker was read as 0xFFFE, so exchange the two
 * bytes of the character.  The mask must be parenthesised because
 * '>>' binds more tightly than '&'.
 */
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
}
if (dpos >= bsize)
{
GROW();
@ -1941,6 +2008,11 @@ tables:
{
unichar u = src[spos++];
if (swapped == YES)
{
/*
 * The byte order marker was read as 0xFFFE, so exchange the two
 * bytes of the character.  The mask must be parenthesised because
 * '>>' binds more tightly than '&'.
 */
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
}
if (dpos >= bsize)
{
GROW();
@ -1958,7 +2030,9 @@ tables:
{
if (++i >= tsize)
{
return NO;
result = NO;
spos = slen;
break;
}
}
ptr[dpos++] = table[--i].to;
@ -1971,8 +2045,13 @@ tables:
while (spos < slen)
{
unichar u = src[spos++];
int res;
int i = 0;
int res;
int i = 0;
if (swapped == YES)
{
/*
 * The byte order marker was read as 0xFFFE, so exchange the two
 * bytes of the character.  The mask must be parenthesised because
 * '>>' binds more tightly than '&'.
 */
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
}
if (dpos >= bsize)
{
@ -1994,7 +2073,8 @@ tables:
{
if (strict == YES)
{
return NO;
result = NO;
break;
}
for (i = 0; i < GSM0338_esize; i++)
{
@ -2026,14 +2106,15 @@ tables:
char *outbuf;
size_t inbytesleft;
size_t outbytesleft;
size_t result;
size_t rval;
cd = iconv_open(iconv_stringforencoding(enc), UNICODE_ENC);
if (cd == (iconv_t)-1)
{
NSLog(@"No iconv for encoding %@ tried to use %s",
GetEncodingName(enc), iconv_stringforencoding(enc));
return NO;
result = NO;
break;
}
inbuf = (char*)src;
@ -2050,14 +2131,15 @@ tables:
outbuf = (char*)&ptr[dpos];
outbytesleft = (bsize - old);
}
result = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (result == (size_t)-1 && errno != E2BIG)
rval = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (rval == (size_t)-1 && errno != E2BIG)
{
if (errno == EILSEQ)
{
if (strict == YES)
{
return NO;
result = NO;
break;
}
/*
* If we are allowing lossy conversion, we replace any
@ -2073,7 +2155,8 @@ tables:
}
else if (errno != E2BIG)
{
return NO;
result = NO;
break;
}
}
dpos = bsize - outbytesleft;
@ -2082,7 +2165,8 @@ tables:
iconv_close(cd);
}
#else
return NO;
result = NO;
break;
#endif
}
@ -2094,7 +2178,7 @@ tables:
ptr[dpos] = (unsigned char)0;
}
*size = dpos;
if (dst != 0)
if (dst != 0 && (result == YES || (options & GSUniShortOk)))
{
if (options & GSUniTemporary)
{
@ -2135,14 +2219,14 @@ tables:
{
ptr = NSZoneRealloc(zone, ptr, bytes);
}
if (ptr == 0)
{
return NO;
}
}
*dst = ptr;
}
return YES;
else if (ptr != buf && ptr != *dst)
{
NSZoneFree(zone, ptr);
}
return result;
}
#undef GROW

View file

@ -11,7 +11,7 @@
void
print_string(NSString* s)
{
printf("The string [%s], length %d\n", [s cString], [s length]);
printf("The string [%s], length %d\n", [s lossyCString], [s length]);
}
#include <Foundation/NSString.h>
@ -24,21 +24,31 @@ int main()
id s = @"This is a test string";
id s2, s3;
int a;
unichar uc[6] = { '1', '2', '.', '3', '4', 0};
unichar u0[5] = { 0xFE66, 'a', 'b', 'c', 'd'};
unichar u1[6] = { '1', '2', '.', '3', '4', 0xFE66};
unichar u2[7] = { 'a', 'b', 0xFE66, 'a', 'b', 'c', 'd'};
NSString *us0 = [NSString stringWithCharacters: u0 length: 5];
NSString *us1 = [NSString stringWithCharacters: u1 length: 6];
NSString *us2 = [NSString stringWithCharacters: u2 length: 7];
NSMutableString *fo = [NSMutableString stringWithString: @"abcdef"];
NSMutableString *f1 = [NSMutableString stringWithString: @"ab"];
NSMutableString *fo = [NSMutableString stringWithString: @"abcdefg"];
NS_DURING
[fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: @"aa"];
[fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: us1];
NS_HANDLER
printf("Caught exception during string replacement (expected)\n");
NS_ENDHANDLER
[f1 appendString: us0];
print_string(f1);
printf("%d\n", [f1 isEqual: us2]);
print_string(s);
s2 = NSStringFromPoint(NSMakePoint(1.374, 5.100));
print_string(s2);
printf("%f", [[NSString stringWithCharacters: uc length: 5] floatValue]);
printf("%f", [[NSString stringWithCharacters: u1 length: 5] floatValue]);
s2 = [s copy];
print_string(s2);
@ -69,6 +79,23 @@ int main()
NSLog(@"A string with precision %d is :%.*@:", a, a, @"String");
#endif
{
NSMutableString *base = [@"hello" mutableCopy];
NSString *ext = [@"\"\\UFE66???\"" propertyList];
NSString *want = [@"\"hello\\UFE66???\"" propertyList];
int i;
[base appendString: ext];
printf("%u\n", [base length]);
printf("%u\n", [ext length]);
printf("%u\n", [want length]);
for (i = 0; i < 4; i++)
printf("%x\n", [ext characterAtIndex: i]);
for (i = 0; i < 9; i++)
printf("%x,%x\n", [base characterAtIndex: i], [want characterAtIndex: i]);
printf("%u\n", [want isEqual: base]);
}
[arp release];
exit(0);
}