Low level character encoding rewrite.

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@13133 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
CaS 2002-03-16 09:54:50 +00:00
parent 7b2cb59cfa
commit 7f44507081
7 changed files with 819 additions and 582 deletions

View file

@ -1,3 +1,25 @@
2002-03-16 Richard Frith-Macdonald <rfm@gnu.org>
* Headers/gnustep/base/Unicode.h: Add more options for character
encoding conversion routines.
* Source/GSPrivate.h: Add a couple of private functions for character
encoding management.
* Source/GSString.m: Convert throughout to use new functions for
converting from one string encoding to another. Make changes to
handle setting of default C string encoding to be an encoding which
is incompatible with internal 8-bit string objects.
* Source/NSString.m: ditto
* Source/Unicode.m: new string encoding conversion functions extended
with a few new options. Also fixed some memory allocation bugs to
cure memory leaks.
Rewritten low level support for different character encodings ...
should provide more efficient and maintainable conversion between
encodings and permit use of wide character encodings and encodings
with multibyte sequences as the default C string encoding.
Testing ... minimal ... we could do with decent tests for this stuff.
So this version must be viewed as possibly very unstable!
2002-03-14 Adam Fedor <fedor@gnu.org> 2002-03-14 Adam Fedor <fedor@gnu.org>
* Version: 1.3.0 * Version: 1.3.0

View file

@ -63,6 +63,8 @@ GS_EXPORT unichar *uni_is_decomp(unichar u);
#define GSUniTerminate 0x01 #define GSUniTerminate 0x01
#define GSUniTemporary 0x02 #define GSUniTemporary 0x02
#define GSUniStrict 0x04 #define GSUniStrict 0x04
#define GSUniBOM 0x08
#define GSUniShortOk 0x10
GS_EXPORT BOOL GSFromUnicode(unsigned char **dst, unsigned int *size, GS_EXPORT BOOL GSFromUnicode(unsigned char **dst, unsigned int *size,
const unichar *src, unsigned int slen, NSStringEncoding enc, NSZone *zone, const unichar *src, unsigned int slen, NSStringEncoding enc, NSZone *zone,

View file

@ -24,6 +24,17 @@
#define __GSPrivate_h_ #define __GSPrivate_h_
/*
* Function to get the name of a string encoding as an NSString.
*/
GS_EXPORT NSString *GSEncodingName(NSStringEncoding encoding);
/*
* Function to determine whether data in a particular encoding can
* generally be represented as 8-bit characters including ascii.
*/
GS_EXPORT BOOL GSIsByteEncoding(NSStringEncoding encoding);
/* /*
* Private concrete string classes. * Private concrete string classes.
* NB. All these concrete string classes MUST have the same initial ivar * NB. All these concrete string classes MUST have the same initial ivar
@ -35,8 +46,8 @@
@interface GSString : NSString @interface GSString : NSString
{ {
union { union {
unichar *u; unichar *u; // 16-bit unicode characters.
unsigned char *c; unsigned char *c; // 8-bit characters.
} _contents; } _contents;
unsigned int _count; unsigned int _count;
struct { struct {

View file

@ -224,6 +224,7 @@ static SEL hashSel;
static unsigned (*hashImp)(id, SEL); static unsigned (*hashImp)(id, SEL);
static NSStringEncoding defEnc = 0; static NSStringEncoding defEnc = 0;
static NSStringEncoding intEnc = NSISOLatin1StringEncoding;
/* /*
* The setup() function is called when any concrete string class is * The setup() function is called when any concrete string class is
@ -277,9 +278,14 @@ setup()
ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:); ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);
/* /*
* Cache the default string encoding. * Cache the default string encoding, and set the internal encoding
* used by 8-bit character strings to match if possible.
*/ */
defEnc = GetDefEncoding(); defEnc = GetDefEncoding();
if (GSIsByteEncoding(defEnc) == YES)
{
intEnc = defEnc;
}
} }
} }
@ -521,12 +527,12 @@ boolValue_u(ivars self)
} }
else else
{ {
unsigned len = self->_count < 10 ? self->_count : 9; unsigned int l = self->_count < 10 ? self->_count : 9;
char buf[len+1]; unsigned char buf[l+1];
unsigned char *b = buf;
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO); GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
buf[len] = '\0'; if (l == 3
if (len == 3
&& (buf[0] == 'Y' || buf[0] == 'y') && (buf[0] == 'Y' || buf[0] == 'y')
&& (buf[1] == 'E' || buf[1] == 'e') && (buf[1] == 'E' || buf[1] == 'e')
&& (buf[2] == 'S' || buf[2] == 's')) && (buf[2] == 'S' || buf[2] == 's'))
@ -543,8 +549,10 @@ boolValue_u(ivars self)
static inline BOOL static inline BOOL
canBeConvertedToEncoding_c(ivars self, NSStringEncoding enc) canBeConvertedToEncoding_c(ivars self, NSStringEncoding enc)
{ {
if (enc == defEnc) if (enc == intEnc)
{
return YES; return YES;
}
else else
{ {
BOOL result = (*convertImp)((id)self, convertSel, enc); BOOL result = (*convertImp)((id)self, convertSel, enc);
@ -571,7 +579,7 @@ characterAtIndex_c(ivars self, unsigned index)
c = self->_contents.c[index]; c = self->_contents.c[index];
if (c > 127) if (c > 127)
{ {
c = encode_chartouni(c, defEnc); c = encode_chartouni(c, intEnc);
} }
return c; return c;
} }
@ -631,13 +639,48 @@ compare_u(ivars self, NSString *aString, unsigned mask, NSRange aRange)
static inline char* static inline char*
cString_c(ivars self) cString_c(ivars self)
{ {
char *r = (char*)_fastMallocBuffer(self->_count+1); char *r;
if (self->_count == 0)
{
return "";
}
if (defEnc == intEnc)
{
r = (char*)_fastMallocBuffer(self->_count+1);
if (self->_count > 0) if (self->_count > 0)
{ {
memcpy(r, self->_contents.c, self->_count); memcpy(r, self->_contents.c, self->_count);
} }
r[self->_count] = '\0'; r[self->_count] = '\0';
}
else
{
unichar *u = 0;
unsigned l = 0;
unsigned s = 0;
/*
* The external C string encoding is not compatible with the internal
* C strings ... we must convert from internal format to unicode and
* then to the external C string encoding.
*/
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
NSDefaultMallocZone(), 0) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't convert to/from Unicode string."];
}
if (GSFromUnicode((unsigned char**)&r, &s, u, l, defEnc,
NSDefaultMallocZone(), GSUniTerminate|GSUniTemporary|GSUniStrict) == NO)
{
NSZoneFree(NSDefaultMallocZone(), u);
[NSException raise: NSCharacterConversionException
format: @"Can't convert to/from Unicode string."];
}
NSZoneFree(NSDefaultMallocZone(), u);
}
return r; return r;
} }
@ -645,58 +688,90 @@ cString_c(ivars self)
static inline char* static inline char*
cString_u(ivars self) cString_u(ivars self)
{ {
int l = self->_count; unsigned c = self->_count;
char *r = (char*)_fastMallocBuffer(l*2 + 1);
unsigned limit = 0;
if (l > 0) if (c == 0)
{ {
limit = encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, YES); return "";
if (limit == 0) }
else
{
unsigned int l = 0;
unsigned char *r = 0;
if (GSFromUnicode(&r, &l, self->_contents.u, c, defEnc,
NSDefaultMallocZone(), GSUniTerminate|GSUniTemporary|GSUniStrict) == NO)
{ {
[NSException raise: NSCharacterConversionException [NSException raise: NSCharacterConversionException
format: @"Can't get cString from Unicode string."]; format: @"Can't get cString from Unicode string."];
} }
}
r[limit] = '\0';
return r; return r;
} }
}
static inline unsigned int static inline unsigned int
cStringLength_c(ivars self) cStringLength_c(ivars self)
{
if (defEnc == intEnc)
{ {
return self->_count; return self->_count;
} }
else
{
/*
* The external C string encoding is not compatible with the internal
* C strings ... we must convert from internal format to unicode and
* then to the external C string encoding.
*/
if (self->_count == 0)
{
return 0;
}
else
{
unichar *u = 0;
unsigned l = 0;
unsigned s = 0;
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
NSDefaultMallocZone(), 0) == NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't convert to/from Unicode string."];
}
if (GSFromUnicode(0, &s, u, l, defEnc, 0, GSUniStrict) == NO)
{
NSZoneFree(NSDefaultMallocZone(), u);
[NSException raise: NSCharacterConversionException
format: @"Can't get cStringLength from string."];
}
NSZoneFree(NSDefaultMallocZone(), u);
return s;
}
}
}
static inline unsigned int static inline unsigned int
cStringLength_u(ivars self) cStringLength_u(ivars self)
{ {
unsigned c; unsigned c = self->_count;
unsigned l = self->_count;
unsigned limit = 0;
if (l > 0) if (c == 0)
{ {
char *r; return 0;
r = (char*)NSZoneMalloc(NSDefaultMallocZone(), l*2 + 1);
limit = encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, NO);
if (limit == 0)
{
NSZoneFree(NSDefaultMallocZone(), r);
[NSException raise: NSCharacterConversionException
format: @"Can't get cStringLength from Unicode string."];
}
r[limit] = '\0';
c = strlen(r);
NSZoneFree(NSDefaultMallocZone(), r);
} }
else else
{ {
c = 0; unsigned l = 0;
if (GSFromUnicode(0, &l, self->_contents.u, c, defEnc, 0, GSUniStrict)
== NO)
{
[NSException raise: NSCharacterConversionException
format: @"Can't get cStringLength from Unicode string."];
}
return l;
} }
return c;
} }
static inline NSData* static inline NSData*
@ -709,8 +784,8 @@ dataUsingEncoding_c(ivars self, NSStringEncoding encoding, BOOL flag)
return [NSDataClass data]; return [NSDataClass data];
} }
if ((encoding == defEnc) if ((encoding == intEnc)
|| ((defEnc == NSASCIIStringEncoding) || ((intEnc == NSASCIIStringEncoding)
&& ((encoding == NSISOLatin1StringEncoding) && ((encoding == NSISOLatin1StringEncoding)
|| (encoding == NSISOLatin2StringEncoding) || (encoding == NSISOLatin2StringEncoding)
|| (encoding == NSNEXTSTEPStringEncoding) || (encoding == NSNEXTSTEPStringEncoding)
@ -724,51 +799,43 @@ dataUsingEncoding_c(ivars self, NSStringEncoding encoding, BOOL flag)
} }
else if (encoding == NSUnicodeStringEncoding) else if (encoding == NSUnicodeStringEncoding)
{ {
int t; unsigned int l = 0;
unichar *buff; unichar *r = 0;
unsigned int options = GSUniBOM;
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(), if (flag == NO)
sizeof(unichar)*(len+1));
buff[0] = 0xFEFF;
t = encode_cstrtoustr(buff+1, len, self->_contents.c, len, defEnc);
return [NSDataClass dataWithBytesNoCopy: buff
length: sizeof(unichar)*(t+1)];
}
else
{ {
int t; options |= GSUniStrict;
int bsiz; }
unichar *ubuff;
unsigned char *buff;
ubuff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(), if (GSToUnicode(&r, &l, self->_contents.c, self->_count, intEnc,
sizeof(unichar)*len); NSDefaultMallocZone(), options) == NO)
t = encode_cstrtoustr(ubuff, len, self->_contents.c, len, defEnc);
if (encoding == NSUTF8StringEncoding)
{ {
bsiz = t*4;
}
else
{
bsiz = t;
}
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
flag = (flag == YES) ? NO : YES;
t = encode_ustrtocstr(buff, bsiz, ubuff, t, encoding, flag);
NSZoneFree(NSDefaultMallocZone(), ubuff);
if (t == 0)
{
NSZoneFree(NSDefaultMallocZone(), buff);
return nil; return nil;
} }
return [NSDataClass dataWithBytesNoCopy: r length: l];
}
else else
{ {
if (t != bsiz) unichar *u = 0;
unsigned l = 0;
unsigned char *r = 0;
unsigned s = 0;
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
NSDefaultMallocZone(), 0) == NO)
{ {
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t); [NSException raise: NSCharacterConversionException
format: @"Can't convert to Unicode string."];
} }
return [NSDataClass dataWithBytesNoCopy: buff length: t]; if (GSFromUnicode(&r, &s, u, l, encoding, NSDefaultMallocZone(),
(flag == NO) ? GSUniStrict : 0) == NO)
{
NSZoneFree(NSDefaultMallocZone(), u);
return nil;
} }
NSZoneFree(NSDefaultMallocZone(), u);
return [NSDataClass dataWithBytesNoCopy: r length: s];
} }
} }
@ -795,34 +862,15 @@ dataUsingEncoding_u(ivars self, NSStringEncoding encoding, BOOL flag)
} }
else else
{ {
int t; unsigned char *r = 0;
int bsiz; unsigned int l = 0;
unsigned char *buff;
if (encoding == NSUTF8StringEncoding) if (GSFromUnicode(&r, &l, self->_contents.u, self->_count, encoding,
NSDefaultMallocZone(), (flag == NO) ? GSUniStrict : 0) == NO)
{ {
bsiz = len*4;
}
else
{
bsiz = len;
}
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
flag = (flag == YES) ? NO : YES;
t = encode_ustrtocstr(buff, bsiz, self->_contents.u, len, encoding, flag);
if (t == 0)
{
NSZoneFree(NSDefaultMallocZone(), buff);
return nil; return nil;
} }
else return [NSDataClass dataWithBytesNoCopy: r length: l];
{
if (t != bsiz)
{
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
}
return [NSDataClass dataWithBytesNoCopy: buff length: t];
}
} }
} }
@ -853,11 +901,11 @@ doubleValue_u(ivars self)
} }
else else
{ {
unsigned len = self->_count < 32 ? self->_count : 31; unsigned int l = self->_count < 10 ? self->_count : 9;
char buf[len+1]; unsigned char buf[l+1];
unsigned char *b = buf;
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO); GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
buf[len] = '\0';
return atof(buf); return atof(buf);
} }
} }
@ -907,8 +955,10 @@ fillHole(ivars self, unsigned index, unsigned size)
static inline void static inline void
getCharacters_c(ivars self, unichar *buffer, NSRange aRange) getCharacters_c(ivars self, unichar *buffer, NSRange aRange)
{ {
encode_cstrtoustr(buffer, aRange.length, self->_contents.c + aRange.location, unsigned len = aRange.length;
aRange.length, defEnc);
GSToUnicode(&buffer, &len, self->_contents.c + aRange.location,
aRange.length, intEnc, 0, 0);
} }
static inline void static inline void
@ -955,8 +1005,7 @@ static inline void
getCString_u(ivars self, char *buffer, unsigned int maxLength, getCString_u(ivars self, char *buffer, unsigned int maxLength,
NSRange aRange, NSRange *leftoverRange) NSRange aRange, NSRange *leftoverRange)
{ {
int len; unsigned int len;
int result;
if (maxLength > self->_count) if (maxLength > self->_count)
{ {
@ -981,9 +1030,8 @@ getCString_u(ivars self, char *buffer, unsigned int maxLength,
} }
} }
result = encode_ustrtocstr(buffer, len, &self->_contents.u[aRange.location], if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len,
len, defEnc, YES); defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
if (result != len)
{ {
[NSException raise: NSCharacterConversionException [NSException raise: NSCharacterConversionException
format: @"Can't get cString from Unicode string."]; format: @"Can't get cString from Unicode string."];
@ -1018,11 +1066,11 @@ intValue_u(ivars self)
} }
else else
{ {
unsigned len = self->_count < 32 ? self->_count : 31; unsigned int l = self->_count < 10 ? self->_count : 9;
char buf[len+1]; unsigned char buf[l+1];
unsigned char *b = buf;
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO); GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
buf[len] = '\0';
return atol(buf); return atol(buf);
} }
} }
@ -1177,8 +1225,7 @@ lossyCString_u(ivars self)
unsigned l = self->_count; unsigned l = self->_count;
unsigned char *r = (unsigned char*)_fastMallocBuffer(l + 1); unsigned char *r = (unsigned char*)_fastMallocBuffer(l + 1);
encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, NO); GSFromUnicode(&r, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
r[l] = '\0';
return (const char*)r; return (const char*)r;
} }
@ -1357,7 +1404,7 @@ rangeOfCharacter_c(ivars self, NSCharacterSet *aSet, unsigned mask,
if (letter > 127) if (letter > 127)
{ {
letter = encode_chartouni(letter, defEnc); letter = encode_chartouni(letter, intEnc);
} }
if ((*mImp)(aSet, cMemberSel, letter)) if ((*mImp)(aSet, cMemberSel, letter))
{ {
@ -1534,7 +1581,7 @@ transmute(ivars self, NSString *aString)
*/ */
transmute = NO; transmute = NO;
} }
else if ([aString canBeConvertedToEncoding: defEnc] == YES) else if ([aString canBeConvertedToEncoding: intEnc] == YES)
{ {
/* /*
* This is a C string, but the other string can be converted to * This is a C string, but the other string can be converted to
@ -1567,11 +1614,11 @@ transmute(ivars self, NSString *aString)
if (transmute == YES) if (transmute == YES)
{ {
unichar *tmp; unichar *tmp = 0;
int len = self->_count; int len = 0;
tmp = NSZoneMalloc(self->_zone, self->_capacity * sizeof(unichar)); GSToUnicode(&tmp, &len, self->_contents.c, self->_count, intEnc,
len = encode_cstrtoustr(tmp, len, self->_contents.c, len, defEnc); self->_zone, 0);
if (self->_flags.free == 1) if (self->_flags.free == 1)
{ {
NSZoneFree(self->_zone, self->_contents.c); NSZoneFree(self->_zone, self->_contents.c);
@ -1761,7 +1808,7 @@ transmute(ivars self, NSString *aString)
[aCoder encodeValueOfObjCType: @encode(unsigned) at: &_count]; [aCoder encodeValueOfObjCType: @encode(unsigned) at: &_count];
if (_count > 0) if (_count > 0)
{ {
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &defEnc]; [aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &intEnc];
[aCoder encodeArrayOfObjCType: @encode(unsigned char) [aCoder encodeArrayOfObjCType: @encode(unsigned char)
count: _count count: _count
at: _contents.c]; at: _contents.c];
@ -1770,7 +1817,7 @@ transmute(ivars self, NSString *aString)
- (NSStringEncoding) fastestEncoding - (NSStringEncoding) fastestEncoding
{ {
return defEnc; return intEnc;
} }
- (float) floatValue - (float) floatValue
@ -1885,7 +1932,7 @@ transmute(ivars self, NSString *aString)
- (NSStringEncoding) smallestEncoding - (NSStringEncoding) smallestEncoding
{ {
return defEnc; return intEnc;
} }
- (NSString*) substringFromRange: (NSRange)aRange - (NSString*) substringFromRange: (NSRange)aRange
@ -2487,7 +2534,7 @@ transmute(ivars self, NSString *aString)
} }
else else
{ {
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &defEnc]; [aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &intEnc];
[aCoder encodeArrayOfObjCType: @encode(unsigned char) [aCoder encodeArrayOfObjCType: @encode(unsigned char)
count: _count count: _count
at: _contents.c]; at: _contents.c];
@ -2500,7 +2547,7 @@ transmute(ivars self, NSString *aString)
if (_flags.wide == 1) if (_flags.wide == 1)
return NSUnicodeStringEncoding; return NSUnicodeStringEncoding;
else else
return defEnc; return intEnc;
} }
- (float) floatValue - (float) floatValue
@ -2817,7 +2864,7 @@ transmute(ivars self, NSString *aString)
maxLength: l]; maxLength: l];
} }
_contents.c[aRange.location + l] _contents.c[aRange.location + l]
= encode_unitochar([aString characterAtIndex: l], defEnc); = encode_unitochar([aString characterAtIndex: l], intEnc);
} }
else else
{ {
@ -2880,7 +2927,7 @@ transmute(ivars self, NSString *aString)
[aString getCString: _contents.c maxLength: l]; [aString getCString: _contents.c maxLength: l];
} }
_contents.c[l] _contents.c[l]
= encode_unitochar([aString characterAtIndex: l], defEnc); = encode_unitochar([aString characterAtIndex: l], intEnc);
} }
else else
{ {
@ -2896,7 +2943,7 @@ transmute(ivars self, NSString *aString)
return NSUnicodeStringEncoding; return NSUnicodeStringEncoding;
} }
else else
return defEnc; return intEnc;
} }
- (NSString*) substringFromRange: (NSRange)aRange - (NSString*) substringFromRange: (NSRange)aRange
@ -3209,7 +3256,7 @@ transmute(ivars self, NSString *aString)
if (((ivars)_parent)->_flags.wide == 1) if (((ivars)_parent)->_flags.wide == 1)
return NSUnicodeStringEncoding; return NSUnicodeStringEncoding;
else else
return defEnc; return intEnc;
} }
- (void) getCharacters: (unichar*)buffer - (void) getCharacters: (unichar*)buffer
@ -3313,7 +3360,7 @@ transmute(ivars self, NSString *aString)
return NSUnicodeStringEncoding; return NSUnicodeStringEncoding;
} }
else else
return defEnc; return intEnc;
} }
@end @end
@ -3436,7 +3483,7 @@ transmute(ivars self, NSString *aString)
if (c > 127) if (c > 127)
{ {
c = encode_chartouni(c, defEnc); c = encode_chartouni(c, intEnc);
} }
ret = (ret << 5) + ret + c; ret = (ret << 5) + ret + c;
} }

View file

@ -259,6 +259,7 @@ surrogatePairValue(unichar high, unichar low)
@implementation NSString @implementation NSString
static NSStringEncoding _DefaultStringEncoding; static NSStringEncoding _DefaultStringEncoding;
static BOOL _ByteEncodingOk;
static const unichar byteOrderMark = 0xFEFF; static const unichar byteOrderMark = 0xFEFF;
static const unichar byteOrderMarkSwapped = 0xFFFE; static const unichar byteOrderMarkSwapped = 0xFFFE;
@ -344,6 +345,8 @@ handle_printf_atsign (FILE *stream,
ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:); ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);
_DefaultStringEncoding = GetDefEncoding(); _DefaultStringEncoding = GetDefEncoding();
_ByteEncodingOk = GSIsByteEncoding(_DefaultStringEncoding);
NSStringClass = self; NSStringClass = self;
[self setVersion: 1]; [self setVersion: 1];
NSMutableStringClass = [NSMutableString class]; NSMutableStringClass = [NSMutableString class];
@ -606,16 +609,22 @@ handle_printf_atsign (FILE *stream,
length: (unsigned int)length length: (unsigned int)length
freeWhenDone: (BOOL)flag freeWhenDone: (BOOL)flag
{ {
unichar *buf; unichar *buf = 0;
unsigned int l = 0;
buf = (unichar*)NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*length); if (GSToUnicode(&buf, &l, byteString, length, _DefaultStringEncoding,
length = encode_cstrtoustr(buf, length, byteString, length, [self zone], 0) == NO)
_DefaultStringEncoding); {
DESTROY(self);
}
else
{
if (flag == YES && byteString != 0) if (flag == YES && byteString != 0)
{ {
NSZoneFree(NSZoneFromPointer(byteString), byteString); NSZoneFree(NSZoneFromPointer(byteString), byteString);
} }
self = [self initWithCharactersNoCopy: buf length: length freeWhenDone: YES]; self = [self initWithCharactersNoCopy: buf length: l freeWhenDone: YES];
}
return self; return self;
} }
@ -682,34 +691,46 @@ handle_printf_atsign (FILE *stream,
if (length > 0) if (length > 0)
{ {
unsigned i; unsigned i = 0;
if (_ByteEncodingOk)
{
/* /*
* Check to see if we have in fact got an ascii string * If it's ok to store ascii strings as internal C strings,
* check to see if we have in fact got an ascii string.
*/ */
for (i = 0; i < length; i++) while (i < length)
{ {
if (((unsigned char*)bytes)[i] > 127) if (((unsigned char*)bytes)[i] > 127)
{ {
break; break;
} }
i++;
} }
}
if (i == length) if (i == length)
{ {
self = [self initWithCString: bytes length: length]; self = [self initWithCString: bytes length: length];
} }
else else
{ {
unichar *s; unichar *u = 0;
unsigned int l = 0;
s = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*length); if (GSToUnicode(&u, &l, bytes, length, NSUTF8StringEncoding,
length = encode_cstrtoustr(s, length, bytes, length, GSObjCZone(self), 0) == NO)
NSUTF8StringEncoding); {
self = [self initWithCharactersNoCopy: s DESTROY(self);
length: length }
else
{
self = [self initWithCharactersNoCopy: u
length: l
freeWhenDone: YES]; freeWhenDone: YES];
} }
} }
}
else else
{ {
self = [self initWithCharactersNoCopy: (unichar*)"" self = [self initWithCharactersNoCopy: (unichar*)""
@ -1071,98 +1092,141 @@ handle_printf_atsign (FILE *stream,
if (len == 0) if (len == 0)
{ {
self = [self initWithCStringNoCopy: "" length: 0 freeWhenDone: NO]; self = [self initWithCharactersNoCopy: (unichar*)""
length: 0
freeWhenDone: NO];
} }
else if (encoding == NSASCIIStringEncoding else if (_ByteEncodingOk == YES
|| encoding == _DefaultStringEncoding) && (encoding==_DefaultStringEncoding || encoding==NSASCIIStringEncoding))
{ {
char *s = NSZoneMalloc(GSObjCZone(self), len); char *s;
/*
* We can only create an internal C string if the default C string
* encoding is Ok, and the specified encoding matches it.
*/
s = NSZoneMalloc(GSObjCZone(self), len);
[data getBytes: s]; [data getBytes: s];
self = [self initWithCStringNoCopy: s length: len freeWhenDone: YES]; self = [self initWithCStringNoCopy: s length: len freeWhenDone: YES];
} }
else if (encoding == NSUTF8StringEncoding) else if (encoding == NSUTF8StringEncoding)
{ {
const char *bytes = [data bytes]; const char *bytes = [data bytes];
unsigned i; unsigned i = 0;
if (_ByteEncodingOk)
{
/* /*
* Check to see if we have in fact got an ascii string * If it's ok to store ascii strings as internal C strings,
* check to see if we have in fact got an ascii string.
*/ */
for (i = 0; i < len; i++) while (i < len)
{ {
if (((unsigned char*)bytes)[i] > 127) if (((unsigned char*)bytes)[i] > 127)
{ {
break; break;
} }
i++;
} }
}
if (i == len) if (i == len)
{ {
self = [self initWithCString: bytes length: len]; self = [self initWithCString: bytes length: len];
} }
else else
{ {
unichar *u; unichar *u = 0;
unsigned int l = 0;
u = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*len); if (GSToUnicode(&u, &l, bytes, len, NSUTF8StringEncoding,
len = encode_cstrtoustr(u, len, bytes, len, GSObjCZone(self), 0) == NO)
NSUTF8StringEncoding);
if (len > 0)
{
self = [self initWithCharactersNoCopy: u
length: len
freeWhenDone: YES];
}
else
{
DESTROY(self);
}
}
}
else
{
unichar *u;
unsigned count;
const unsigned char *b;
if (len < 1 || (len < 2 && encoding == NSUnicodeStringEncoding))
{
return [self initWithCStringNoCopy: "" length: 0 freeWhenDone: NO];
}
b = [data bytes];
u = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*(len+1));
if (encoding == NSUnicodeStringEncoding)
{
if ((b[0]==0xFE) & (b[1]==0xFF))
{
b = &b[2];
count -= 2;
}
for (count = 0; count < (len - 1); count += 2)
{
u[count/2 - 1] = 256*b[count + 1] + b[count];
}
count = count/2;
self = [self initWithCharactersNoCopy: u
length: count
freeWhenDone: YES];
}
else
{
count = encode_cstrtoustr(u, len, b, len, encoding);
if (count < 1)
{ {
DESTROY(self); DESTROY(self);
} }
else else
{ {
self = [self initWithCharactersNoCopy: u self = [self initWithCharactersNoCopy: u
length: count length: l
freeWhenDone: YES]; freeWhenDone: YES];
} }
} }
} }
else if (encoding == NSUnicodeStringEncoding)
{
if (len%2 != 0)
{
DESTROY(self); // Not valid unicode data.
}
else
{
BOOL swapped = NO;
unsigned char *b;
unichar *uptr;
b = (unsigned char*)[data bytes];
uptr = (unichar*)b;
if (*uptr == 0xFFFE)
{
b = (unsigned char*)++uptr;
len -= sizeof(unichar);
}
else if (*uptr == 0xFEFF)
{
b = (unsigned char*)++uptr;
len -= sizeof(unichar);
swapped = YES;
}
if (len == 0)
{
self = [self initWithCharactersNoCopy: (unichar*)""
length: 0
freeWhenDone: NO];
}
else
{
unsigned char *u;
u = (unsigned char*)NSZoneMalloc(GSObjCZone(self), len);
if (swapped == YES)
{
unsigned i;
for (i = 0; i < len; i += 2)
{
u[i] = b[i + 1];
u[i + 1] = b[i];
}
}
else
{
memcpy(u, b, len);
}
self = [self initWithCharactersNoCopy: (unichar*)u
length: len/2
freeWhenDone: YES];
}
}
}
else
{
unsigned char *b;
unichar *u = 0;
unsigned l = 0;
b = (unsigned char*)[data bytes];
if (GSToUnicode(&u, &l, b, len, NSUTF8StringEncoding, GSObjCZone(self),
0) == NO)
{
DESTROY(self);
}
else
{
self = [self initWithCharactersNoCopy: u
length: l
freeWhenDone: YES];
}
}
return self; return self;
} }
@ -2314,47 +2378,27 @@ handle_printf_atsign (FILE *stream,
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(), buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
sizeof(unichar)*(len+1)); sizeof(unichar)*(len+1));
buff[0] = 0xFEFF; buff[0] = 0xFEFF;
for (count = 0; count < len; count++) [self getCharacters: &buff[1]];
{
buff[count+1] = (*caiImp)(self, caiSel, count);
}
return [NSDataClass dataWithBytesNoCopy: buff return [NSDataClass dataWithBytesNoCopy: buff
length: sizeof(unichar)*(len+1)]; length: sizeof(unichar)*(len+1)];
} }
else else
{ {
int t; unsigned char *b = 0;
int bsiz; int l = 0;
unichar *u; unichar *u;
unsigned char *buff;
u = (unichar*)NSZoneMalloc(NSDefaultMallocZone(), len*sizeof(unichar)); u = (unichar*)NSZoneMalloc(NSDefaultMallocZone(), len*sizeof(unichar));
[self getCharacters: u]; [self getCharacters: u];
if (encoding == NSUTF8StringEncoding) if (GSFromUnicode(&b, &l, u, len, encoding, NSDefaultMallocZone(),
(flag == NO) ? GSUniStrict : 0)
== NO)
{ {
bsiz = len * 4;
}
else
{
bsiz = len;
}
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
flag = (flag == YES) ? NO : YES;
t = encode_ustrtocstr(buff, bsiz, u, len, encoding, flag);
NSZoneFree(NSDefaultMallocZone(), u); NSZoneFree(NSDefaultMallocZone(), u);
if (t == 0)
{
NSZoneFree(NSDefaultMallocZone(), buff);
return nil; return nil;
} }
else NSZoneFree(NSDefaultMallocZone(), u);
{ return [NSDataClass dataWithBytesNoCopy: b length: l];
if (bsiz != t)
{
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
}
return [NSDataClass dataWithBytesNoCopy: buff length: t];
}
} }
return nil; return nil;
} }

View file

@ -28,6 +28,7 @@
#include <config.h> #include <config.h>
#include <Foundation/NSString.h> #include <Foundation/NSString.h>
#include <Foundation/NSLock.h>
#include <base/Unicode.h> #include <base/Unicode.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -50,8 +51,10 @@ typedef struct {unichar from; char to;} _ucc_;
#endif #endif
#include <errno.h> #include <errno.h>
// The rest of the GNUstep code stores UNICODE in internal byte order, /*
// so we do the same. This should be UCS-2-INTERNAL for libiconv * The whole of the GNUstep code stores UNICODE in internal byte order,
* so we do the same. This should be UCS-2-INTERNAL for libiconv
*/
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
#define UNICODE_INT "UNICODEBIG" #define UNICODE_INT "UNICODEBIG"
#else #else
@ -62,236 +65,6 @@ typedef struct {unichar from; char to;} _ucc_;
static const char *unicode_enc = NULL; static const char *unicode_enc = NULL;
#endif
typedef unsigned char unc;
static NSStringEncoding defEnc = GSUndefinedEncoding;
#ifdef HAVE_ICONV
/*
* FIXME: We should check dynamically which encodings are found on this
* computer as different implementation of iconv will support different
* encodings.
*/
static NSStringEncoding _availableEncodings[] = {
NSASCIIStringEncoding,
NSNEXTSTEPStringEncoding,
NSJapaneseEUCStringEncoding,
NSUTF8StringEncoding,
NSISOLatin1StringEncoding,
// NSSymbolStringEncoding,
// NSNonLossyASCIIStringEncoding,
NSShiftJISStringEncoding,
NSISOLatin2StringEncoding,
NSUnicodeStringEncoding,
NSWindowsCP1251StringEncoding,
NSWindowsCP1252StringEncoding,
NSWindowsCP1253StringEncoding,
NSWindowsCP1254StringEncoding,
NSWindowsCP1250StringEncoding,
NSISO2022JPStringEncoding,
NSMacOSRomanStringEncoding,
// NSProprietaryStringEncoding,
// GNUstep additions
NSISOCyrillicStringEncoding,
NSKOI8RStringEncoding,
NSISOLatin3StringEncoding,
NSISOLatin4StringEncoding,
NSISOArabicStringEncoding,
NSISOGreekStringEncoding,
NSISOHebrewStringEncoding,
NSGB2312StringEncoding,
NSGSM0338StringEncoding,
NSBIG5StringEncoding,
0
};
#else
// Uncomment when implemented
static NSStringEncoding _availableEncodings[] = {
NSASCIIStringEncoding,
NSNEXTSTEPStringEncoding,
// NSJapaneseEUCStringEncoding,
// NSUTF8StringEncoding,
NSISOLatin1StringEncoding,
// NSSymbolStringEncoding,
// NSNonLossyASCIIStringEncoding,
// NSShiftJISStringEncoding,
NSISOLatin2StringEncoding,
NSUnicodeStringEncoding,
// NSWindowsCP1251StringEncoding,
// NSWindowsCP1252StringEncoding,
// NSWindowsCP1253StringEncoding,
// NSWindowsCP1254StringEncoding,
// NSWindowsCP1250StringEncoding,
// NSISO2022JPStringEncoding,
// NSMacOSRomanStringEncoding,
// NSProprietaryStringEncoding,
// GNUstep additions
NSISOCyrillicStringEncoding,
// NSKOI8RStringEncoding,
// NSISOLatin3StringEncoding,
// NSISOLatin4StringEncoding,
// NSISOArabicStringEncoding,
// NSISOGreekStringEncoding,
// NSISOHebrewStringEncoding,
// NSGB2312StringEncoding,
NSGSM0338StringEncoding,
NSBIG5StringEncoding,
0
};
#endif
/*
 * Pairs an NSStringEncoding constant with its printable name.
 */
struct _strenc_ {NSStringEncoding enc; char *ename;};

/*
 * Table mapping every known string encoding to its name.
 * The table is terminated by an entry whose enc is 0 (its name,
 * "Unknown encoding", is what lookups fall through to).
 * Names are matched exactly with strcmp() against the value of the
 * GNUSTEP_STRING_ENCODING environment variable, so they must not
 * contain stray whitespace.
 */
const struct _strenc_ str_encoding_table[]=
{
  {NSASCIIStringEncoding,"NSASCIIStringEncoding"},
  {NSNEXTSTEPStringEncoding,"NSNEXTSTEPStringEncoding"},
  {NSJapaneseEUCStringEncoding, "NSJapaneseEUCStringEncoding"},
  {NSUTF8StringEncoding,"NSUTF8StringEncoding"},
  {NSISOLatin1StringEncoding,"NSISOLatin1StringEncoding"},
  {NSSymbolStringEncoding,"NSSymbolStringEncoding"},
  {NSNonLossyASCIIStringEncoding,"NSNonLossyASCIIStringEncoding"},
  {NSShiftJISStringEncoding,"NSShiftJISStringEncoding"},
  {NSISOLatin2StringEncoding,"NSISOLatin2StringEncoding"},
  {NSUnicodeStringEncoding, "NSUnicodeStringEncoding"},
  {NSWindowsCP1251StringEncoding,"NSWindowsCP1251StringEncoding"},
  {NSWindowsCP1252StringEncoding,"NSWindowsCP1252StringEncoding"},
  {NSWindowsCP1253StringEncoding,"NSWindowsCP1253StringEncoding"},
  {NSWindowsCP1254StringEncoding,"NSWindowsCP1254StringEncoding"},
  {NSWindowsCP1250StringEncoding,"NSWindowsCP1250StringEncoding"},
  /* Fixed: the name used to carry a trailing space and so never matched. */
  {NSISO2022JPStringEncoding,"NSISO2022JPStringEncoding"},
  {NSMacOSRomanStringEncoding, "NSMacOSRomanStringEncoding"},
  {NSProprietaryStringEncoding, "NSProprietaryStringEncoding"},
  // GNUstep additions
  {NSISOCyrillicStringEncoding,"NSISOCyrillicStringEncoding"},
  {NSKOI8RStringEncoding, "NSKOI8RStringEncoding"},
  {NSISOLatin3StringEncoding, "NSISOLatin3StringEncoding"},
  {NSISOLatin4StringEncoding, "NSISOLatin4StringEncoding"},
  {NSISOArabicStringEncoding, "NSISOArabicStringEncoding"},
  {NSISOGreekStringEncoding, "NSISOGreekStringEncoding"},
  {NSISOHebrewStringEncoding, "NSISOHebrewStringEncoding"},
  {NSISOLatin5StringEncoding, "NSISOLatin5StringEncoding"},
  {NSISOLatin6StringEncoding, "NSISOLatin6StringEncoding"},
  {NSISOLatin7StringEncoding, "NSISOLatin7StringEncoding"},
  {NSISOLatin8StringEncoding, "NSISOLatin8StringEncoding"},
  {NSISOLatin9StringEncoding, "NSISOLatin9StringEncoding"},
  {NSUTF7StringEncoding, "NSUTF7StringEncoding"},
  {NSGB2312StringEncoding, "NSGB2312StringEncoding"},
  {NSGSM0338StringEncoding, "NSGSM0338StringEncoding"},
  {NSBIG5StringEncoding, "NSBIG5StringEncoding"},
  {0, "Unknown encoding"}
};
/*
 * Returns the file-level _availableEncodings list unchanged.
 */
NSStringEncoding *GetAvailableEncodings()
{
// FIXME: This should check which iconv definitions are available and
// add them to the available encodings
return _availableEncodings;
}
/*
 * Returns the default C string encoding, resolving it on first use.
 * The name is taken from the GNUSTEP_STRING_ENCODING environment
 * variable; whenever that is missing, unknown, unusable as a C string
 * encoding, or not in the list of available encodings, the result is
 * NSISOLatin1StringEncoding (with a warning on stderr in all but the
 * "missing" case).  The resolved value is cached in defEnc.
 */
NSStringEncoding
GetDefEncoding()
{
  NSStringEncoding  *implemented;
  const char        *requested;
  unsigned int      idx;

  /* Already resolved by an earlier call. */
  if (defEnc != GSUndefinedEncoding)
    {
      return defEnc;
    }

  implemented = GetAvailableEncodings();
  requested = getenv("GNUSTEP_STRING_ENCODING");
  if (requested == 0)
    {
      /* shouldn't be required. It really should be in UserDefaults - asf */
      defEnc = NSISOLatin1StringEncoding;
      return defEnc;
    }

  /* Look the requested name up; the table ends with an entry of enc 0. */
  for (idx = 0; str_encoding_table[idx].enc != 0; idx++)
    {
      if (strcmp(str_encoding_table[idx].ename, requested) == 0)
        {
          break;
        }
    }

  if (str_encoding_table[idx].enc == 0)
    {
      /* Name not present in the table at all. */
      fprintf(stderr,
        "WARNING: %s - encoding not supported.\n", requested);
      fprintf(stderr, "NSISOLatin1StringEncoding set as default.\n");
      defEnc = NSISOLatin1StringEncoding;
      return defEnc;
    }

  defEnc = str_encoding_table[idx].enc;
  switch (defEnc)
    {
      case NSUnicodeStringEncoding:
      case NSUTF8StringEncoding:
      case NSSymbolStringEncoding:
        /* Known encodings that may not serve as the C string default. */
        fprintf(stderr, "WARNING: %s - encoding not supported as "
          "default c string encoding.\n", requested);
        fprintf(stderr,
          "NSISOLatin1StringEncoding set as default.\n");
        defEnc = NSISOLatin1StringEncoding;
        break;

      default:
        {
          /* Encoding should be supported, but is it implemented? */
          BOOL  found = NO;

          for (; *implemented != 0; implemented++)
            {
              if (*implemented == defEnc)
                {
                  found = YES;
                  break;
                }
            }
          if (found == NO && defEnc != NSISOLatin1StringEncoding)
            {
              fprintf(stderr,
                "WARNING: %s - encoding not yet implemented.\n",
                requested);
              fprintf(stderr,
                "NSISOLatin1StringEncoding set as default.\n");
              defEnc = NSISOLatin1StringEncoding;
            }
        }
        break;
    }
  return defEnc;
}
/*
 * Returns the printable name of an encoding as an NSString.
 * If the encoding is not listed in str_encoding_table, the name of the
 * terminating entry ("Unknown encoding") is returned.
 */
NSString*
GetEncodingName(NSStringEncoding encoding)
{
  const struct _strenc_ *entry;

  /* Walk until we hit the wanted encoding or the enc == 0 terminator. */
  for (entry = str_encoding_table; entry->enc != 0; entry++)
    {
      if (entry->enc == encoding)
        {
          break;
        }
    }
  return [NSString stringWithCString: entry->ename];
}
#ifdef HAVE_ICONV
/* Check to see what type of internal unicode format the library supports */ /* Check to see what type of internal unicode format the library supports */
static const char * static const char *
internal_unicode_enc() internal_unicode_enc()
@ -316,79 +89,302 @@ internal_unicode_enc()
return unicode_enc; return unicode_enc;
} }
#endif
/* Shorthand for unsigned char. */
typedef unsigned char unc;
/* Cached default C string encoding; GSUndefinedEncoding until resolved. */
static NSStringEncoding defEnc = GSUndefinedEncoding;
/* List of available encodings; zero until it has been built. */
static NSStringEncoding *_availableEncodings = 0;
/*
 * Describes a single string encoding: its constant, its names and
 * what we know about how it can be handled.
 */
struct _strenc_ {
NSStringEncoding enc; // Constant representing the encoding.
const char *ename; // ASCII string representation of name.
const char *iconv; /* Iconv name of encoding. If this
* is null, we cannot use iconv to
* perform conversions to/from this
* encoding.
*/
BOOL eightBit; /* Flag to say whether this encoding
* can be stored in a byte array ...
* i.e. whether the encoding consists
* entirely of single byte characters
* and the first 128 are identical to
* the ASCII character set.
*/
BOOL supported; /* Is this supported? Some encodings
* have builtin conversion to/from
* unicode, but for others we must
* check with iconv to see if it
* supports them on this platform.
*/
};
/*
 * The str_encoding_table is a compact representation of all the string
 * encoding information we might need. It gets modified at runtime
 * (GetAvailableEncodings() updates the iconv and supported fields).
 *
 * Each row is {enc, ename, iconv, eightBit, supported} in the order of
 * the fields of struct _strenc_.  A 0 in the iconv column means there
 * is no iconv name for that encoding.  The final row, with enc 0,
 * terminates the table.
 */
static struct _strenc_ str_encoding_table[] = {
{NSASCIIStringEncoding,"NSASCIIStringEncoding","ASCII",1,1},
{NSNEXTSTEPStringEncoding,"NSNEXTSTEPStringEncoding","NEXTSTEP",1,1},
{NSJapaneseEUCStringEncoding, "NSJapaneseEUCStringEncoding","EUC-JP",0,0},
{NSUTF8StringEncoding,"NSUTF8StringEncoding","UTF-8",0,0},
{NSISOLatin1StringEncoding,"NSISOLatin1StringEncoding","ISO-8859-1",1,1},
{NSSymbolStringEncoding,"NSSymbolStringEncoding",0,0,0},
{NSNonLossyASCIIStringEncoding,"NSNonLossyASCIIStringEncoding",0,1,1},
{NSShiftJISStringEncoding,"NSShiftJISStringEncoding","SHIFT-JIS",0,0},
{NSISOLatin2StringEncoding,"NSISOLatin2StringEncoding","ISO-8859-2",1,1},
{NSUnicodeStringEncoding, "NSUnicodeStringEncoding",0,0,1},
{NSWindowsCP1251StringEncoding,"NSWindowsCP1251StringEncoding","CP1251",0,0},
{NSWindowsCP1252StringEncoding,"NSWindowsCP1252StringEncoding","CP1252",0,0},
{NSWindowsCP1253StringEncoding,"NSWindowsCP1253StringEncoding","CP1253",0,0},
{NSWindowsCP1254StringEncoding,"NSWindowsCP1254StringEncoding","CP1254",0,0},
{NSWindowsCP1250StringEncoding,"NSWindowsCP1250StringEncoding","CP1250",0,0},
{NSISO2022JPStringEncoding,"NSISO2022JPStringEncoding","ISO-2022-JP",0,0},
{NSMacOSRomanStringEncoding, "NSMacOSRomanStringEncoding","MACINTOSH",0,0},
{NSProprietaryStringEncoding, "NSProprietaryStringEncoding",0,0,0},
// GNUstep additions
{NSISOCyrillicStringEncoding,"NSISOCyrillicStringEncoding","ISO-8859-5",0,1},
{NSKOI8RStringEncoding, "NSKOI8RStringEncoding","KOI8-R",0,0},
{NSISOLatin3StringEncoding, "NSISOLatin3StringEncoding","ISO-8859-3",0,0},
{NSISOLatin4StringEncoding, "NSISOLatin4StringEncoding","ISO-8859-4",0,0},
{NSISOArabicStringEncoding, "NSISOArabicStringEncoding","ISO-8859-6",0,0},
{NSISOGreekStringEncoding, "NSISOGreekStringEncoding","ISO-8859-7",0,0},
{NSISOHebrewStringEncoding, "NSISOHebrewStringEncoding","ISO-8859-8",0,0},
{NSISOLatin5StringEncoding, "NSISOLatin5StringEncoding","ISO-8859-9",0,0},
{NSISOLatin6StringEncoding, "NSISOLatin6StringEncoding","ISO-8859-10",0,0},
{NSISOLatin7StringEncoding, "NSISOLatin7StringEncoding","ISO-8859-13",0,0},
{NSISOLatin8StringEncoding, "NSISOLatin8StringEncoding","ISO-8859-14",0,0},
{NSISOLatin9StringEncoding, "NSISOLatin9StringEncoding","ISO-8859-15",0,0},
{NSUTF7StringEncoding, "NSUTF7StringEncoding",0,0,0},
{NSGB2312StringEncoding, "NSGB2312StringEncoding","EUC-CN",0,0},
{NSGSM0338StringEncoding, "NSGSM0338StringEncoding",0,0,1},
{NSBIG5StringEncoding, "NSBIG5StringEncoding","BIG5",0,0},
{0,"Unknown encoding",0,0,0}
};
/*
 * Lookup table of pointers into str_encoding_table, indexed by encoding
 * value; zero until GetAvailableEncodings() has built it.
 */
static struct _strenc_ **encodingTable = 0;
/*
 * Largest encoding value that has a slot in encodingTable (the table
 * is allocated with encTableSize+1 entries).
 */
static unsigned encTableSize = 0;
/*
 * Returns a zero terminated list of the string encodings which can be
 * used on this system, building it (once) on first use.
 *
 * As a side effect the first call also builds encodingTable, a table
 * of pointers into str_encoding_table indexed by encoding value, sets
 * encTableSize to the largest index in that table, and updates the
 * iconv/supported fields of str_encoding_table entries.
 *
 * Returns 0 (after reporting the problem on stderr) if the tables could
 * not be allocated; in that case no global state is modified, so a
 * later call will try again.
 *
 * NOTE(review): callers that already hold gnustep_global_lock rely on
 * that lock being recursive - confirm.
 */
NSStringEncoding *GetAvailableEncodings()
{
  if (_availableEncodings == 0)
    {
      [gnustep_global_lock lock];
      if (_availableEncodings == 0)
        {
          NSStringEncoding  *encodings;
          struct _strenc_   **table;
          unsigned          largest;
          unsigned          count;
          unsigned          pos;
          unsigned          i;

          /*
           * We want to store pointers to our string encoding info in a
           * large table so we can do efficient lookup by encoding value.
           */
#define MAX_ENCODING 128
          count = sizeof(str_encoding_table) / sizeof(struct _strenc_);

          /*
           * First determine the largest encoding value and create a
           * large enough table of pointers.
           */
          largest = 0;
          for (i = 0; i < count; i++)
            {
              unsigned tmp = str_encoding_table[i].enc;

              if (tmp >= MAX_ENCODING)
                {
                  fprintf(stderr, "ERROR ... illegal NSStringEncoding "
                    "value in str_encoding_table. Ignored\n");
                }
              else if (tmp > largest)
                {
                  largest = tmp;
                }
            }

          /* calloc gives us a table whose unused slots are all null. */
          table = calloc(largest + 1, sizeof(struct _strenc_ *));
          if (table == 0)
            {
              fprintf(stderr, "ERROR ... out of memory building string "
                "encoding tables.\n");
              [gnustep_global_lock unlock];
              return 0;
            }

          /*
           * The terminating entry of str_encoding_table (enc == 0) is
           * never copied, so count slots leave room for the terminator
           * of the list we build.
           */
          encodings = objc_malloc(sizeof(NSStringEncoding) * count);
          if (encodings == 0)
            {
              free(table);
              fprintf(stderr, "ERROR ... out of memory building string "
                "encoding tables.\n");
              [gnustep_global_lock unlock];
              return 0;
            }

          /*
           * Now set up the pointers at the correct location in the table.
           */
          for (i = 0; i < count; i++)
            {
              unsigned tmp = str_encoding_table[i].enc;

              if (tmp < MAX_ENCODING)
                {
                  table[tmp] = &str_encoding_table[i];
                }
            }

          /*
           * Now build up a list of supported encodings ... in the
           * format needed to support [NSString+availableStringEncodings]
           * Check to see what iconv support we have as we go along.
           * This is also the place where we determine the name we use
           * for iconv to support unicode.
           */
          pos = 0;
          for (i = 0; i < count; i++)
            {
              NSStringEncoding enc = str_encoding_table[i].enc;

              if (enc == 0 || enc >= MAX_ENCODING)
                {
                  continue;
                }
#ifdef HAVE_ICONV
              if (enc == NSUnicodeStringEncoding)
                {
                  table[enc]->iconv = UNICODE_ENC;
                  table[enc]->supported = 1;
                }
              if (table[enc]->supported == 0)
                {
                  if (table[enc]->iconv == 0)
                    {
                      continue;         // Not handled by iconv.
                    }
                  else
                    {
                      iconv_t   c;

                      c = iconv_open(UNICODE_ENC, table[enc]->iconv);
                      if (c == (iconv_t)-1)
                        {
                          continue;     // Can't convert to unicode
                        }
                      iconv_close(c);
                      c = iconv_open(table[enc]->iconv, UNICODE_ENC);
                      if (c == (iconv_t)-1)
                        {
                          continue;     // Can't convert from unicode
                        }
                      iconv_close(c);
                      table[enc]->supported = 1;
                    }
                }
#else
              if (table[enc]->supported == 0)
                {
                  continue;
                }
#endif
              encodings[pos++] = enc;
            }
          encodings[pos] = 0;

          /*
           * Publish the results only now that they are complete;
           * _availableEncodings goes last because the unlocked test at
           * the top of this function uses it as the "ready" flag.
           */
          encTableSize = largest;
          encodingTable = table;
          _availableEncodings = encodings;
        }
      [gnustep_global_lock unlock];
    }
  return _availableEncodings;
}
/*
 * Returns the default C string encoding, resolving it on first use.
 *
 * The encoding is named by the GNUSTEP_STRING_ENCODING environment
 * variable.  If the variable is not set the default is
 * NSISOLatin1StringEncoding; if it names an unknown encoding, or one
 * whose table entry is not marked as supported, a warning is written
 * to stderr and NSISOLatin1StringEncoding is used instead.
 *
 * The result is cached in defEnc.  The value is worked out in a local
 * variable and stored once, so that a thread taking the unlocked fast
 * path can never see an intermediate (unsupported) value.
 *
 * NOTE(review): GetAvailableEncodings() may take gnustep_global_lock
 * while we already hold it, so this relies on the lock being
 * recursive - confirm.
 */
NSStringEncoding
GetDefEncoding()
{
  if (defEnc == GSUndefinedEncoding)
    {
      [gnustep_global_lock lock];
      if (defEnc == GSUndefinedEncoding)
        {
          NSStringEncoding  chosen = NSISOLatin1StringEncoding;
          const char        *encoding;

          /*
           * Called for its side effect: it fills in the 'supported'
           * flags of str_encoding_table which we test below.
           */
          GetAvailableEncodings();

          encoding = getenv("GNUSTEP_STRING_ENCODING");
          if (encoding != 0)
            {
              unsigned int  count = 0;

              while (str_encoding_table[count].enc
                && strcmp(str_encoding_table[count].ename, encoding))
                {
                  count++;
                }
              if (str_encoding_table[count].enc == 0)
                {
                  /* encoding not found */
                  fprintf(stderr,
                    "WARNING: %s - encoding not supported.\n", encoding);
                  fprintf(stderr,
                    "NSISOLatin1StringEncoding set as default.\n");
                }
              else if (str_encoding_table[count].supported == 0)
                {
                  fprintf(stderr, "WARNING: %s - encoding not implemented as "
                    "default c string encoding.\n", encoding);
                  fprintf(stderr,
                    "NSISOLatin1StringEncoding set as default.\n");
                }
              else
                {
                  chosen = str_encoding_table[count].enc;
                }
            }
          /*
           * else: environment var not found ... no warning is issued.
           * shouldn't be required. It really should be in UserDefaults - asf
           */
          defEnc = chosen;
        }
      [gnustep_global_lock unlock];
    }
  return defEnc;
}
/*
 * Returns the eightBit flag recorded for the encoding in the encoding
 * table, or NO if the encoding has no entry in that table.
 */
BOOL
GSIsByteEncoding(NSStringEncoding encoding)
{
  GetAvailableEncodings();      // Make sure the lookup table is built.

  /*
   * encTableSize is the largest valid index of encodingTable (which
   * holds encTableSize+1 slots), so only values above it are out of
   * range.
   */
  if (encoding == 0 || encoding > encTableSize || encodingTable[encoding] == 0)
    {
      return NO;
    }
  return encodingTable[encoding]->eightBit;
}
/*
 * Returns the name recorded for the encoding in the encoding table as
 * an NSString, or @"Unknown encoding" if the encoding has no entry in
 * that table.
 */
NSString*
GSEncodingName(NSStringEncoding encoding)
{
  GetAvailableEncodings();      // Make sure the lookup table is built.

  /*
   * encTableSize is the largest valid index of encodingTable (which
   * holds encTableSize+1 slots), so only values above it are out of
   * range.
   */
  if (encoding == 0 || encoding > encTableSize || encodingTable[encoding] == 0)
    {
      return @"Unknown encoding";
    }
  return [NSString stringWithCString: encodingTable[encoding]->ename];
}
/*
 * Returns the name of the encoding; simply forwards to GSEncodingName().
 */
NSString*
GetEncodingName(NSStringEncoding encoding)
{
return GSEncodingName(encoding);
}
/*
 * Returns the iconv name recorded for the encoding in the encoding
 * table.  An empty string is returned if the encoding has no entry in
 * the table or its entry has no iconv name, so the result is never
 * null and can be handed to iconv_open() or printed with %s.
 */
static const char *
iconv_stringforencoding(NSStringEncoding encoding)
{
  const char    *name;

  GetAvailableEncodings();      // Make sure the lookup table is built.

  /*
   * encTableSize is the largest valid index of encodingTable (which
   * holds encTableSize+1 slots), so only values above it are out of
   * range.
   */
  if (encoding == 0 || encoding > encTableSize || encodingTable[encoding] == 0)
    {
      return "";
    }
  name = encodingTable[encoding]->iconv;
  return (name == 0) ? "" : name;
}
#ifdef HAVE_ICONV
int int
iconv_cstrtoustr(unichar *u2, int size2, const char *s1, int size1, iconv_cstrtoustr(unichar *u2, int size2, const char *s1, int size1,
NSStringEncoding enc) NSStringEncoding enc)
@ -1368,7 +1364,8 @@ if (dst == 0) \
} \ } \
else if (zone == 0) \ else if (zone == 0) \
{ \ { \
return NO; /* No buffer growth possible ... fail. */ \ result = NO; /* No buffer growth possible ... fail. */ \
break; \
} \ } \
else \ else \
{ \ { \
@ -1397,7 +1394,8 @@ else \
} \ } \
if (ptr == 0) \ if (ptr == 0) \
{ \ { \
return NO; /* Not enough memory */ \ result = NO; /* Not enough memory */ \
break; \
} \ } \
bsize = grow / sizeof(unichar); \ bsize = grow / sizeof(unichar); \
} }
@ -1441,6 +1439,11 @@ else \
* <item>If GSUniTemporary is set, the function will return the results in * <item>If GSUniTemporary is set, the function will return the results in
* an autoreleased buffer rather than in a buffer that the caller must * an autoreleased buffer rather than in a buffer that the caller must
* release.</item> * release.</item>
* <item>If GSUniBOM is set, the function will write a byte order marker
* (0xFEFF) as the first unicode character of the output.</item>
* <item>If GSUniShortOk is set, the function will return a buffer containing
* any decoded characters even if the whole conversion fails.</item>
* </list> * </list>
* <p>On return, the function result is a flag indicating success (YES) * <p>On return, the function result is a flag indicating success (YES)
* or failure (NO), and on success, the value stored in size is the number * or failure (NO), and on success, the value stored in size is the number
@ -1464,9 +1467,11 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
unsigned extra = (options & GSUniTerminate) ? sizeof(unichar) : 0; unsigned extra = (options & GSUniTerminate) ? sizeof(unichar) : 0;
unichar base = 0; unichar base = 0;
unichar *table = 0; unichar *table = 0;
BOOL result = YES;
if (slen == 0) if (slen == 0)
{ {
*size = 0;
return YES; return YES;
} }
@ -1484,6 +1489,15 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
bsize = *size; bsize = *size;
} }
if (options & GSUniBOM)
{
while (dpos >= bsize)
{
GROW();
}
ptr[dpos++] = (unichar)0xFEFF; // Insert byte order marker.
}
switch (enc) switch (enc)
{ {
case NSNonLossyASCIIStringEncoding: case NSNonLossyASCIIStringEncoding:
@ -1577,19 +1591,20 @@ tables:
default: default:
#ifdef HAVE_ICONV #ifdef HAVE_ICONV
{ {
iconv_t cd;
char *inbuf; char *inbuf;
char *outbuf; char *outbuf;
size_t inbytesleft; size_t inbytesleft;
size_t outbytesleft; size_t outbytesleft;
size_t result; size_t rval;
iconv_t cd;
cd = iconv_open(UNICODE_ENC, iconv_stringforencoding(enc)); cd = iconv_open(UNICODE_ENC, iconv_stringforencoding(enc));
if (cd == (iconv_t)-1) if (cd == (iconv_t)-1)
{ {
NSLog(@"No iconv for encoding %@ tried to use %s", NSLog(@"No iconv for encoding %@ tried to use %s",
GetEncodingName(enc), iconv_stringforencoding(enc)); GetEncodingName(enc), iconv_stringforencoding(enc));
return NO; result = NO;
break;
} }
inbuf = (char*)src; inbuf = (char*)src;
@ -1606,10 +1621,12 @@ tables:
outbuf = (char*)&ptr[dpos]; outbuf = (char*)&ptr[dpos];
outbytesleft = (bsize - old) * sizeof(unichar); outbytesleft = (bsize - old) * sizeof(unichar);
} }
result = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); rval = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (result == (size_t)-1 && errno != E2BIG) if (rval == (size_t)-1 && errno != E2BIG)
{ {
return NO; result = NO;
iconv_close(cd);
break;
} }
dpos = (bsize * sizeof(unichar) - outbytesleft) / sizeof(unichar); dpos = (bsize * sizeof(unichar) - outbytesleft) / sizeof(unichar);
} }
@ -1617,7 +1634,7 @@ tables:
iconv_close(cd); iconv_close(cd);
} }
#else #else
return NO; result = NO;
#endif #endif
} }
@ -1629,7 +1646,7 @@ tables:
ptr[dpos] = (unichar)0; ptr[dpos] = (unichar)0;
} }
*size = dpos; *size = dpos;
if (dst != 0) if (dst != 0 && (result == YES || (options & GSUniShortOk)))
{ {
if (options & GSUniTemporary) if (options & GSUniTemporary)
{ {
@ -1670,14 +1687,14 @@ tables:
{ {
ptr = NSZoneRealloc(zone, ptr, bytes); ptr = NSZoneRealloc(zone, ptr, bytes);
} }
if (ptr == 0)
{
return NO;
}
} }
*dst = ptr; *dst = ptr;
} }
return YES; else if (ptr != buf && ptr != *dst)
{
NSZoneFree(zone, ptr);
}
return result;
} }
#undef GROW #undef GROW
@ -1696,7 +1713,8 @@ if (dst == 0) \
} \ } \
else if (zone == 0) \ else if (zone == 0) \
{ \ { \
return NO; /* No buffer growth possible ... fail. */ \ result = NO; /* No buffer growth possible ... fail. */ \
break; \
} \ } \
else \ else \
{ \ { \
@ -1724,7 +1742,8 @@ else \
} \ } \
if (ptr == 0) \ if (ptr == 0) \
{ \ { \
return NO; /* Not enough memory */ \ result = NO; /* Not enough memory */ \
break; \
} \ } \
bsize = grow; \ bsize = grow; \
} }
@ -1772,6 +1791,11 @@ else \
* <item>If GSUniTemporary is set, the function will return the results in * <item>If GSUniTemporary is set, the function will return the results in
* an autoreleased buffer rather than in a buffer that the caller must * an autoreleased buffer rather than in a buffer that the caller must
* release.</item> * release.</item>
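* <item>If GSUniBOM is set, the function will read the first unicode
* character of the source as a byte order marker: 0xFEFF means the
* characters are used as they are, 0xFFFE means they are byte swapped,
* and a missing or different first character makes the conversion
* fail.</item>
* <item>If GSUniShortOk is set, the function will return a buffer containing
* any converted characters even if the whole conversion fails.</item>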
* </list> * </list>
* <p>On return, the function result is a flag indicating success (YES) * <p>On return, the function result is a flag indicating success (YES)
* or failure (NO), and on success, the value stored in size is the number * or failure (NO), and on success, the value stored in size is the number
@ -1797,9 +1821,37 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
unichar base = 0; unichar base = 0;
_ucc_ *table = 0; _ucc_ *table = 0;
unsigned tsize = 0; unsigned tsize = 0;
BOOL swapped = NO;
BOOL result = YES;
if (options & GSUniBOM)
{
unichar c;
if (slen == 0) if (slen == 0)
{ {
*size = 0;
return NO; // Missing byte order marker.
}
c = *src++;
slen--;
if (c != 0xFEFF)
{
if (c == 0xFFFE)
{
swapped = YES;
}
else
{
*size = 0;
return NO; // Illegal byte order marker.
}
}
}
if (slen == 0)
{
*size = 0;
return YES; return YES;
} }
@ -1829,12 +1881,17 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
goto bases; goto bases;
bases: bases:
if (strict == YES) if (strict == NO)
{ {
while (spos < slen) while (spos < slen)
{ {
unichar u = src[spos++]; unichar u = src[spos++];
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unbracketed
     * form 'u & 0xff00 >> 8' discards the high byte.
     */
    u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
  }
if (dpos >= bsize) if (dpos >= bsize)
{ {
GROW(); GROW();
@ -1855,6 +1912,10 @@ bases:
{ {
unichar u = src[spos++]; unichar u = src[spos++];
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unbracketed
     * form 'u & 0xff00 >> 8' discards the high byte.
     */
    u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
  }
if (dpos >= bsize) if (dpos >= bsize)
{ {
GROW(); GROW();
@ -1865,7 +1926,8 @@ bases:
} }
else else
{ {
return NO; result = NO;
break;
} }
} }
} }
@ -1904,6 +1966,11 @@ tables:
{ {
unichar u = src[spos++]; unichar u = src[spos++];
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unbracketed
     * form 'u & 0xff00 >> 8' discards the high byte.
     */
    u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
  }
if (dpos >= bsize) if (dpos >= bsize)
{ {
GROW(); GROW();
@ -1941,6 +2008,11 @@ tables:
{ {
unichar u = src[spos++]; unichar u = src[spos++];
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unbracketed
     * form 'u & 0xff00 >> 8' discards the high byte.
     */
    u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
  }
if (dpos >= bsize) if (dpos >= bsize)
{ {
GROW(); GROW();
@ -1958,7 +2030,9 @@ tables:
{ {
if (++i >= tsize) if (++i >= tsize)
{ {
return NO; result = NO;
spos = slen;
break;
} }
} }
ptr[dpos++] = table[--i].to; ptr[dpos++] = table[--i].to;
@ -1974,6 +2048,11 @@ tables:
int res; int res;
int i = 0; int i = 0;
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unbracketed
     * form 'u & 0xff00 >> 8' discards the high byte.
     */
    u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
  }
if (dpos >= bsize) if (dpos >= bsize)
{ {
GROW(); GROW();
@ -1994,7 +2073,8 @@ tables:
{ {
if (strict == YES) if (strict == YES)
{ {
return NO; result = NO;
break;
} }
for (i = 0; i < GSM0338_esize; i++) for (i = 0; i < GSM0338_esize; i++)
{ {
@ -2026,14 +2106,15 @@ tables:
char *outbuf; char *outbuf;
size_t inbytesleft; size_t inbytesleft;
size_t outbytesleft; size_t outbytesleft;
size_t result; size_t rval;
cd = iconv_open(iconv_stringforencoding(enc), UNICODE_ENC); cd = iconv_open(iconv_stringforencoding(enc), UNICODE_ENC);
if (cd == (iconv_t)-1) if (cd == (iconv_t)-1)
{ {
NSLog(@"No iconv for encoding %@ tried to use %s", NSLog(@"No iconv for encoding %@ tried to use %s",
GetEncodingName(enc), iconv_stringforencoding(enc)); GetEncodingName(enc), iconv_stringforencoding(enc));
return NO; result = NO;
break;
} }
inbuf = (char*)src; inbuf = (char*)src;
@ -2050,14 +2131,15 @@ tables:
outbuf = (char*)&ptr[dpos]; outbuf = (char*)&ptr[dpos];
outbytesleft = (bsize - old); outbytesleft = (bsize - old);
} }
result = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); rval = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
if (result == (size_t)-1 && errno != E2BIG) if (rval == (size_t)-1 && errno != E2BIG)
{ {
if (errno == EILSEQ) if (errno == EILSEQ)
{ {
if (strict == YES) if (strict == YES)
{ {
return NO; result = NO;
break;
} }
/* /*
* If we are allowing lossy conversion, we replace any * If we are allowing lossy conversion, we replace any
@ -2073,7 +2155,8 @@ tables:
} }
else if (errno != E2BIG) else if (errno != E2BIG)
{ {
return NO; result = NO;
break;
} }
} }
dpos = bsize - outbytesleft; dpos = bsize - outbytesleft;
@ -2082,7 +2165,8 @@ tables:
iconv_close(cd); iconv_close(cd);
} }
#else #else
return NO; result = NO;
break;
#endif #endif
} }
@ -2094,7 +2178,7 @@ tables:
ptr[dpos] = (unsigned char)0; ptr[dpos] = (unsigned char)0;
} }
*size = dpos; *size = dpos;
if (dst != 0) if (dst != 0 && (result == YES || (options & GSUniShortOk)))
{ {
if (options & GSUniTemporary) if (options & GSUniTemporary)
{ {
@ -2135,14 +2219,14 @@ tables:
{ {
ptr = NSZoneRealloc(zone, ptr, bytes); ptr = NSZoneRealloc(zone, ptr, bytes);
} }
if (ptr == 0)
{
return NO;
}
} }
*dst = ptr; *dst = ptr;
} }
return YES; else if (ptr != buf && ptr != *dst)
{
NSZoneFree(zone, ptr);
}
return result;
} }
#undef GROW #undef GROW

View file

@ -11,7 +11,7 @@
void void
print_string(NSString* s) print_string(NSString* s)
{ {
printf("The string [%s], length %d\n", [s cString], [s length]); printf("The string [%s], length %d\n", [s lossyCString], [s length]);
} }
#include <Foundation/NSString.h> #include <Foundation/NSString.h>
@ -24,21 +24,31 @@ int main()
id s = @"This is a test string"; id s = @"This is a test string";
id s2, s3; id s2, s3;
int a; int a;
unichar uc[6] = { '1', '2', '.', '3', '4', 0}; unichar u0[5] = { 0xFE66, 'a', 'b', 'c', 'd'};
unichar u1[6] = { '1', '2', '.', '3', '4', 0xFE66};
unichar u2[7] = { 'a', 'b', 0xFE66, 'a', 'b', 'c', 'd'};
NSString *us0 = [NSString stringWithCharacters: u0 length: 5];
NSString *us1 = [NSString stringWithCharacters: u1 length: 6];
NSString *us2 = [NSString stringWithCharacters: u2 length: 7];
NSMutableString *fo = [NSMutableString stringWithString: @"abcdef"];
NSMutableString *f1 = [NSMutableString stringWithString: @"ab"];
NSMutableString *fo = [NSMutableString stringWithString: @"abcdefg"];
NS_DURING NS_DURING
[fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: @"aa"]; [fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: us1];
NS_HANDLER NS_HANDLER
printf("Caught exception during string replacement (expected)\n"); printf("Caught exception during string replacement (expected)\n");
NS_ENDHANDLER NS_ENDHANDLER
[f1 appendString: us0];
print_string(f1);
printf("%d\n", [f1 isEqual: us2]);
print_string(s); print_string(s);
s2 = NSStringFromPoint(NSMakePoint(1.374, 5.100)); s2 = NSStringFromPoint(NSMakePoint(1.374, 5.100));
print_string(s2); print_string(s2);
printf("%f", [[NSString stringWithCharacters: uc length: 5] floatValue]); printf("%f", [[NSString stringWithCharacters: u1 length: 5] floatValue]);
s2 = [s copy]; s2 = [s copy];
print_string(s2); print_string(s2);
@ -69,6 +79,23 @@ int main()
NSLog(@"A string with precision %d is :%.*@:", a, a, @"String"); NSLog(@"A string with precision %d is :%.*@:", a, a, @"String");
#endif #endif
{
NSMutableString *base = [@"hello" mutableCopy];
NSString *ext = [@"\"\\UFE66???\"" propertyList];
NSString *want = [@"\"hello\\UFE66???\"" propertyList];
int i;
[base appendString: ext];
printf("%u\n", [base length]);
printf("%u\n", [ext length]);
printf("%u\n", [want length]);
for (i = 0; i < 4; i++)
printf("%x\n", [ext characterAtIndex: i]);
for (i = 0; i < 9; i++)
printf("%x,%x\n", [base characterAtIndex: i], [want characterAtIndex: i]);
printf("%u\n", [want isEqual: base]);
}
[arp release]; [arp release];
exit(0); exit(0);
} }