mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-22 16:33:29 +00:00
Low level character encoding rewrite.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@13133 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
4d9788c3f6
commit
3ee7db1018
7 changed files with 819 additions and 582 deletions
22
ChangeLog
22
ChangeLog
|
@ -1,3 +1,25 @@
|
|||
2002-03-16 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Headers/gnustep/base/Unicode.h: Add more options for character
|
||||
encoding conversion routines.
|
||||
* Source/GSPrivate.h: Add a couple of private functions for character
|
||||
encoding management.
|
||||
* Source/GSString.m: Convert throughout to use new functions for
|
||||
converting from one string encoding to another. Make changes to
|
||||
handle setting of default C string encoding to be an encoding which
|
||||
is incompatible with internal 8-bit string objects.
|
||||
* Source/NSString.m: ditto
|
||||
* Source/Unicode.m: new string encoding conversion functions extended
|
||||
with a few new options. Also fixed some memory allocation bugs to
|
||||
cure memory leaks.
|
||||
|
||||
Rewritten low level support for different character encodings ...
|
||||
should provide more efficient and maintainable conversion between
|
||||
encodings and permit use of wide character encodings and encodings
|
||||
with multibyte sequences as the default C string encoding.
|
||||
Testing ... minimal ... we could do with decent tests for this stuff.
|
||||
So this version must be viewed as possibly very unstable!
|
||||
|
||||
2002-03-14 Adam Fedor <fedor@gnu.org>
|
||||
|
||||
* Version: 1.3.0
|
||||
|
|
|
@ -63,6 +63,8 @@ GS_EXPORT unichar *uni_is_decomp(unichar u);
|
|||
#define GSUniTerminate 0x01
|
||||
#define GSUniTemporary 0x02
|
||||
#define GSUniStrict 0x04
|
||||
#define GSUniBOM 0x08
|
||||
#define GSUniShortOk 0x10
|
||||
|
||||
GS_EXPORT BOOL GSFromUnicode(unsigned char **dst, unsigned int *size,
|
||||
const unichar *src, unsigned int slen, NSStringEncoding enc, NSZone *zone,
|
||||
|
|
|
@ -24,6 +24,17 @@
|
|||
#define __GSPrivate_h_
|
||||
|
||||
|
||||
/*
|
||||
* Function to get the name of a string encoding as an NSString.
|
||||
*/
|
||||
GS_EXPORT NSString *GSEncodingName(NSStringEncoding encoding);
|
||||
|
||||
/*
|
||||
* Function to determine whether data in a particular encoding can
|
||||
* generally be represented as 8-bit characters including ascii.
|
||||
*/
|
||||
GS_EXPORT BOOL GSIsByteEncoding(NSStringEncoding encoding);
|
||||
|
||||
/*
|
||||
* Private concrete string classes.
|
||||
* NB. All these concrete string classes MUST have the same initial ivar
|
||||
|
@ -35,8 +46,8 @@
|
|||
@interface GSString : NSString
|
||||
{
|
||||
union {
|
||||
unichar *u;
|
||||
unsigned char *c;
|
||||
unichar *u; // 16-bit unicode characters.
|
||||
unsigned char *c; // 8-bit characters.
|
||||
} _contents;
|
||||
unsigned int _count;
|
||||
struct {
|
||||
|
|
|
@ -224,6 +224,7 @@ static SEL hashSel;
|
|||
static unsigned (*hashImp)(id, SEL);
|
||||
|
||||
static NSStringEncoding defEnc = 0;
|
||||
static NSStringEncoding intEnc = NSISOLatin1StringEncoding;
|
||||
|
||||
/*
|
||||
* The setup() function is called when any concrete string class is
|
||||
|
@ -277,9 +278,14 @@ setup()
|
|||
ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);
|
||||
|
||||
/*
|
||||
* Cache the default string encoding.
|
||||
* Cache the default string encoding, and set the internal encoding
|
||||
* used by 8-bit character strings to match if possible.
|
||||
*/
|
||||
defEnc = GetDefEncoding();
|
||||
if (GSIsByteEncoding(defEnc) == YES)
|
||||
{
|
||||
intEnc = defEnc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -521,12 +527,12 @@ boolValue_u(ivars self)
|
|||
}
|
||||
else
|
||||
{
|
||||
unsigned len = self->_count < 10 ? self->_count : 9;
|
||||
char buf[len+1];
|
||||
unsigned int l = self->_count < 10 ? self->_count : 9;
|
||||
unsigned char buf[l+1];
|
||||
unsigned char *b = buf;
|
||||
|
||||
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO);
|
||||
buf[len] = '\0';
|
||||
if (len == 3
|
||||
GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
|
||||
if (l == 3
|
||||
&& (buf[0] == 'Y' || buf[0] == 'y')
|
||||
&& (buf[1] == 'E' || buf[1] == 'e')
|
||||
&& (buf[2] == 'S' || buf[2] == 's'))
|
||||
|
@ -543,8 +549,10 @@ boolValue_u(ivars self)
|
|||
static inline BOOL
|
||||
canBeConvertedToEncoding_c(ivars self, NSStringEncoding enc)
|
||||
{
|
||||
if (enc == defEnc)
|
||||
return YES;
|
||||
if (enc == intEnc)
|
||||
{
|
||||
return YES;
|
||||
}
|
||||
else
|
||||
{
|
||||
BOOL result = (*convertImp)((id)self, convertSel, enc);
|
||||
|
@ -571,7 +579,7 @@ characterAtIndex_c(ivars self, unsigned index)
|
|||
c = self->_contents.c[index];
|
||||
if (c > 127)
|
||||
{
|
||||
c = encode_chartouni(c, defEnc);
|
||||
c = encode_chartouni(c, intEnc);
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
@ -631,13 +639,48 @@ compare_u(ivars self, NSString *aString, unsigned mask, NSRange aRange)
|
|||
static inline char*
|
||||
cString_c(ivars self)
|
||||
{
|
||||
char *r = (char*)_fastMallocBuffer(self->_count+1);
|
||||
char *r;
|
||||
|
||||
if (self->_count > 0)
|
||||
if (self->_count == 0)
|
||||
{
|
||||
memcpy(r, self->_contents.c, self->_count);
|
||||
return "";
|
||||
}
|
||||
if (defEnc == intEnc)
|
||||
{
|
||||
r = (char*)_fastMallocBuffer(self->_count+1);
|
||||
|
||||
if (self->_count > 0)
|
||||
{
|
||||
memcpy(r, self->_contents.c, self->_count);
|
||||
}
|
||||
r[self->_count] = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
unichar *u = 0;
|
||||
unsigned l = 0;
|
||||
unsigned s = 0;
|
||||
|
||||
/*
|
||||
* The external C string encoding is not compatible with the internal
|
||||
* C strings ... we must convert from internal format to unicode and
|
||||
* then to the external C string encoding.
|
||||
*/
|
||||
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
|
||||
NSDefaultMallocZone(), 0) == NO)
|
||||
{
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't convert to/from Unicode string."];
|
||||
}
|
||||
if (GSFromUnicode((unsigned char**)&r, &s, u, l, defEnc,
|
||||
NSDefaultMallocZone(), GSUniTerminate|GSUniTemporary|GSUniStrict) == NO)
|
||||
{
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't convert to/from Unicode string."];
|
||||
}
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
}
|
||||
r[self->_count] = '\0';
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -645,58 +688,90 @@ cString_c(ivars self)
|
|||
static inline char*
|
||||
cString_u(ivars self)
|
||||
{
|
||||
int l = self->_count;
|
||||
char *r = (char*)_fastMallocBuffer(l*2 + 1);
|
||||
unsigned limit = 0;
|
||||
unsigned c = self->_count;
|
||||
|
||||
if (l > 0)
|
||||
if (c == 0)
|
||||
{
|
||||
limit = encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, YES);
|
||||
if (limit == 0)
|
||||
return "";
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int l = 0;
|
||||
unsigned char *r = 0;
|
||||
|
||||
if (GSFromUnicode(&r, &l, self->_contents.u, c, defEnc,
|
||||
NSDefaultMallocZone(), GSUniTerminate|GSUniTemporary|GSUniStrict) == NO)
|
||||
{
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't get cString from Unicode string."];
|
||||
}
|
||||
return r;
|
||||
}
|
||||
r[limit] = '\0';
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
cStringLength_c(ivars self)
|
||||
{
|
||||
return self->_count;
|
||||
if (defEnc == intEnc)
|
||||
{
|
||||
return self->_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* The external C string encoding is not compatible with the internal
|
||||
* C strings ... we must convert from internal format to unicode and
|
||||
* then to the external C string encoding.
|
||||
*/
|
||||
if (self->_count == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
unichar *u = 0;
|
||||
unsigned l = 0;
|
||||
unsigned s = 0;
|
||||
|
||||
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
|
||||
NSDefaultMallocZone(), 0) == NO)
|
||||
{
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't convert to/from Unicode string."];
|
||||
}
|
||||
if (GSFromUnicode(0, &s, u, l, defEnc, 0, GSUniStrict) == NO)
|
||||
{
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't get cStringLength from string."];
|
||||
}
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
cStringLength_u(ivars self)
|
||||
{
|
||||
unsigned c;
|
||||
unsigned l = self->_count;
|
||||
unsigned limit = 0;
|
||||
unsigned c = self->_count;
|
||||
|
||||
if (l > 0)
|
||||
if (c == 0)
|
||||
{
|
||||
char *r;
|
||||
|
||||
r = (char*)NSZoneMalloc(NSDefaultMallocZone(), l*2 + 1);
|
||||
limit = encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, NO);
|
||||
if (limit == 0)
|
||||
{
|
||||
NSZoneFree(NSDefaultMallocZone(), r);
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't get cStringLength from Unicode string."];
|
||||
}
|
||||
r[limit] = '\0';
|
||||
c = strlen(r);
|
||||
NSZoneFree(NSDefaultMallocZone(), r);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
c = 0;
|
||||
unsigned l = 0;
|
||||
|
||||
if (GSFromUnicode(0, &l, self->_contents.u, c, defEnc, 0, GSUniStrict)
|
||||
== NO)
|
||||
{
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't get cStringLength from Unicode string."];
|
||||
}
|
||||
return l;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
static inline NSData*
|
||||
|
@ -709,8 +784,8 @@ dataUsingEncoding_c(ivars self, NSStringEncoding encoding, BOOL flag)
|
|||
return [NSDataClass data];
|
||||
}
|
||||
|
||||
if ((encoding == defEnc)
|
||||
|| ((defEnc == NSASCIIStringEncoding)
|
||||
if ((encoding == intEnc)
|
||||
|| ((intEnc == NSASCIIStringEncoding)
|
||||
&& ((encoding == NSISOLatin1StringEncoding)
|
||||
|| (encoding == NSISOLatin2StringEncoding)
|
||||
|| (encoding == NSNEXTSTEPStringEncoding)
|
||||
|
@ -724,51 +799,43 @@ dataUsingEncoding_c(ivars self, NSStringEncoding encoding, BOOL flag)
|
|||
}
|
||||
else if (encoding == NSUnicodeStringEncoding)
|
||||
{
|
||||
int t;
|
||||
unichar *buff;
|
||||
unsigned int l = 0;
|
||||
unichar *r = 0;
|
||||
unsigned int options = GSUniBOM;
|
||||
|
||||
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
|
||||
sizeof(unichar)*(len+1));
|
||||
buff[0] = 0xFEFF;
|
||||
t = encode_cstrtoustr(buff+1, len, self->_contents.c, len, defEnc);
|
||||
return [NSDataClass dataWithBytesNoCopy: buff
|
||||
length: sizeof(unichar)*(t+1)];
|
||||
if (flag == NO)
|
||||
{
|
||||
options |= GSUniStrict;
|
||||
}
|
||||
|
||||
if (GSToUnicode(&r, &l, self->_contents.c, self->_count, intEnc,
|
||||
NSDefaultMallocZone(), options) == NO)
|
||||
{
|
||||
return nil;
|
||||
}
|
||||
return [NSDataClass dataWithBytesNoCopy: r length: l];
|
||||
}
|
||||
else
|
||||
{
|
||||
int t;
|
||||
int bsiz;
|
||||
unichar *ubuff;
|
||||
unsigned char *buff;
|
||||
unichar *u = 0;
|
||||
unsigned l = 0;
|
||||
unsigned char *r = 0;
|
||||
unsigned s = 0;
|
||||
|
||||
ubuff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
|
||||
sizeof(unichar)*len);
|
||||
t = encode_cstrtoustr(ubuff, len, self->_contents.c, len, defEnc);
|
||||
if (encoding == NSUTF8StringEncoding)
|
||||
if (GSToUnicode(&u, &l, self->_contents.c, self->_count, intEnc,
|
||||
NSDefaultMallocZone(), 0) == NO)
|
||||
{
|
||||
bsiz = t*4;
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't convert to Unicode string."];
|
||||
}
|
||||
else
|
||||
if (GSFromUnicode(&r, &s, u, l, encoding, NSDefaultMallocZone(),
|
||||
(flag == NO) ? GSUniStrict : 0) == NO)
|
||||
{
|
||||
bsiz = t;
|
||||
}
|
||||
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
|
||||
flag = (flag == YES) ? NO : YES;
|
||||
t = encode_ustrtocstr(buff, bsiz, ubuff, t, encoding, flag);
|
||||
NSZoneFree(NSDefaultMallocZone(), ubuff);
|
||||
if (t == 0)
|
||||
{
|
||||
NSZoneFree(NSDefaultMallocZone(), buff);
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
return nil;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (t != bsiz)
|
||||
{
|
||||
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
|
||||
}
|
||||
return [NSDataClass dataWithBytesNoCopy: buff length: t];
|
||||
}
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
return [NSDataClass dataWithBytesNoCopy: r length: s];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -795,34 +862,15 @@ dataUsingEncoding_u(ivars self, NSStringEncoding encoding, BOOL flag)
|
|||
}
|
||||
else
|
||||
{
|
||||
int t;
|
||||
int bsiz;
|
||||
unsigned char *buff;
|
||||
unsigned char *r = 0;
|
||||
unsigned int l = 0;
|
||||
|
||||
if (encoding == NSUTF8StringEncoding)
|
||||
if (GSFromUnicode(&r, &l, self->_contents.u, self->_count, encoding,
|
||||
NSDefaultMallocZone(), (flag == NO) ? GSUniStrict : 0) == NO)
|
||||
{
|
||||
bsiz = len*4;
|
||||
}
|
||||
else
|
||||
{
|
||||
bsiz = len;
|
||||
}
|
||||
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
|
||||
flag = (flag == YES) ? NO : YES;
|
||||
t = encode_ustrtocstr(buff, bsiz, self->_contents.u, len, encoding, flag);
|
||||
if (t == 0)
|
||||
{
|
||||
NSZoneFree(NSDefaultMallocZone(), buff);
|
||||
return nil;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (t != bsiz)
|
||||
{
|
||||
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
|
||||
}
|
||||
return [NSDataClass dataWithBytesNoCopy: buff length: t];
|
||||
}
|
||||
return [NSDataClass dataWithBytesNoCopy: r length: l];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -853,11 +901,11 @@ doubleValue_u(ivars self)
|
|||
}
|
||||
else
|
||||
{
|
||||
unsigned len = self->_count < 32 ? self->_count : 31;
|
||||
char buf[len+1];
|
||||
unsigned int l = self->_count < 10 ? self->_count : 9;
|
||||
unsigned char buf[l+1];
|
||||
unsigned char *b = buf;
|
||||
|
||||
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO);
|
||||
buf[len] = '\0';
|
||||
GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
|
||||
return atof(buf);
|
||||
}
|
||||
}
|
||||
|
@ -907,8 +955,10 @@ fillHole(ivars self, unsigned index, unsigned size)
|
|||
static inline void
|
||||
getCharacters_c(ivars self, unichar *buffer, NSRange aRange)
|
||||
{
|
||||
encode_cstrtoustr(buffer, aRange.length, self->_contents.c + aRange.location,
|
||||
aRange.length, defEnc);
|
||||
unsigned len = aRange.length;
|
||||
|
||||
GSToUnicode(&buffer, &len, self->_contents.c + aRange.location,
|
||||
aRange.length, intEnc, 0, 0);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@ -955,8 +1005,7 @@ static inline void
|
|||
getCString_u(ivars self, char *buffer, unsigned int maxLength,
|
||||
NSRange aRange, NSRange *leftoverRange)
|
||||
{
|
||||
int len;
|
||||
int result;
|
||||
unsigned int len;
|
||||
|
||||
if (maxLength > self->_count)
|
||||
{
|
||||
|
@ -981,9 +1030,8 @@ getCString_u(ivars self, char *buffer, unsigned int maxLength,
|
|||
}
|
||||
}
|
||||
|
||||
result = encode_ustrtocstr(buffer, len, &self->_contents.u[aRange.location],
|
||||
len, defEnc, YES);
|
||||
if (result != len)
|
||||
if (GSFromUnicode((unsigned char **)&buffer, &len, self->_contents.u, len,
|
||||
defEnc, 0, GSUniTerminate | GSUniStrict) == NO)
|
||||
{
|
||||
[NSException raise: NSCharacterConversionException
|
||||
format: @"Can't get cString from Unicode string."];
|
||||
|
@ -1018,11 +1066,11 @@ intValue_u(ivars self)
|
|||
}
|
||||
else
|
||||
{
|
||||
unsigned len = self->_count < 32 ? self->_count : 31;
|
||||
char buf[len+1];
|
||||
unsigned int l = self->_count < 10 ? self->_count : 9;
|
||||
unsigned char buf[l+1];
|
||||
unsigned char *b = buf;
|
||||
|
||||
len = encode_ustrtocstr(buf, len, self->_contents.u, len, defEnc, NO);
|
||||
buf[len] = '\0';
|
||||
GSFromUnicode(&b, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
|
||||
return atol(buf);
|
||||
}
|
||||
}
|
||||
|
@ -1177,8 +1225,7 @@ lossyCString_u(ivars self)
|
|||
unsigned l = self->_count;
|
||||
unsigned char *r = (unsigned char*)_fastMallocBuffer(l + 1);
|
||||
|
||||
encode_ustrtocstr(r, l, self->_contents.u, l, defEnc, NO);
|
||||
r[l] = '\0';
|
||||
GSFromUnicode(&r, &l, self->_contents.u, l, intEnc, 0, GSUniTerminate);
|
||||
return (const char*)r;
|
||||
}
|
||||
|
||||
|
@ -1357,7 +1404,7 @@ rangeOfCharacter_c(ivars self, NSCharacterSet *aSet, unsigned mask,
|
|||
|
||||
if (letter > 127)
|
||||
{
|
||||
letter = encode_chartouni(letter, defEnc);
|
||||
letter = encode_chartouni(letter, intEnc);
|
||||
}
|
||||
if ((*mImp)(aSet, cMemberSel, letter))
|
||||
{
|
||||
|
@ -1534,7 +1581,7 @@ transmute(ivars self, NSString *aString)
|
|||
*/
|
||||
transmute = NO;
|
||||
}
|
||||
else if ([aString canBeConvertedToEncoding: defEnc] == YES)
|
||||
else if ([aString canBeConvertedToEncoding: intEnc] == YES)
|
||||
{
|
||||
/*
|
||||
* This is a C string, but the other string can be converted to
|
||||
|
@ -1567,11 +1614,11 @@ transmute(ivars self, NSString *aString)
|
|||
|
||||
if (transmute == YES)
|
||||
{
|
||||
unichar *tmp;
|
||||
int len = self->_count;
|
||||
unichar *tmp = 0;
|
||||
int len = 0;
|
||||
|
||||
tmp = NSZoneMalloc(self->_zone, self->_capacity * sizeof(unichar));
|
||||
len = encode_cstrtoustr(tmp, len, self->_contents.c, len, defEnc);
|
||||
GSToUnicode(&tmp, &len, self->_contents.c, self->_count, intEnc,
|
||||
self->_zone, 0);
|
||||
if (self->_flags.free == 1)
|
||||
{
|
||||
NSZoneFree(self->_zone, self->_contents.c);
|
||||
|
@ -1761,7 +1808,7 @@ transmute(ivars self, NSString *aString)
|
|||
[aCoder encodeValueOfObjCType: @encode(unsigned) at: &_count];
|
||||
if (_count > 0)
|
||||
{
|
||||
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &defEnc];
|
||||
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &intEnc];
|
||||
[aCoder encodeArrayOfObjCType: @encode(unsigned char)
|
||||
count: _count
|
||||
at: _contents.c];
|
||||
|
@ -1770,7 +1817,7 @@ transmute(ivars self, NSString *aString)
|
|||
|
||||
- (NSStringEncoding) fastestEncoding
|
||||
{
|
||||
return defEnc;
|
||||
return intEnc;
|
||||
}
|
||||
|
||||
- (float) floatValue
|
||||
|
@ -1885,7 +1932,7 @@ transmute(ivars self, NSString *aString)
|
|||
|
||||
- (NSStringEncoding) smallestEncoding
|
||||
{
|
||||
return defEnc;
|
||||
return intEnc;
|
||||
}
|
||||
|
||||
- (NSString*) substringFromRange: (NSRange)aRange
|
||||
|
@ -2487,7 +2534,7 @@ transmute(ivars self, NSString *aString)
|
|||
}
|
||||
else
|
||||
{
|
||||
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &defEnc];
|
||||
[aCoder encodeValueOfObjCType: @encode(NSStringEncoding) at: &intEnc];
|
||||
[aCoder encodeArrayOfObjCType: @encode(unsigned char)
|
||||
count: _count
|
||||
at: _contents.c];
|
||||
|
@ -2500,7 +2547,7 @@ transmute(ivars self, NSString *aString)
|
|||
if (_flags.wide == 1)
|
||||
return NSUnicodeStringEncoding;
|
||||
else
|
||||
return defEnc;
|
||||
return intEnc;
|
||||
}
|
||||
|
||||
- (float) floatValue
|
||||
|
@ -2817,7 +2864,7 @@ transmute(ivars self, NSString *aString)
|
|||
maxLength: l];
|
||||
}
|
||||
_contents.c[aRange.location + l]
|
||||
= encode_unitochar([aString characterAtIndex: l], defEnc);
|
||||
= encode_unitochar([aString characterAtIndex: l], intEnc);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2880,7 +2927,7 @@ transmute(ivars self, NSString *aString)
|
|||
[aString getCString: _contents.c maxLength: l];
|
||||
}
|
||||
_contents.c[l]
|
||||
= encode_unitochar([aString characterAtIndex: l], defEnc);
|
||||
= encode_unitochar([aString characterAtIndex: l], intEnc);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2896,7 +2943,7 @@ transmute(ivars self, NSString *aString)
|
|||
return NSUnicodeStringEncoding;
|
||||
}
|
||||
else
|
||||
return defEnc;
|
||||
return intEnc;
|
||||
}
|
||||
|
||||
- (NSString*) substringFromRange: (NSRange)aRange
|
||||
|
@ -3209,7 +3256,7 @@ transmute(ivars self, NSString *aString)
|
|||
if (((ivars)_parent)->_flags.wide == 1)
|
||||
return NSUnicodeStringEncoding;
|
||||
else
|
||||
return defEnc;
|
||||
return intEnc;
|
||||
}
|
||||
|
||||
- (void) getCharacters: (unichar*)buffer
|
||||
|
@ -3313,7 +3360,7 @@ transmute(ivars self, NSString *aString)
|
|||
return NSUnicodeStringEncoding;
|
||||
}
|
||||
else
|
||||
return defEnc;
|
||||
return intEnc;
|
||||
}
|
||||
|
||||
@end
|
||||
|
@ -3436,7 +3483,7 @@ transmute(ivars self, NSString *aString)
|
|||
|
||||
if (c > 127)
|
||||
{
|
||||
c = encode_chartouni(c, defEnc);
|
||||
c = encode_chartouni(c, intEnc);
|
||||
}
|
||||
ret = (ret << 5) + ret + c;
|
||||
}
|
||||
|
|
|
@ -259,6 +259,7 @@ surrogatePairValue(unichar high, unichar low)
|
|||
@implementation NSString
|
||||
|
||||
static NSStringEncoding _DefaultStringEncoding;
|
||||
static BOOL _ByteEncodingOk;
|
||||
static const unichar byteOrderMark = 0xFEFF;
|
||||
static const unichar byteOrderMarkSwapped = 0xFFFE;
|
||||
|
||||
|
@ -344,6 +345,8 @@ handle_printf_atsign (FILE *stream,
|
|||
ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);
|
||||
|
||||
_DefaultStringEncoding = GetDefEncoding();
|
||||
_ByteEncodingOk = GSIsByteEncoding(_DefaultStringEncoding);
|
||||
|
||||
NSStringClass = self;
|
||||
[self setVersion: 1];
|
||||
NSMutableStringClass = [NSMutableString class];
|
||||
|
@ -606,16 +609,22 @@ handle_printf_atsign (FILE *stream,
|
|||
length: (unsigned int)length
|
||||
freeWhenDone: (BOOL)flag
|
||||
{
|
||||
unichar *buf;
|
||||
unichar *buf = 0;
|
||||
unsigned int l = 0;
|
||||
|
||||
buf = (unichar*)NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*length);
|
||||
length = encode_cstrtoustr(buf, length, byteString, length,
|
||||
_DefaultStringEncoding);
|
||||
if (flag == YES && byteString != 0)
|
||||
if (GSToUnicode(&buf, &l, byteString, length, _DefaultStringEncoding,
|
||||
[self zone], 0) == NO)
|
||||
{
|
||||
NSZoneFree(NSZoneFromPointer(byteString), byteString);
|
||||
DESTROY(self);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flag == YES && byteString != 0)
|
||||
{
|
||||
NSZoneFree(NSZoneFromPointer(byteString), byteString);
|
||||
}
|
||||
self = [self initWithCharactersNoCopy: buf length: l freeWhenDone: YES];
|
||||
}
|
||||
self = [self initWithCharactersNoCopy: buf length: length freeWhenDone: YES];
|
||||
return self;
|
||||
}
|
||||
|
||||
|
@ -682,32 +691,44 @@ handle_printf_atsign (FILE *stream,
|
|||
|
||||
if (length > 0)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned i = 0;
|
||||
|
||||
/*
|
||||
* Check to see if we have in fact got an ascii string
|
||||
*/
|
||||
for (i = 0; i < length; i++)
|
||||
if (_ByteEncodingOk)
|
||||
{
|
||||
if (((unsigned char*)bytes)[i] > 127)
|
||||
/*
|
||||
* If it's ok to store ascii strings as internal C strings,
|
||||
* check to see if we have in fact got an ascii string.
|
||||
*/
|
||||
while (i < length)
|
||||
{
|
||||
break;
|
||||
if (((unsigned char*)bytes)[i] > 127)
|
||||
{
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == length)
|
||||
{
|
||||
self = [self initWithCString: bytes length: length];
|
||||
}
|
||||
else
|
||||
{
|
||||
unichar *s;
|
||||
unichar *u = 0;
|
||||
unsigned int l = 0;
|
||||
|
||||
s = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*length);
|
||||
length = encode_cstrtoustr(s, length, bytes, length,
|
||||
NSUTF8StringEncoding);
|
||||
self = [self initWithCharactersNoCopy: s
|
||||
length: length
|
||||
freeWhenDone: YES];
|
||||
if (GSToUnicode(&u, &l, bytes, length, NSUTF8StringEncoding,
|
||||
GSObjCZone(self), 0) == NO)
|
||||
{
|
||||
DESTROY(self);
|
||||
}
|
||||
else
|
||||
{
|
||||
self = [self initWithCharactersNoCopy: u
|
||||
length: l
|
||||
freeWhenDone: YES];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1071,96 +1092,139 @@ handle_printf_atsign (FILE *stream,
|
|||
|
||||
if (len == 0)
|
||||
{
|
||||
self = [self initWithCStringNoCopy: "" length: 0 freeWhenDone: NO];
|
||||
self = [self initWithCharactersNoCopy: (unichar*)""
|
||||
length: 0
|
||||
freeWhenDone: NO];
|
||||
}
|
||||
else if (encoding == NSASCIIStringEncoding
|
||||
|| encoding == _DefaultStringEncoding)
|
||||
else if (_ByteEncodingOk == YES
|
||||
&& (encoding==_DefaultStringEncoding || encoding==NSASCIIStringEncoding))
|
||||
{
|
||||
char *s = NSZoneMalloc(GSObjCZone(self), len);
|
||||
char *s;
|
||||
|
||||
/*
|
||||
* We can only create an internal C string if the default C string
|
||||
* encoding is Ok, and the specified encoding matches it.
|
||||
*/
|
||||
s = NSZoneMalloc(GSObjCZone(self), len);
|
||||
[data getBytes: s];
|
||||
self = [self initWithCStringNoCopy: s length: len freeWhenDone: YES];
|
||||
}
|
||||
else if (encoding == NSUTF8StringEncoding)
|
||||
{
|
||||
const char *bytes = [data bytes];
|
||||
unsigned i;
|
||||
unsigned i = 0;
|
||||
|
||||
/*
|
||||
* Check to see if we have in fact got an ascii string
|
||||
*/
|
||||
for (i = 0; i < len; i++)
|
||||
if (_ByteEncodingOk)
|
||||
{
|
||||
if (((unsigned char*)bytes)[i] > 127)
|
||||
/*
|
||||
* If it's ok to store ascii strings as internal C strings,
|
||||
* check to see if we have in fact got an ascii string.
|
||||
*/
|
||||
while (i < len)
|
||||
{
|
||||
break;
|
||||
if (((unsigned char*)bytes)[i] > 127)
|
||||
{
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == len)
|
||||
{
|
||||
self = [self initWithCString: bytes length: len];
|
||||
}
|
||||
else
|
||||
{
|
||||
unichar *u;
|
||||
unichar *u = 0;
|
||||
unsigned int l = 0;
|
||||
|
||||
u = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*len);
|
||||
len = encode_cstrtoustr(u, len, bytes, len,
|
||||
NSUTF8StringEncoding);
|
||||
if (len > 0)
|
||||
if (GSToUnicode(&u, &l, bytes, len, NSUTF8StringEncoding,
|
||||
GSObjCZone(self), 0) == NO)
|
||||
{
|
||||
self = [self initWithCharactersNoCopy: u
|
||||
length: len
|
||||
freeWhenDone: YES];
|
||||
DESTROY(self);
|
||||
}
|
||||
else
|
||||
{
|
||||
DESTROY(self);
|
||||
self = [self initWithCharactersNoCopy: u
|
||||
length: l
|
||||
freeWhenDone: YES];
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (encoding == NSUnicodeStringEncoding)
|
||||
{
|
||||
if (len%2 != 0)
|
||||
{
|
||||
DESTROY(self); // Not valid unicode data.
|
||||
}
|
||||
else
|
||||
{
|
||||
BOOL swapped = NO;
|
||||
unsigned char *b;
|
||||
unichar *uptr;
|
||||
|
||||
b = (unsigned char*)[data bytes];
|
||||
uptr = (unichar*)b;
|
||||
if (*uptr == 0xFFFE)
|
||||
{
|
||||
b = (unsigned char*)++uptr;
|
||||
len -= sizeof(unichar);
|
||||
}
|
||||
else if (*uptr == 0xFEFF)
|
||||
{
|
||||
b = (unsigned char*)++uptr;
|
||||
len -= sizeof(unichar);
|
||||
swapped = YES;
|
||||
}
|
||||
if (len == 0)
|
||||
{
|
||||
self = [self initWithCharactersNoCopy: (unichar*)""
|
||||
length: 0
|
||||
freeWhenDone: NO];
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned char *u;
|
||||
|
||||
u = (unsigned char*)NSZoneMalloc(GSObjCZone(self), len);
|
||||
if (swapped == YES)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < len; i += 2)
|
||||
{
|
||||
u[i] = b[i + 1];
|
||||
u[i + 1] = b[i];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(u, b, len);
|
||||
}
|
||||
self = [self initWithCharactersNoCopy: (unichar*)u
|
||||
length: len/2
|
||||
freeWhenDone: YES];
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unichar *u;
|
||||
unsigned count;
|
||||
const unsigned char *b;
|
||||
unsigned char *b;
|
||||
unichar *u = 0;
|
||||
unsigned l = 0;
|
||||
|
||||
if (len < 1 || (len < 2 && encoding == NSUnicodeStringEncoding))
|
||||
b = (unsigned char*)[data bytes];
|
||||
if (GSToUnicode(&u, &l, b, len, NSUTF8StringEncoding, GSObjCZone(self),
|
||||
0) == NO)
|
||||
{
|
||||
return [self initWithCStringNoCopy: "" length: 0 freeWhenDone: NO];
|
||||
}
|
||||
|
||||
b = [data bytes];
|
||||
u = NSZoneMalloc(GSObjCZone(self), sizeof(unichar)*(len+1));
|
||||
if (encoding == NSUnicodeStringEncoding)
|
||||
{
|
||||
if ((b[0]==0xFE) & (b[1]==0xFF))
|
||||
{
|
||||
b = &b[2];
|
||||
count -= 2;
|
||||
}
|
||||
for (count = 0; count < (len - 1); count += 2)
|
||||
{
|
||||
u[count/2 - 1] = 256*b[count + 1] + b[count];
|
||||
}
|
||||
count = count/2;
|
||||
self = [self initWithCharactersNoCopy: u
|
||||
length: count
|
||||
freeWhenDone: YES];
|
||||
DESTROY(self);
|
||||
}
|
||||
else
|
||||
{
|
||||
count = encode_cstrtoustr(u, len, b, len, encoding);
|
||||
if (count < 1)
|
||||
{
|
||||
DESTROY(self);
|
||||
}
|
||||
else
|
||||
{
|
||||
self = [self initWithCharactersNoCopy: u
|
||||
length: count
|
||||
freeWhenDone: YES];
|
||||
}
|
||||
self = [self initWithCharactersNoCopy: u
|
||||
length: l
|
||||
freeWhenDone: YES];
|
||||
}
|
||||
}
|
||||
return self;
|
||||
|
@ -2314,47 +2378,27 @@ handle_printf_atsign (FILE *stream,
|
|||
buff = (unichar*)NSZoneMalloc(NSDefaultMallocZone(),
|
||||
sizeof(unichar)*(len+1));
|
||||
buff[0] = 0xFEFF;
|
||||
for (count = 0; count < len; count++)
|
||||
{
|
||||
buff[count+1] = (*caiImp)(self, caiSel, count);
|
||||
}
|
||||
[self getCharacters: &buff[1]];
|
||||
return [NSDataClass dataWithBytesNoCopy: buff
|
||||
length: sizeof(unichar)*(len+1)];
|
||||
}
|
||||
else
|
||||
{
|
||||
int t;
|
||||
int bsiz;
|
||||
unsigned char *b = 0;
|
||||
int l = 0;
|
||||
unichar *u;
|
||||
unsigned char *buff;
|
||||
|
||||
u = (unichar*)NSZoneMalloc(NSDefaultMallocZone(), len*sizeof(unichar));
|
||||
[self getCharacters: u];
|
||||
if (encoding == NSUTF8StringEncoding)
|
||||
if (GSFromUnicode(&b, &l, u, len, encoding, NSDefaultMallocZone(),
|
||||
(flag == NO) ? GSUniStrict : 0)
|
||||
== NO)
|
||||
{
|
||||
bsiz = len * 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
bsiz = len;
|
||||
}
|
||||
buff = (unsigned char*)NSZoneMalloc(NSDefaultMallocZone(), bsiz);
|
||||
flag = (flag == YES) ? NO : YES;
|
||||
t = encode_ustrtocstr(buff, bsiz, u, len, encoding, flag);
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
if (t == 0)
|
||||
{
|
||||
NSZoneFree(NSDefaultMallocZone(), buff);
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
return nil;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bsiz != t)
|
||||
{
|
||||
buff = NSZoneRealloc(NSDefaultMallocZone(), buff, t);
|
||||
}
|
||||
return [NSDataClass dataWithBytesNoCopy: buff length: t];
|
||||
}
|
||||
NSZoneFree(NSDefaultMallocZone(), u);
|
||||
return [NSDataClass dataWithBytesNoCopy: b length: l];
|
||||
}
|
||||
return nil;
|
||||
}
|
||||
|
|
750
Source/Unicode.m
750
Source/Unicode.m
|
@ -28,6 +28,7 @@
|
|||
|
||||
#include <config.h>
|
||||
#include <Foundation/NSString.h>
|
||||
#include <Foundation/NSLock.h>
|
||||
#include <base/Unicode.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -50,8 +51,10 @@ typedef struct {unichar from; char to;} _ucc_;
|
|||
#endif
|
||||
#include <errno.h>
|
||||
|
||||
// The rest of the GNUstep code stores UNICODE in internal byte order,
|
||||
// so we do the same. This should be UCS-2-INTERNAL for libiconv
|
||||
/*
|
||||
* The whole of the GNUstep code stores UNICODE in internal byte order,
|
||||
* so we do the same. This should be UCS-2-INTERNAL for libiconv
|
||||
*/
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
#define UNICODE_INT "UNICODEBIG"
|
||||
#else
|
||||
|
@ -62,236 +65,6 @@ typedef struct {unichar from; char to;} _ucc_;
|
|||
|
||||
static const char *unicode_enc = NULL;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
typedef unsigned char unc;
|
||||
static NSStringEncoding defEnc = GSUndefinedEncoding;
|
||||
|
||||
#ifdef HAVE_ICONV
|
||||
/*
|
||||
* FIXME: We should check dynamically which encodings are found on this
|
||||
* computer as different implementations of iconv will support different
|
||||
* encodings.
|
||||
*/
|
||||
static NSStringEncoding _availableEncodings[] = {
|
||||
NSASCIIStringEncoding,
|
||||
NSNEXTSTEPStringEncoding,
|
||||
NSJapaneseEUCStringEncoding,
|
||||
NSUTF8StringEncoding,
|
||||
NSISOLatin1StringEncoding,
|
||||
// NSSymbolStringEncoding,
|
||||
// NSNonLossyASCIIStringEncoding,
|
||||
NSShiftJISStringEncoding,
|
||||
NSISOLatin2StringEncoding,
|
||||
NSUnicodeStringEncoding,
|
||||
NSWindowsCP1251StringEncoding,
|
||||
NSWindowsCP1252StringEncoding,
|
||||
NSWindowsCP1253StringEncoding,
|
||||
NSWindowsCP1254StringEncoding,
|
||||
NSWindowsCP1250StringEncoding,
|
||||
NSISO2022JPStringEncoding,
|
||||
NSMacOSRomanStringEncoding,
|
||||
// NSProprietaryStringEncoding,
|
||||
// GNUstep additions
|
||||
NSISOCyrillicStringEncoding,
|
||||
NSKOI8RStringEncoding,
|
||||
NSISOLatin3StringEncoding,
|
||||
NSISOLatin4StringEncoding,
|
||||
NSISOArabicStringEncoding,
|
||||
NSISOGreekStringEncoding,
|
||||
NSISOHebrewStringEncoding,
|
||||
NSGB2312StringEncoding,
|
||||
NSGSM0338StringEncoding,
|
||||
NSBIG5StringEncoding,
|
||||
0
|
||||
};
|
||||
#else
|
||||
// Uncomment when implemented
|
||||
static NSStringEncoding _availableEncodings[] = {
|
||||
NSASCIIStringEncoding,
|
||||
NSNEXTSTEPStringEncoding,
|
||||
// NSJapaneseEUCStringEncoding,
|
||||
// NSUTF8StringEncoding,
|
||||
NSISOLatin1StringEncoding,
|
||||
// NSSymbolStringEncoding,
|
||||
// NSNonLossyASCIIStringEncoding,
|
||||
// NSShiftJISStringEncoding,
|
||||
NSISOLatin2StringEncoding,
|
||||
NSUnicodeStringEncoding,
|
||||
// NSWindowsCP1251StringEncoding,
|
||||
// NSWindowsCP1252StringEncoding,
|
||||
// NSWindowsCP1253StringEncoding,
|
||||
// NSWindowsCP1254StringEncoding,
|
||||
// NSWindowsCP1250StringEncoding,
|
||||
// NSISO2022JPStringEncoding,
|
||||
// NSMacOSRomanStringEncoding,
|
||||
// NSProprietaryStringEncoding,
|
||||
// GNUstep additions
|
||||
NSISOCyrillicStringEncoding,
|
||||
// NSKOI8RStringEncoding,
|
||||
// NSISOLatin3StringEncoding,
|
||||
// NSISOLatin4StringEncoding,
|
||||
// NSISOArabicStringEncoding,
|
||||
// NSISOGreekStringEncoding,
|
||||
// NSISOHebrewStringEncoding,
|
||||
// NSGB2312StringEncoding,
|
||||
NSGSM0338StringEncoding,
|
||||
NSBIG5StringEncoding,
|
||||
0
|
||||
};
|
||||
#endif
|
||||
|
||||
struct _strenc_ {NSStringEncoding enc; char *ename;};
|
||||
const struct _strenc_ str_encoding_table[]=
|
||||
{
|
||||
{NSASCIIStringEncoding,"NSASCIIStringEncoding"},
|
||||
{NSNEXTSTEPStringEncoding,"NSNEXTSTEPStringEncoding"},
|
||||
{NSJapaneseEUCStringEncoding, "NSJapaneseEUCStringEncoding"},
|
||||
{NSUTF8StringEncoding,"NSUTF8StringEncoding"},
|
||||
{NSISOLatin1StringEncoding,"NSISOLatin1StringEncoding"},
|
||||
{NSSymbolStringEncoding,"NSSymbolStringEncoding"},
|
||||
{NSNonLossyASCIIStringEncoding,"NSNonLossyASCIIStringEncoding"},
|
||||
{NSShiftJISStringEncoding,"NSShiftJISStringEncoding"},
|
||||
{NSISOLatin2StringEncoding,"NSISOLatin2StringEncoding"},
|
||||
{NSUnicodeStringEncoding, "NSUnicodeStringEncoding"},
|
||||
{NSWindowsCP1251StringEncoding,"NSWindowsCP1251StringEncoding"},
|
||||
{NSWindowsCP1252StringEncoding,"NSWindowsCP1252StringEncoding"},
|
||||
{NSWindowsCP1253StringEncoding,"NSWindowsCP1253StringEncoding"},
|
||||
{NSWindowsCP1254StringEncoding,"NSWindowsCP1254StringEncoding"},
|
||||
{NSWindowsCP1250StringEncoding,"NSWindowsCP1250StringEncoding"},
|
||||
{NSISO2022JPStringEncoding,"NSISO2022JPStringEncoding "},
|
||||
{NSMacOSRomanStringEncoding, "NSMacOSRomanStringEncoding"},
|
||||
{NSProprietaryStringEncoding, "NSProprietaryStringEncoding"},
|
||||
|
||||
// GNUstep additions
|
||||
{NSISOCyrillicStringEncoding,"NSISOCyrillicStringEncoding"},
|
||||
{NSKOI8RStringEncoding, "NSKOI8RStringEncoding"},
|
||||
{NSISOLatin3StringEncoding, "NSISOLatin3StringEncoding"},
|
||||
{NSISOLatin4StringEncoding, "NSISOLatin4StringEncoding"},
|
||||
{NSISOArabicStringEncoding, "NSISOArabicStringEncoding"},
|
||||
{NSISOGreekStringEncoding, "NSISOGreekStringEncoding"},
|
||||
{NSISOHebrewStringEncoding, "NSISOHebrewStringEncoding"},
|
||||
{NSISOLatin5StringEncoding, "NSISOLatin5StringEncoding"},
|
||||
{NSISOLatin6StringEncoding, "NSISOLatin6StringEncoding"},
|
||||
{NSISOLatin7StringEncoding, "NSISOLatin7StringEncoding"},
|
||||
{NSISOLatin8StringEncoding, "NSISOLatin8StringEncoding"},
|
||||
{NSISOLatin9StringEncoding, "NSISOLatin9StringEncoding"},
|
||||
{NSUTF7StringEncoding, "NSUTF7StringEncoding"},
|
||||
{NSGB2312StringEncoding, "NSGB2312StringEncoding"},
|
||||
{NSGSM0338StringEncoding, "NSGSM0338StringEncoding"},
|
||||
{NSBIG5StringEncoding, "NSBIG5StringEncoding"},
|
||||
|
||||
{0, "Unknown encoding"}
|
||||
};
|
||||
|
||||
|
||||
|
||||
NSStringEncoding *GetAvailableEncodings()
|
||||
{
|
||||
// FIXME: This should check which iconv definitions are available and
|
||||
// add them to the available encodings
|
||||
return _availableEncodings;
|
||||
}
|
||||
|
||||
NSStringEncoding
|
||||
GetDefEncoding()
|
||||
{
|
||||
if (defEnc == GSUndefinedEncoding)
|
||||
{
|
||||
char *encoding;
|
||||
unsigned int count;
|
||||
NSStringEncoding tmp;
|
||||
NSStringEncoding *availableEncodings;
|
||||
|
||||
availableEncodings = GetAvailableEncodings();
|
||||
|
||||
encoding = getenv("GNUSTEP_STRING_ENCODING");
|
||||
if (encoding != 0)
|
||||
{
|
||||
count = 0;
|
||||
while (str_encoding_table[count].enc
|
||||
&& strcmp(str_encoding_table[count].ename,encoding))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (str_encoding_table[count].enc)
|
||||
{
|
||||
defEnc = str_encoding_table[count].enc;
|
||||
if ((defEnc == NSUnicodeStringEncoding)
|
||||
|| (defEnc == NSUTF8StringEncoding)
|
||||
|| (defEnc == NSSymbolStringEncoding))
|
||||
{
|
||||
fprintf(stderr, "WARNING: %s - encoding not supported as "
|
||||
"default c string encoding.\n", encoding);
|
||||
fprintf(stderr,
|
||||
"NSISOLatin1StringEncoding set as default.\n");
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
else /*encoding should be supported but is it implemented?*/
|
||||
{
|
||||
count = 0;
|
||||
tmp = 0;
|
||||
while (availableEncodings[count] != 0)
|
||||
{
|
||||
if (defEnc != availableEncodings[count])
|
||||
{
|
||||
tmp = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp = defEnc;
|
||||
break;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
if (tmp == 0 && defEnc != NSISOLatin1StringEncoding)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"WARNING: %s - encoding not yet implemented.\n",
|
||||
encoding);
|
||||
fprintf(stderr,
|
||||
"NSISOLatin1StringEncoding set as default.\n");
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* encoding not found */
|
||||
{
|
||||
fprintf(stderr,
|
||||
"WARNING: %s - encoding not supported.\n", encoding);
|
||||
fprintf(stderr, "NSISOLatin1StringEncoding set as default.\n");
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
}
|
||||
else /* environment var not found */
|
||||
{
|
||||
/* shouldn't be required. It really should be in UserDefaults - asf */
|
||||
//fprintf(stderr, "WARNING: GNUSTEP_STRING_ENCODING environment");
|
||||
//fprintf(stderr, " variable not found.\n");
|
||||
//fprintf(stderr, "NSISOLatin1StringEncoding set as default.\n");
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
}
|
||||
return defEnc;
|
||||
}
|
||||
|
||||
NSString*
|
||||
GetEncodingName(NSStringEncoding encoding)
|
||||
{
|
||||
unsigned int count=0;
|
||||
|
||||
while (str_encoding_table[count].enc
|
||||
&& (str_encoding_table[count].enc != encoding))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
|
||||
return [NSString stringWithCString: str_encoding_table[count].ename];
|
||||
}
|
||||
|
||||
#ifdef HAVE_ICONV
|
||||
|
||||
/* Check to see what type of internal unicode format the library supports */
|
||||
static const char *
|
||||
internal_unicode_enc()
|
||||
|
@ -316,79 +89,302 @@ internal_unicode_enc()
|
|||
return unicode_enc;
|
||||
}
|
||||
|
||||
static const char *
|
||||
iconv_stringforencoding(NSStringEncoding enc)
|
||||
#endif
|
||||
|
||||
typedef unsigned char unc;
|
||||
static NSStringEncoding defEnc = GSUndefinedEncoding;
|
||||
static NSStringEncoding *_availableEncodings = 0;
|
||||
|
||||
struct _strenc_ {
|
||||
NSStringEncoding enc; // Constant representing the encoding.
|
||||
const char *ename; // ASCII string representation of name.
|
||||
const char *iconv; /* Iconv name of encoding. If this
|
||||
* is NULL, we cannot use iconv to
|
||||
* perform conversions to/from this
|
||||
* encoding.
|
||||
*/
|
||||
BOOL eightBit; /* Flag to say whether this encoding
|
||||
* can be stored in a byte array ...
|
||||
* ie whether the encoding consists
|
||||
* entirely of single byte characters
|
||||
* and the first 128 are identical to
|
||||
* the ASCII character set.
|
||||
*/
|
||||
BOOL supported; /* Is this supported? Some encodings
|
||||
* have builtin conversion to/from
|
||||
* unicode, but for others we must
|
||||
* check with iconv to see if it
|
||||
* supports them on this platform.
|
||||
*/
|
||||
};
|
||||
|
||||
/*
|
||||
* The str_encoding_table is a compact representation of all the string
|
||||
* encoding information we might need. It gets modified at runtime.
|
||||
*/
|
||||
static struct _strenc_ str_encoding_table[] = {
|
||||
{NSASCIIStringEncoding,"NSASCIIStringEncoding","ASCII",1,1},
|
||||
{NSNEXTSTEPStringEncoding,"NSNEXTSTEPStringEncoding","NEXTSTEP",1,1},
|
||||
{NSJapaneseEUCStringEncoding, "NSJapaneseEUCStringEncoding","EUC-JP",0,0},
|
||||
{NSUTF8StringEncoding,"NSUTF8StringEncoding","UTF-8",0,0},
|
||||
{NSISOLatin1StringEncoding,"NSISOLatin1StringEncoding","ISO-8859-1",1,1},
|
||||
{NSSymbolStringEncoding,"NSSymbolStringEncoding",0,0,0},
|
||||
{NSNonLossyASCIIStringEncoding,"NSNonLossyASCIIStringEncoding",0,1,1},
|
||||
{NSShiftJISStringEncoding,"NSShiftJISStringEncoding","SHIFT-JIS",0,0},
|
||||
{NSISOLatin2StringEncoding,"NSISOLatin2StringEncoding","ISO-8859-2",1,1},
|
||||
{NSUnicodeStringEncoding, "NSUnicodeStringEncoding",0,0,1},
|
||||
{NSWindowsCP1251StringEncoding,"NSWindowsCP1251StringEncoding","CP1251",0,0},
|
||||
{NSWindowsCP1252StringEncoding,"NSWindowsCP1252StringEncoding","CP1252",0,0},
|
||||
{NSWindowsCP1253StringEncoding,"NSWindowsCP1253StringEncoding","CP1253",0,0},
|
||||
{NSWindowsCP1254StringEncoding,"NSWindowsCP1254StringEncoding","CP1254",0,0},
|
||||
{NSWindowsCP1250StringEncoding,"NSWindowsCP1250StringEncoding","CP1250",0,0},
|
||||
{NSISO2022JPStringEncoding,"NSISO2022JPStringEncoding","ISO-2022-JP",0,0},
|
||||
{NSMacOSRomanStringEncoding, "NSMacOSRomanStringEncoding","MACINTOSH",0,0},
|
||||
{NSProprietaryStringEncoding, "NSProprietaryStringEncoding",0,0,0},
|
||||
|
||||
// GNUstep additions
|
||||
{NSISOCyrillicStringEncoding,"NSISOCyrillicStringEncoding","ISO-8859-5",0,1},
|
||||
{NSKOI8RStringEncoding, "NSKOI8RStringEncoding","KOI8-R",0,0},
|
||||
{NSISOLatin3StringEncoding, "NSISOLatin3StringEncoding","ISO-8859-3",0,0},
|
||||
{NSISOLatin4StringEncoding, "NSISOLatin4StringEncoding","ISO-8859-4",0,0},
|
||||
{NSISOArabicStringEncoding, "NSISOArabicStringEncoding","ISO-8859-6",0,0},
|
||||
{NSISOGreekStringEncoding, "NSISOGreekStringEncoding","ISO-8859-7",0,0},
|
||||
{NSISOHebrewStringEncoding, "NSISOHebrewStringEncoding","ISO-8859-8",0,0},
|
||||
{NSISOLatin5StringEncoding, "NSISOLatin5StringEncoding","ISO-8859-9",0,0},
|
||||
{NSISOLatin6StringEncoding, "NSISOLatin6StringEncoding","ISO-8859-10",0,0},
|
||||
{NSISOLatin7StringEncoding, "NSISOLatin7StringEncoding","ISO-8859-13",0,0},
|
||||
{NSISOLatin8StringEncoding, "NSISOLatin8StringEncoding","ISO-8859-14",0,0},
|
||||
{NSISOLatin9StringEncoding, "NSISOLatin9StringEncoding","ISO-8859-15",0,0},
|
||||
{NSUTF7StringEncoding, "NSUTF7StringEncoding",0,0,0},
|
||||
{NSGB2312StringEncoding, "NSGB2312StringEncoding","EUC-CN",0,0},
|
||||
{NSGSM0338StringEncoding, "NSGSM0338StringEncoding",0,0,1},
|
||||
{NSBIG5StringEncoding, "NSBIG5StringEncoding","BIG5",0,0},
|
||||
|
||||
{0,"Unknown encoding",0,0,0}
|
||||
};
|
||||
|
||||
static struct _strenc_ **encodingTable = 0;
|
||||
static unsigned encTableSize = 0;
|
||||
|
||||
NSStringEncoding *GetAvailableEncodings()
|
||||
{
|
||||
switch (enc)
|
||||
if (_availableEncodings == 0)
|
||||
{
|
||||
case NSASCIIStringEncoding:
|
||||
return "ASCII";
|
||||
case NSNEXTSTEPStringEncoding:
|
||||
return "NEXTSTEP";
|
||||
case NSISOLatin1StringEncoding:
|
||||
return "ISO-8859-1";
|
||||
case NSISOLatin2StringEncoding:
|
||||
return "ISO-8859-2";
|
||||
case NSUnicodeStringEncoding:
|
||||
return UNICODE_ENC;
|
||||
case NSJapaneseEUCStringEncoding:
|
||||
return "EUC-JP";
|
||||
case NSUTF8StringEncoding:
|
||||
return "UTF-8";
|
||||
case NSShiftJISStringEncoding:
|
||||
return "SHIFT-JIS";
|
||||
case NSWindowsCP1250StringEncoding:
|
||||
return "CP1250";
|
||||
case NSWindowsCP1251StringEncoding:
|
||||
return "CP1251";
|
||||
case NSWindowsCP1252StringEncoding:
|
||||
return "CP1252";
|
||||
case NSWindowsCP1253StringEncoding:
|
||||
return "CP1253";
|
||||
case NSWindowsCP1254StringEncoding:
|
||||
return "CP1254";
|
||||
case NSISO2022JPStringEncoding:
|
||||
return "ISO-2022-JP";
|
||||
case NSMacOSRomanStringEncoding:
|
||||
return "MACINTOSH";
|
||||
[gnustep_global_lock lock];
|
||||
if (_availableEncodings == 0)
|
||||
{
|
||||
NSStringEncoding *encodings;
|
||||
unsigned count;
|
||||
unsigned pos;
|
||||
unsigned i;
|
||||
|
||||
// GNUstep extensions
|
||||
case NSKOI8RStringEncoding:
|
||||
return "KOI8-R";
|
||||
case NSISOLatin3StringEncoding:
|
||||
return "ISO-8859-3";
|
||||
case NSISOLatin4StringEncoding:
|
||||
return "ISO-8859-4";
|
||||
case NSISOCyrillicStringEncoding:
|
||||
return "ISO-8859-5";
|
||||
case NSISOArabicStringEncoding:
|
||||
return "ISO-8859-6";
|
||||
case NSISOGreekStringEncoding:
|
||||
return "ISO-8859-7";
|
||||
case NSISOHebrewStringEncoding:
|
||||
return "ISO-8859-8";
|
||||
/*
|
||||
* We want to store pointers to our string encoding info in a
|
||||
* large table so we can do efficient lookup by encoding value.
|
||||
*/
|
||||
#define MAX_ENCODING 128
|
||||
count = sizeof(str_encoding_table) / sizeof(struct _strenc_);
|
||||
|
||||
case NSISOLatin5StringEncoding:
|
||||
return "ISO-8859-9";
|
||||
case NSISOLatin6StringEncoding:
|
||||
return "ISO-8859-10";
|
||||
case NSISOLatin7StringEncoding:
|
||||
return "ISO-8859-13";
|
||||
case NSISOLatin8StringEncoding:
|
||||
return "ISO-8859-14";
|
||||
case NSISOLatin9StringEncoding:
|
||||
return "ISO-8859-15";
|
||||
/*
|
||||
* First determine the largest encoding value and create a
|
||||
* large enough table of pointers.
|
||||
*/
|
||||
encTableSize = 0;
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
unsigned tmp = str_encoding_table[i].enc;
|
||||
|
||||
case NSGB2312StringEncoding:
|
||||
return "EUC-CN";
|
||||
if (tmp >= MAX_ENCODING)
|
||||
{
|
||||
fprintf(stderr, "ERROR ... illegal NSStringEncoding "
|
||||
"value in str_encoding_table. Ignored\n");
|
||||
}
|
||||
else if (tmp > encTableSize)
|
||||
{
|
||||
encTableSize = tmp;
|
||||
}
|
||||
}
|
||||
encodingTable = malloc((encTableSize+1)*sizeof(struct _strenc_ *));
|
||||
memset(encodingTable, 0, (encTableSize+1)*sizeof(struct _strenc_ *));
|
||||
|
||||
case NSBIG5StringEncoding:
|
||||
return "BIG5";
|
||||
default:
|
||||
return "";
|
||||
/*
|
||||
* Now set up the pointers at the correct location in the table.
|
||||
*/
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
unsigned tmp = str_encoding_table[i].enc;
|
||||
|
||||
if (tmp < MAX_ENCODING)
|
||||
{
|
||||
encodingTable[tmp] = &str_encoding_table[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now build up a list of supported encodings ... in the
|
||||
* format needed to support [NSString +availableStringEncodings]
|
||||
* Check to see what iconv support we have as we go along.
|
||||
* This is also the place where we determine the name we use
|
||||
* for iconv to support unicode.
|
||||
*/
|
||||
encodings = objc_malloc(sizeof(NSStringEncoding) * count);
|
||||
pos = 0;
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
NSStringEncoding enc = str_encoding_table[i].enc;
|
||||
|
||||
if (enc == 0 || enc >= MAX_ENCODING)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
#ifdef HAVE_ICONV
|
||||
if (enc == NSUnicodeStringEncoding)
|
||||
{
|
||||
encodingTable[enc]->iconv = UNICODE_ENC;
|
||||
encodingTable[enc]->supported = 1;
|
||||
}
|
||||
if (encodingTable[enc]->supported == 0)
|
||||
{
|
||||
if (encodingTable[enc]->iconv == 0)
|
||||
{
|
||||
continue; // Not handled by iconv.
|
||||
}
|
||||
else
|
||||
{
|
||||
iconv_t c;
|
||||
|
||||
c = iconv_open(UNICODE_ENC, encodingTable[enc]->iconv);
|
||||
if (c == (iconv_t)-1)
|
||||
{
|
||||
continue; // Can't convert to unicode
|
||||
}
|
||||
iconv_close(c);
|
||||
c = iconv_open(encodingTable[enc]->iconv, UNICODE_ENC);
|
||||
if (c == (iconv_t)-1)
|
||||
{
|
||||
continue; // Can't convert from unicode
|
||||
}
|
||||
iconv_close(c);
|
||||
encodingTable[enc]->supported = 1;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (encodingTable[enc]->supported == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
encodings[pos++] = enc;
|
||||
}
|
||||
encodings[pos] = 0;
|
||||
_availableEncodings = encodings;
|
||||
}
|
||||
[gnustep_global_lock unlock];
|
||||
}
|
||||
return _availableEncodings;
|
||||
}
|
||||
|
||||
/*
 * Returns the default C string encoding, working it out on first use.
 *
 * The encoding is taken from the GNUSTEP_STRING_ENCODING environment
 * variable, which must hold the name of an entry in str_encoding_table
 * (eg "NSISOLatin1StringEncoding").  If the variable is not set, names
 * an unknown encoding, or names an encoding whose 'supported' flag is
 * not set, NSISOLatin1StringEncoding is used instead (with a warning on
 * stderr in the latter two cases).
 *
 * The result is cached in defEnc.  The value is computed in a local
 * variable and stored into defEnc exactly once, so that a caller taking
 * the unlocked fast path can never see a provisional (possibly
 * unsupported) encoding.
 */
NSStringEncoding
GetDefEncoding()
{
  if (defEnc == GSUndefinedEncoding)
    {
      NSStringEncoding	chosen = NSISOLatin1StringEncoding;
      char		*encoding;

      [gnustep_global_lock lock];
      if (defEnc != GSUndefinedEncoding)
	{
	  /* Another caller set the default while we waited for the lock. */
	  [gnustep_global_lock unlock];
	  return defEnc;
	}

      /*
       * Called for its side effect: it fills in the 'supported' flags
       * of str_encoding_table which are tested below.
       */
      GetAvailableEncodings();

      encoding = getenv("GNUSTEP_STRING_ENCODING");
      if (encoding != 0)
	{
	  unsigned int	count = 0;

	  /* Look the name up; the table ends with an entry whose enc is 0. */
	  while (str_encoding_table[count].enc
	    && strcmp(str_encoding_table[count].ename, encoding))
	    {
	      count++;
	    }
	  if (str_encoding_table[count].enc == 0)
	    {
	      /* encoding not found */
	      fprintf(stderr,
		"WARNING: %s - encoding not supported.\n", encoding);
	      fprintf(stderr, "NSISOLatin1StringEncoding set as default.\n");
	    }
	  else if (str_encoding_table[count].supported == 0)
	    {
	      fprintf(stderr, "WARNING: %s - encoding not implemented as "
		"default c string encoding.\n", encoding);
	      fprintf(stderr,
		"NSISOLatin1StringEncoding set as default.\n");
	    }
	  else
	    {
	      chosen = str_encoding_table[count].enc;
	    }
	}
      /*
       * With no environment variable we silently keep the Latin1 default.
       * It shouldn't be required. It really should be in UserDefaults - asf
       */
      defEnc = chosen;
      [gnustep_global_lock unlock];
    }
  return defEnc;
}
|
||||
|
||||
/*
 * Returns the eightBit flag recorded for the encoding in the string
 * encoding table, or NO if the encoding is zero or has no table entry.
 */
BOOL
GSIsByteEncoding(NSStringEncoding encoding)
{
  GetAvailableEncodings();	/* Make sure encodingTable has been built. */

  /*
   * encodingTable is allocated with encTableSize+1 slots, so the largest
   * valid index is encTableSize itself (the highest known encoding value).
   */
  if (encoding == 0 || encoding > encTableSize
    || encodingTable[encoding] == 0)
    {
      return NO;
    }
  return encodingTable[encoding]->eightBit;
}
|
||||
|
||||
/*
 * Returns the name of the encoding (the ename field of its entry in the
 * string encoding table) as a string, or @"Unknown encoding" if the
 * encoding is zero or has no table entry.
 */
NSString*
GSEncodingName(NSStringEncoding encoding)
{
  GetAvailableEncodings();	/* Make sure encodingTable has been built. */

  /*
   * encodingTable is allocated with encTableSize+1 slots, so the largest
   * valid index is encTableSize itself (the highest known encoding value).
   */
  if (encoding == 0 || encoding > encTableSize
    || encodingTable[encoding] == 0)
    {
      return @"Unknown encoding";
    }
  return [NSString stringWithCString: encodingTable[encoding]->ename];
}
|
||||
|
||||
/*
 * Thin wrapper: forwards to GSEncodingName() and hands back whatever
 * that function returns for the encoding.
 */
NSString*
GetEncodingName(NSStringEncoding encoding)
{
  NSString	*name = GSEncodingName(encoding);

  return name;
}
|
||||
|
||||
/*
 * Returns the iconv name recorded for the encoding in the string
 * encoding table.  An empty string is returned when the encoding is
 * zero, has no table entry, or has no iconv name, so the result is
 * always safe to pass to iconv_open() (which will simply fail) or to
 * print with %s.
 */
static const char *
iconv_stringforencoding(NSStringEncoding encoding)
{
  GetAvailableEncodings();	/* Make sure encodingTable has been built. */

  /*
   * encodingTable is allocated with encTableSize+1 slots, so the largest
   * valid index is encTableSize itself (the highest known encoding value).
   */
  if (encoding == 0 || encoding > encTableSize
    || encodingTable[encoding] == 0
    || encodingTable[encoding]->iconv == 0)
    {
      return "";
    }
  return encodingTable[encoding]->iconv;
}
|
||||
|
||||
#ifdef HAVE_ICONV
|
||||
|
||||
int
|
||||
iconv_cstrtoustr(unichar *u2, int size2, const char *s1, int size1,
|
||||
NSStringEncoding enc)
|
||||
|
@ -1368,7 +1364,8 @@ if (dst == 0) \
|
|||
} \
|
||||
else if (zone == 0) \
|
||||
{ \
|
||||
return NO; /* No buffer growth possible ... fail. */ \
|
||||
result = NO; /* No buffer growth possible ... fail. */ \
|
||||
break; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
|
@ -1397,7 +1394,8 @@ else \
|
|||
} \
|
||||
if (ptr == 0) \
|
||||
{ \
|
||||
return NO; /* Not enough memory */ \
|
||||
result = NO; /* Not enough memory */ \
|
||||
break; \
|
||||
} \
|
||||
bsize = grow / sizeof(unichar); \
|
||||
}
|
||||
|
@ -1441,6 +1439,11 @@ else \
|
|||
* <item>If GSUniTemporary is set, the function will return the results in
|
||||
* an autoreleased buffer rather than in a buffer that the caller must
|
||||
* release.</item>
|
||||
* <item>If GSUniBOM is set, the function will write the first unicode
|
||||
* character as a byte order marker.</item>
|
||||
*
|
||||
* <item>If GSUniShortOk is set, the function will return a buffer containing
|
||||
* any decoded characters even if the whole conversion fails.</item>
|
||||
* </list>
|
||||
* <p>On return, the function result is a flag indicating success (YES)
|
||||
* or failure (NO), and on success, the value stored in size is the number
|
||||
|
@ -1464,9 +1467,11 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
|
|||
unsigned extra = (options & GSUniTerminate) ? sizeof(unichar) : 0;
|
||||
unichar base = 0;
|
||||
unichar *table = 0;
|
||||
BOOL result = YES;
|
||||
|
||||
if (slen == 0)
|
||||
{
|
||||
*size = 0;
|
||||
return YES;
|
||||
}
|
||||
|
||||
|
@ -1484,6 +1489,15 @@ GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
|
|||
bsize = *size;
|
||||
}
|
||||
|
||||
if (options & GSUniBOM)
  {
    /*
     * Make room for the byte order marker.  On failure GROW() sets
     * result to NO and breaks out of this loop only, so we must check
     * result before writing or we would store past the end of the
     * buffer (or through a null pointer).
     */
    while (dpos >= bsize)
      {
	GROW();
      }
    if (result == YES)
      {
	ptr[dpos++] = (unichar)0xFEFF;	// Insert byte order marker.
      }
  }
|
||||
|
||||
switch (enc)
|
||||
{
|
||||
case NSNonLossyASCIIStringEncoding:
|
||||
|
@ -1577,19 +1591,20 @@ tables:
|
|||
default:
|
||||
#ifdef HAVE_ICONV
|
||||
{
|
||||
iconv_t cd;
|
||||
char *inbuf;
|
||||
char *outbuf;
|
||||
size_t inbytesleft;
|
||||
size_t outbytesleft;
|
||||
size_t result;
|
||||
size_t rval;
|
||||
iconv_t cd;
|
||||
|
||||
cd = iconv_open(UNICODE_ENC, iconv_stringforencoding(enc));
|
||||
if (cd == (iconv_t)-1)
|
||||
{
|
||||
NSLog(@"No iconv for encoding %@ tried to use %s",
|
||||
GetEncodingName(enc), iconv_stringforencoding(enc));
|
||||
return NO;
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
|
||||
inbuf = (char*)src;
|
||||
|
@ -1606,10 +1621,12 @@ tables:
|
|||
outbuf = (char*)&ptr[dpos];
|
||||
outbytesleft = (bsize - old) * sizeof(unichar);
|
||||
}
|
||||
result = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
if (result == (size_t)-1 && errno != E2BIG)
|
||||
rval = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
if (rval == (size_t)-1 && errno != E2BIG)
  {
    /*
     * Conversion failed.  Do not close cd here: the break takes us to
     * the iconv_close(cd) which follows the conversion loop, and closing
     * the descriptor twice is an error.
     * NOTE(review): assumes this sits directly inside the conversion
     * loop, as the matching code in GSFromUnicode does - confirm.
     */
    result = NO;
    break;
  }
|
||||
dpos = (bsize * sizeof(unichar) - outbytesleft) / sizeof(unichar);
|
||||
}
|
||||
|
@ -1617,7 +1634,7 @@ tables:
|
|||
iconv_close(cd);
|
||||
}
|
||||
#else
|
||||
return NO;
|
||||
result = NO;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1629,7 +1646,7 @@ tables:
|
|||
ptr[dpos] = (unichar)0;
|
||||
}
|
||||
*size = dpos;
|
||||
if (dst != 0)
|
||||
if (dst != 0 && (result == YES || (options & GSUniShortOk)))
|
||||
{
|
||||
if (options & GSUniTemporary)
|
||||
{
|
||||
|
@ -1670,14 +1687,14 @@ tables:
|
|||
{
|
||||
ptr = NSZoneRealloc(zone, ptr, bytes);
|
||||
}
|
||||
if (ptr == 0)
|
||||
{
|
||||
return NO;
|
||||
}
|
||||
}
|
||||
*dst = ptr;
|
||||
}
|
||||
return YES;
|
||||
else if (ptr != buf && ptr != *dst)
|
||||
{
|
||||
NSZoneFree(zone, ptr);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#undef GROW
|
||||
|
@ -1696,7 +1713,8 @@ if (dst == 0) \
|
|||
} \
|
||||
else if (zone == 0) \
|
||||
{ \
|
||||
return NO; /* No buffer growth possible ... fail. */ \
|
||||
result = NO; /* No buffer growth possible ... fail. */ \
|
||||
break; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
|
@ -1724,7 +1742,8 @@ else \
|
|||
} \
|
||||
if (ptr == 0) \
|
||||
{ \
|
||||
return NO; /* Not enough memory */ \
|
||||
result = NO; /* Not enough memory */ \
|
||||
break; \
|
||||
} \
|
||||
bsize = grow; \
|
||||
}
|
||||
|
@ -1772,6 +1791,11 @@ else \
|
|||
* <item>If GSUniTemporary is set, the function will return the results in
|
||||
* an autoreleased buffer rather than in a buffer that the caller must
|
||||
* release.</item>
|
||||
* <item>If GSUniBOM is set, the function will read the first unicode
|
||||
* character as a byte order marker.</item>
|
||||
* <item>If GSUniShortOk is set, the function will return a buffer containing
|
||||
* any decoded characters even if the whole conversion fails.</item>
|
||||
* </list>
|
||||
*
|
||||
* <p>On return, the function result is a flag indicating success (YES)
|
||||
* or failure (NO), and on success, the value stored in size is the number
|
||||
|
@ -1797,12 +1821,40 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
|||
unichar base = 0;
|
||||
_ucc_ *table = 0;
|
||||
unsigned tsize = 0;
|
||||
BOOL swapped = NO;
|
||||
BOOL result = YES;
|
||||
|
||||
if (options & GSUniBOM)
|
||||
{
|
||||
unichar c;
|
||||
|
||||
if (slen == 0)
|
||||
{
|
||||
*size = 0;
|
||||
return NO; // Missing byte order marker.
|
||||
}
|
||||
c = *src++;
|
||||
slen--;
|
||||
if (c != 0xFEFF)
|
||||
{
|
||||
if (c == 0xFFFE)
|
||||
{
|
||||
swapped = YES;
|
||||
}
|
||||
else
|
||||
{
|
||||
*size = 0;
|
||||
return NO; // Illegal byte order marker.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (slen == 0)
|
||||
{
|
||||
*size = 0;
|
||||
return YES;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Ensure we have an initial buffer set up to decode data into.
|
||||
*/
|
||||
|
@ -1829,12 +1881,17 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
|||
goto bases;
|
||||
|
||||
bases:
|
||||
if (strict == YES)
|
||||
if (strict == NO)
|
||||
{
|
||||
while (spos < slen)
|
||||
{
|
||||
unichar u = src[spos++];
|
||||
|
||||
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unparenthesised
     * form discarded the high byte.
     */
    u = (unichar)(((u & 0xff00) >> 8) | ((u & 0x00ff) << 8));
  }
|
||||
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
|
@ -1855,6 +1912,10 @@ bases:
|
|||
{
|
||||
unichar u = src[spos++];
|
||||
|
||||
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unparenthesised
     * form discarded the high byte.
     */
    u = (unichar)(((u & 0xff00) >> 8) | ((u & 0x00ff) << 8));
  }
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
|
@ -1865,7 +1926,8 @@ bases:
|
|||
}
|
||||
else
|
||||
{
|
||||
return NO;
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1904,6 +1966,11 @@ tables:
|
|||
{
|
||||
unichar u = src[spos++];
|
||||
|
||||
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unparenthesised
     * form discarded the high byte.
     */
    u = (unichar)(((u & 0xff00) >> 8) | ((u & 0x00ff) << 8));
  }
|
||||
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
|
@ -1941,6 +2008,11 @@ tables:
|
|||
{
|
||||
unichar u = src[spos++];
|
||||
|
||||
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unparenthesised
     * form discarded the high byte.
     */
    u = (unichar)(((u & 0xff00) >> 8) | ((u & 0x00ff) << 8));
  }
|
||||
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
|
@ -1958,7 +2030,9 @@ tables:
|
|||
{
|
||||
if (++i >= tsize)
|
||||
{
|
||||
return NO;
|
||||
result = NO;
|
||||
spos = slen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ptr[dpos++] = table[--i].to;
|
||||
|
@ -1971,8 +2045,13 @@ tables:
|
|||
while (spos < slen)
|
||||
{
|
||||
unichar u = src[spos++];
|
||||
int res;
|
||||
int i = 0;
|
||||
int res;
|
||||
int i = 0;
|
||||
|
||||
if (swapped == YES)
  {
    /*
     * Exchange the two bytes of the character.  The mask must be
     * parenthesised: '>>' binds tighter than '&', so the unparenthesised
     * form discarded the high byte.
     */
    u = (unichar)(((u & 0xff00) >> 8) | ((u & 0x00ff) << 8));
  }
|
||||
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
|
@ -1994,7 +2073,8 @@ tables:
|
|||
{
|
||||
if (strict == YES)
|
||||
{
|
||||
return NO;
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
for (i = 0; i < GSM0338_esize; i++)
|
||||
{
|
||||
|
@ -2026,14 +2106,15 @@ tables:
|
|||
char *outbuf;
|
||||
size_t inbytesleft;
|
||||
size_t outbytesleft;
|
||||
size_t result;
|
||||
size_t rval;
|
||||
|
||||
cd = iconv_open(iconv_stringforencoding(enc), UNICODE_ENC);
|
||||
if (cd == (iconv_t)-1)
|
||||
{
|
||||
NSLog(@"No iconv for encoding %@ tried to use %s",
|
||||
GetEncodingName(enc), iconv_stringforencoding(enc));
|
||||
return NO;
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
|
||||
inbuf = (char*)src;
|
||||
|
@ -2050,14 +2131,15 @@ tables:
|
|||
outbuf = (char*)&ptr[dpos];
|
||||
outbytesleft = (bsize - old);
|
||||
}
|
||||
result = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
if (result == (size_t)-1 && errno != E2BIG)
|
||||
rval = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
||||
if (rval == (size_t)-1 && errno != E2BIG)
|
||||
{
|
||||
if (errno == EILSEQ)
|
||||
{
|
||||
if (strict == YES)
|
||||
{
|
||||
return NO;
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* If we are allowing lossy conversion, we replace any
|
||||
|
@ -2073,7 +2155,8 @@ tables:
|
|||
}
|
||||
else if (errno != E2BIG)
|
||||
{
|
||||
return NO;
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dpos = bsize - outbytesleft;
|
||||
|
@ -2082,7 +2165,8 @@ tables:
|
|||
iconv_close(cd);
|
||||
}
|
||||
#else
|
||||
return NO;
|
||||
result = NO;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -2094,7 +2178,7 @@ tables:
|
|||
ptr[dpos] = (unsigned char)0;
|
||||
}
|
||||
*size = dpos;
|
||||
if (dst != 0)
|
||||
if (dst != 0 && (result == YES || (options & GSUniShortOk)))
|
||||
{
|
||||
if (options & GSUniTemporary)
|
||||
{
|
||||
|
@ -2135,14 +2219,14 @@ tables:
|
|||
{
|
||||
ptr = NSZoneRealloc(zone, ptr, bytes);
|
||||
}
|
||||
if (ptr == 0)
|
||||
{
|
||||
return NO;
|
||||
}
|
||||
}
|
||||
*dst = ptr;
|
||||
}
|
||||
return YES;
|
||||
else if (ptr != buf && ptr != *dst)
|
||||
{
|
||||
NSZoneFree(zone, ptr);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#undef GROW
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
void
|
||||
print_string(NSString* s)
|
||||
{
|
||||
printf("The string [%s], length %d\n", [s cString], [s length]);
|
||||
printf("The string [%s], length %d\n", [s lossyCString], [s length]);
|
||||
}
|
||||
|
||||
#include <Foundation/NSString.h>
|
||||
|
@ -24,21 +24,31 @@ int main()
|
|||
id s = @"This is a test string";
|
||||
id s2, s3;
|
||||
int a;
|
||||
unichar uc[6] = { '1', '2', '.', '3', '4', 0};
|
||||
unichar u0[5] = { 0xFE66, 'a', 'b', 'c', 'd'};
|
||||
unichar u1[6] = { '1', '2', '.', '3', '4', 0xFE66};
|
||||
unichar u2[7] = { 'a', 'b', 0xFE66, 'a', 'b', 'c', 'd'};
|
||||
NSString *us0 = [NSString stringWithCharacters: u0 length: 5];
|
||||
NSString *us1 = [NSString stringWithCharacters: u1 length: 6];
|
||||
NSString *us2 = [NSString stringWithCharacters: u2 length: 7];
|
||||
NSMutableString *fo = [NSMutableString stringWithString: @"abcdef"];
|
||||
NSMutableString *f1 = [NSMutableString stringWithString: @"ab"];
|
||||
|
||||
NSMutableString *fo = [NSMutableString stringWithString: @"abcdefg"];
|
||||
NS_DURING
|
||||
[fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: @"aa"];
|
||||
[fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: us1];
|
||||
NS_HANDLER
|
||||
printf("Caught exception during string replacement (expected)\n");
|
||||
NS_ENDHANDLER
|
||||
|
||||
[f1 appendString: us0];
|
||||
print_string(f1);
|
||||
printf("%d\n", [f1 isEqual: us2]);
|
||||
|
||||
print_string(s);
|
||||
|
||||
s2 = NSStringFromPoint(NSMakePoint(1.374, 5.100));
|
||||
print_string(s2);
|
||||
|
||||
printf("%f", [[NSString stringWithCharacters: uc length: 5] floatValue]);
|
||||
printf("%f", [[NSString stringWithCharacters: u1 length: 5] floatValue]);
|
||||
|
||||
s2 = [s copy];
|
||||
print_string(s2);
|
||||
|
@ -69,6 +79,23 @@ int main()
|
|||
NSLog(@"A string with precision %d is :%.*@:", a, a, @"String");
|
||||
#endif
|
||||
|
||||
{
|
||||
NSMutableString *base = [@"hello" mutableCopy];
|
||||
NSString *ext = [@"\"\\UFE66???\"" propertyList];
|
||||
NSString *want = [@"\"hello\\UFE66???\"" propertyList];
|
||||
int i;
|
||||
|
||||
[base appendString: ext];
|
||||
printf("%u\n", [base length]);
|
||||
printf("%u\n", [ext length]);
|
||||
printf("%u\n", [want length]);
|
||||
for (i = 0; i < 4; i++)
|
||||
printf("%x\n", [ext characterAtIndex: i]);
|
||||
for (i = 0; i < 9; i++)
|
||||
printf("%x,%x\n", [base characterAtIndex: i], [want characterAtIndex: i]);
|
||||
|
||||
printf("%u\n", [want isEqual: base]);
|
||||
}
|
||||
[arp release];
|
||||
exit(0);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue