2001-12-17 14:31:42 +00:00
|
|
|
/** Support functions for Unicode implementation
|
2000-09-30 18:53:27 +00:00
|
|
|
Function to determine default c string encoding for
|
|
|
|
GNUstep based on GNUSTEP_STRING_ENCODING environment variable.
|
|
|
|
|
1997-05-03 18:04:21 +00:00
|
|
|
Copyright (C) 1997 Free Software Foundation, Inc.
|
|
|
|
|
1999-09-13 04:11:39 +00:00
|
|
|
Written by: Stevo Crvenkovski < stevo@btinternet.com >
|
1997-05-03 18:04:21 +00:00
|
|
|
Date: March 1997
|
2000-09-30 18:53:27 +00:00
|
|
|
Merged with GetDefEncoding.m and iconv by: Fred Kiefer <fredkiefer@gmx.de>
|
|
|
|
Date: September 2000
|
2002-05-11 05:50:19 +00:00
|
|
|
Rewrite by: Richard Frith-Macdonald <rfm@gnu.org>
|
1997-05-03 18:04:21 +00:00
|
|
|
|
|
|
|
This file is part of the GNUstep Base Library.
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
2007-09-14 11:36:11 +00:00
|
|
|
modify it under the terms of the GNU Lesser General Public
|
1997-05-03 18:04:21 +00:00
|
|
|
License as published by the Free Software Foundation; either
|
2008-06-08 10:38:33 +00:00
|
|
|
version 2 of the License, or (at your option) any later version.
|
2005-02-22 11:22:44 +00:00
|
|
|
|
1997-05-03 18:04:21 +00:00
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Library General Public License for more details.
|
|
|
|
|
2007-09-14 11:36:11 +00:00
|
|
|
You should have received a copy of the GNU Lesser General Public
|
1997-05-03 18:04:21 +00:00
|
|
|
License along with this library; if not, write to the Free
|
2006-10-09 14:00:01 +00:00
|
|
|
Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
|
|
Boston, MA 02111 USA.
|
2005-02-22 11:22:44 +00:00
|
|
|
*/
|
1997-05-03 18:04:21 +00:00
|
|
|
|
2003-01-26 19:38:42 +00:00
|
|
|
#include "config.h"
|
2003-02-03 04:15:27 +00:00
|
|
|
#ifndef NeXT_Foundation_LIBRARY
|
2002-10-22 03:22:30 +00:00
|
|
|
#include <Foundation/NSArray.h>
|
|
|
|
#include <Foundation/NSBundle.h>
|
|
|
|
#include <Foundation/NSDictionary.h>
|
2006-10-20 10:56:27 +00:00
|
|
|
#include <Foundation/NSError.h>
|
2003-05-05 13:36:25 +00:00
|
|
|
#include <Foundation/NSException.h>
|
1997-05-03 18:04:21 +00:00
|
|
|
#include <Foundation/NSString.h>
|
2002-03-16 09:54:50 +00:00
|
|
|
#include <Foundation/NSLock.h>
|
2002-10-22 03:22:30 +00:00
|
|
|
#include <Foundation/NSPathUtilities.h>
|
2003-02-03 04:15:27 +00:00
|
|
|
#else
|
|
|
|
#include <Foundation/Foundation.h>
|
|
|
|
#endif
|
2007-04-01 11:12:12 +00:00
|
|
|
|
2003-10-30 13:44:55 +00:00
|
|
|
#include "GNUstepBase/GSLock.h"
|
2006-10-29 09:30:07 +00:00
|
|
|
#include "GNUstepBase/GSMime.h"
|
2003-09-13 22:42:50 +00:00
|
|
|
#include "GNUstepBase/GSCategories.h"
|
2003-07-31 23:49:32 +00:00
|
|
|
#include "GNUstepBase/Unicode.h"
|
2007-04-01 11:12:12 +00:00
|
|
|
|
2006-10-09 14:00:01 +00:00
|
|
|
#include "../GSPrivate.h"
|
2000-09-30 18:53:27 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
2002-10-22 03:22:30 +00:00
|
|
|
#include <string.h>
|
1997-05-03 18:04:21 +00:00
|
|
|
|
2004-02-18 04:03:24 +00:00
|
|
|
#if HAVE_LANGINFO_CODESET
|
|
|
|
#include <langinfo.h>
|
|
|
|
#endif
|
|
|
|
|
2002-05-11 05:21:46 +00:00
|
|
|
/* A pairing of one unicode character (`from') with one byte value (`to').
 * NOTE(review): presumably the element type of the conversion tables in
 * the unicode/*.h headers included below -- confirm against those headers.
 */
typedef struct {unichar from; unsigned char to;} _ucc_;
|
1997-05-03 18:04:21 +00:00
|
|
|
|
2006-10-09 14:00:01 +00:00
|
|
|
#include "unicode/cyrillic.h"
|
|
|
|
#include "unicode/latin2.h"
|
|
|
|
#include "unicode/latin9.h"
|
|
|
|
#include "unicode/nextstep.h"
|
|
|
|
#include "unicode/caseconv.h"
|
|
|
|
#include "unicode/cop.h"
|
|
|
|
#include "unicode/decomp.h"
|
|
|
|
#include "unicode/gsm0338.h"
|
|
|
|
#include "unicode/thai.h"
|
1997-05-03 18:04:21 +00:00
|
|
|
|
2000-09-12 23:12:06 +00:00
|
|
|
#ifdef HAVE_ICONV
|
2001-05-24 04:00:15 +00:00
|
|
|
#ifdef HAVE_GICONV_H
|
|
|
|
#include <giconv.h>
|
|
|
|
#else
|
2000-09-12 23:12:06 +00:00
|
|
|
#include <iconv.h>
|
2001-05-24 04:00:15 +00:00
|
|
|
#endif
|
2000-09-12 23:12:06 +00:00
|
|
|
#include <errno.h>
|
|
|
|
|
2002-03-16 09:54:50 +00:00
|
|
|
/*
 * The whole of the GNUstep code stores UNICODE in internal byte order,
 * so we do the same.  We have switched to using UTF16 so the defines here
 * recognise this.  We use the endian specific versions of UTF16 so that
 * iconv does not introduce a BOM where we do not want it.
 * If UTF16 does not work, internal_unicode_enc() falls back to the
 * UNICODE_INT name, then to UCS-2-INTERNAL, and finally to UCS-2.
 */
#ifdef WORDS_BIGENDIAN
#define UNICODE_UTF16 "UTF-16BE"
#define UNICODE_UTF32 "UTF-32BE"
#define UNICODE_INT "UNICODEBIG"
#else
#define UNICODE_UTF16 "UTF-16LE"
#define UNICODE_UTF32 "UTF-32LE"
#define UNICODE_INT "UNICODELITTLE"
#endif

/* The iconv name to use for our internal unicode representation: the
 * cached value in unicode_enc if one has been set, otherwise whatever
 * internal_unicode_enc() determines (that function also fills the cache).
 */
#define UNICODE_ENC ((unicode_enc) ? unicode_enc : internal_unicode_enc())

/* Cached iconv name of the internal unicode encoding; NULL until
 * internal_unicode_enc() has run.
 */
static const char *unicode_enc = NULL;
|
|
|
|
|
2002-03-16 09:54:50 +00:00
|
|
|
/*
 * Check to see what type of internal unicode format the iconv library
 * supports, cache the answer in unicode_enc and return it.
 *
 * The names are tried in order of preference: the endian specific UTF-16
 * name, then UNICODE_INT, then "UCS-2-INTERNAL".  A name is accepted when
 * iconv_open() can create a converter from "ASCII" to it.  If none of them
 * is accepted we fall back to "UCS-2" without testing it.
 *
 * Candidates are probed through a local variable and unicode_enc is only
 * assigned once the choice is final, so code reading unicode_enc (via the
 * UNICODE_ENC macro) while this function is running never sees a name
 * that iconv has not accepted yet.
 */
static const char *
internal_unicode_enc(void)
{
  static const char * const	fallbacks[] = {
    UNICODE_INT,
    "UCS-2-INTERNAL"
  };
  const char	*name = UNICODE_UTF16;
  iconv_t	conv;
  unsigned	i;

  conv = iconv_open(name, "ASCII");
  if (conv == (iconv_t)-1)
    {
      fprintf(stderr, "Could not initialise iconv() for UTF16, using UCS-2\n");
      fprintf(stderr, "Using characters outside 16 bits may give bad results.\n");

      /* Last resort if no fallback can be opened ... this had better work */
      name = "UCS-2";
      for (i = 0; i < sizeof(fallbacks) / sizeof(fallbacks[0]); i++)
        {
          conv = iconv_open(fallbacks[i], "ASCII");
          if (conv != (iconv_t)-1)
            {
              name = fallbacks[i];
              break;
            }
        }
    }
  if (conv != (iconv_t)-1)
    {
      /* The converter was only opened as a probe. */
      iconv_close(conv);
    }
  unicode_enc = name;
  return name;
}
|
2000-09-30 18:53:27 +00:00
|
|
|
|
2008-09-26 10:05:49 +00:00
|
|
|
#else
/* No iconv: give UNICODE_UTF32 the empty name, which the encoding table
 * treats as "no iconv conversion available" for its UTF-32 entry.
 */
#define UNICODE_UTF32 ""
#endif
|
2000-09-30 18:53:27 +00:00
|
|
|
|
2003-11-01 07:09:12 +00:00
|
|
|
/* Lock taken by GSSetupEncodingTable() while it builds the lookup table;
 * created on first use through GS_INITIALIZED_LOCK().
 */
static GSLazyLock *local_lock = nil;

/* Shorthand for an unsigned byte. */
typedef unsigned char unc;

/* NOTE(review): presumably the cached default C string encoding and the
 * cached native encoding -- their users are not in this part of the file,
 * confirm before relying on this.
 */
static NSStringEncoding defEnc = GSUndefinedEncoding;
static NSStringEncoding natEnc = GSUndefinedEncoding;

/* NOTE(review): presumably a lazily built list of usable encodings;
 * not referenced in this part of the file.
 */
static NSStringEncoding *_availableEncodings = 0;
|
|
|
|
|
|
|
|
/*
 * Description of a single string encoding.  The field order is relied upon
 * by the positional initialisers in str_encoding_table.
 */
struct _strenc_ {
  NSStringEncoding	enc;		// Constant representing the encoding.
  const char		*ename;		// ASCII string representation of name.
  const char		*iconv;		/* Iconv name of encoding.  If this
					 * is the empty string, we cannot use
					 * iconv to perform conversions to/from
					 * this encoding.
					 * NB. do not put a null pointer in this
					 * field in the table, use "" instead.
					 */
  BOOL			eightBit;	/* Flag to say whether this encoding
					 * can be stored in a byte array ...
					 * ie whether the encoding consists
					 * entirely of single byte characters
					 * and the first 128 are identical to
					 * the ASCII character set.
					 */
  signed char		supported;	/* Is this supported?  Some encodings
					 * have builtin conversion to/from
					 * unicode, but for others we must
					 * check with iconv to see if it
					 * supports them on this platform.
					 * A one means supported.
					 * A negative means unsupported.
					 * A zero means not yet checked.
					 * Explicitly signed, because plain
					 * char is unsigned on some platforms
					 * and could not hold the negative
					 * value.
					 */
  const char		*lossy;		/* Iconv name for lossy encoding
					 * (filled in at runtime, or null).
					 */
};
|
2000-09-30 18:53:27 +00:00
|
|
|
|
2001-08-03 13:26:28 +00:00
|
|
|
/*
 * The str_encoding_table is a compact representation of all the string
 * encoding information we might need.  It gets modified at runtime
 * (the iconv, supported and lossy fields of an entry may be updated).
 *
 * Each row is written with the GS_STRENC() helper, which expands to
 *   { constant, "constant", iconv name, eightBit, supported, 0 }
 * so the textual name of an entry can never drift from its constant.
 * The final row (encoding value zero) terminates the table.
 */
#define	GS_STRENC(E, ICONVNAME, EIGHTBIT, SUPPORTED) \
  {E, #E, ICONVNAME, EIGHTBIT, SUPPORTED, 0}

static struct _strenc_ str_encoding_table[] = {
  GS_STRENC(NSASCIIStringEncoding, "ASCII", 1, 1),
  GS_STRENC(NSNEXTSTEPStringEncoding, "NEXTSTEP", 1, 1),
  GS_STRENC(NSJapaneseEUCStringEncoding, "EUC-JP", 0, 0),
  GS_STRENC(NSUTF8StringEncoding, "UTF-8", 0, 1),
  GS_STRENC(NSISOLatin1StringEncoding, "ISO-8859-1", 1, 1),
  GS_STRENC(NSSymbolStringEncoding, "", 0, 0),
  GS_STRENC(NSNonLossyASCIIStringEncoding, "", 1, 1),
  GS_STRENC(NSShiftJISStringEncoding, "SHIFT-JIS", 0, 0),
  GS_STRENC(NSISOLatin2StringEncoding, "ISO-8859-2", 1, 1),
  GS_STRENC(NSUnicodeStringEncoding, "", 0, 1),
  GS_STRENC(NSWindowsCP1251StringEncoding, "CP1251", 0, 0),
  GS_STRENC(NSWindowsCP1252StringEncoding, "CP1252", 0, 0),
  GS_STRENC(NSWindowsCP1253StringEncoding, "CP1253", 0, 0),
  GS_STRENC(NSWindowsCP1254StringEncoding, "CP1254", 0, 0),
  GS_STRENC(NSWindowsCP1250StringEncoding, "CP1250", 0, 0),
  GS_STRENC(NSISO2022JPStringEncoding, "ISO-2022-JP", 0, 0),
  GS_STRENC(NSMacOSRomanStringEncoding, "MACINTOSH", 0, 0),
#if defined(GNUSTEP)
  GS_STRENC(NSProprietaryStringEncoding, "", 0, 0),
#endif

  // GNUstep additions
  GS_STRENC(NSISOCyrillicStringEncoding, "ISO-8859-5", 0, 1),
  GS_STRENC(NSKOI8RStringEncoding, "KOI8-R", 0, 0),
  GS_STRENC(NSISOLatin3StringEncoding, "ISO-8859-3", 0, 0),
  GS_STRENC(NSISOLatin4StringEncoding, "ISO-8859-4", 0, 0),
  GS_STRENC(NSISOArabicStringEncoding, "ISO-8859-6", 0, 0),
  GS_STRENC(NSISOGreekStringEncoding, "ISO-8859-7", 0, 0),
  GS_STRENC(NSISOHebrewStringEncoding, "ISO-8859-8", 0, 0),
  GS_STRENC(NSISOLatin5StringEncoding, "ISO-8859-9", 0, 0),
  GS_STRENC(NSISOLatin6StringEncoding, "ISO-8859-10", 0, 0),
  GS_STRENC(NSISOThaiStringEncoding, "ISO-8859-11", 1, 1),
  GS_STRENC(NSISOLatin7StringEncoding, "ISO-8859-13", 0, 0),
  GS_STRENC(NSISOLatin8StringEncoding, "ISO-8859-14", 0, 0),
  GS_STRENC(NSISOLatin9StringEncoding, "ISO-8859-15", 1, 1),
  GS_STRENC(NSUTF7StringEncoding, "UTF-7", 0, 0),
  GS_STRENC(NSGB2312StringEncoding, "EUC-CN", 0, 0),
  GS_STRENC(NSGSM0338StringEncoding, "", 0, 1),
  GS_STRENC(NSBIG5StringEncoding, "BIG5", 0, 0),
  GS_STRENC(NSKoreanEUCStringEncoding, "EUC-KR", 0, 0),

  /* Now Apple encodings which have high numeric values.
   */
  GS_STRENC(NSUTF16BigEndianStringEncoding, "UTF-16BE", 0, 0),
  GS_STRENC(NSUTF16LittleEndianStringEncoding, "UTF-16LE", 0, 0),
  GS_STRENC(NSUTF32StringEncoding, UNICODE_UTF32, 0, 0),
  GS_STRENC(NSUTF32BigEndianStringEncoding, "UTF-32BE", 0, 0),
  GS_STRENC(NSUTF32LittleEndianStringEncoding, "UTF-32LE", 0, 0),

  /* Terminator. */
  {0,"Unknown encoding","",0,0,0}
};

#undef	GS_STRENC
|
|
|
|
|
2002-03-16 09:54:50 +00:00
|
|
|
/* Table of pointers into str_encoding_table, indexed directly by encoding
 * value; it holds encTableSize+1 slots and is built on first use by
 * GSSetupEncodingTable().
 */
static struct _strenc_ **encodingTable = 0;
static unsigned encTableSize = 0;

/*
 * Build encodingTable (once) so that entries can be found by indexing
 * with the encoding value, and, when iconv is available, work out for
 * each entry whether a lossy ("//TRANSLIT") conversion can be used.
 *
 * The table is checked before and again after taking local_lock, so the
 * lock is only taken while the table has not been published and only one
 * thread builds it.  The finished table is stored in encodingTable last.
 */
static void GSSetupEncodingTable(void)
{
  if (encodingTable == 0)
    {
      [GS_INITIALIZED_LOCK(local_lock, GSLazyLock) lock];
      if (encodingTable == 0)
        {
          static struct _strenc_ **encTable = 0;
          unsigned count;
          unsigned i;

          /*
           * We want to store pointers to our string encoding info in a
           * large table so we can do efficient lookup by encoding value.
           * Encodings whose value is MAX_ENCODING or more are left out of
           * the table (they are found by a search of str_encoding_table).
           */
#define MAX_ENCODING 128
          count = sizeof(str_encoding_table) / sizeof(struct _strenc_);

          /*
           * First determine the largest encoding value (below the limit)
           * and create a large enough table of pointers.
           */
          encTableSize = 0;
          for (i = 0; i < count; i++)
            {
              unsigned tmp = str_encoding_table[i].enc;

              if (tmp > encTableSize)
                {
                  if (tmp < MAX_ENCODING)
                    {
                      encTableSize = tmp;
                    }
                }
            }
          encTable = objc_malloc((encTableSize+1)*sizeof(struct _strenc_ *));
          memset(encTable, 0, (encTableSize+1)*sizeof(struct _strenc_ *));

          /*
           * Now set up the pointers at the correct location in the table.
           */
          for (i = 0; i < count; i++)
            {
              struct _strenc_ *entry = &str_encoding_table[i];
              unsigned tmp = entry->enc;

              if (tmp < MAX_ENCODING)
                {
                  encTable[tmp] = entry;
                }
#ifdef HAVE_ICONV
              if (entry->iconv != 0 && *(entry->iconv) != 0)
                {
                  iconv_t c;
                  char *lossy;

                  /*
                   * See if we can do a lossy conversion.  The name we
                   * record must be the one we actually tested, so open
                   * a converter from unicode to the "//TRANSLIT" form
                   * of the encoding name (the suffix applies to the
                   * target encoding of iconv_open()).
                   */
                  lossy = objc_malloc(strlen(entry->iconv) + 12);
                  strcpy(lossy, entry->iconv);
                  strcat(lossy, "//TRANSLIT");
                  c = iconv_open(lossy, UNICODE_ENC);
                  if (c == (iconv_t)-1)
                    {
                      objc_free(lossy);
                    }
                  else
                    {
                      entry->lossy = lossy;
                      iconv_close(c);
                    }
                }
#endif
            }
          encodingTable = encTable;
        }
      [local_lock unlock];
    }
}
|
|
|
|
|
2008-07-11 09:10:46 +00:00
|
|
|
/*
 * Return the table entry describing enc, or a null pointer if enc is not
 * positive or is not listed in str_encoding_table.
 * Makes sure the lookup table has been built before using it.
 */
static struct _strenc_ *
EntryForEncoding(NSStringEncoding enc)
{
  unsigned total;
  unsigned idx;

  if (enc <= 0)
    {
      return 0;
    }
  GSSetupEncodingTable();

  /* Small values are found by direct indexing. */
  if (enc <= encTableSize)
    {
      return encodingTable[enc];
    }

  /* Anything larger is not in the index, so scan the whole table. */
  total = sizeof(str_encoding_table) / sizeof(struct _strenc_);
  for (idx = 0; idx < total; idx++)
    {
      if (str_encoding_table[idx].enc == enc)
        {
          return &str_encoding_table[idx];
        }
    }
  return 0;
}
|
|
|
|
|
|
|
|
/*
 * Return the table entry for enc if that encoding is supported, or a null
 * pointer if the encoding is unknown or unsupported.
 *
 * When iconv is available and the entry has not been checked yet
 * (supported == 0), the result of the check is cached in the entry:
 * 1 for supported, -1 for unsupported.  An encoding is considered
 * supported by iconv only if converters can be opened in both directions
 * between it and the internal unicode encoding.
 */
static struct _strenc_ *
EntrySupported(NSStringEncoding enc)
{
  struct _strenc_ *entry = EntryForEncoding(enc);

  if (entry == 0)
    {
      return 0;
    }
#ifdef HAVE_ICONV
  if (entry->iconv != 0 && entry->supported == 0)
    {
      if (enc == NSUnicodeStringEncoding)
        {
          entry->iconv = UNICODE_ENC;
          entry->supported = 1;
        }
      else if (entry->iconv[0] == 0)
        {
          /* explicitly check for empty encoding name since some systems
           * have buggy iconv_open() code which succeeds on an empty name.
           */
          entry->supported = -1;
        }
      else
        {
          iconv_t c;

          /* Can we convert from this encoding to unicode ... */
          c = iconv_open(UNICODE_ENC, entry->iconv);
          if (c == (iconv_t)-1)
            {
              entry->supported = -1;
            }
          else
            {
              iconv_close(c);
              /* ... and from unicode back to this encoding? */
              c = iconv_open(entry->iconv, UNICODE_ENC);
              if (c == (iconv_t)-1)
                {
                  entry->supported = -1;
                }
              else
                {
                  iconv_close(c);
                  entry->supported = 1;
                }
            }
        }
    }
#endif
  if (entry->supported == 1)
    {
      return entry;
    }
  return 0;
}
|
|
|
|
|
|
|
|
/*
 * Returns YES if a supported table entry exists for enc, NO otherwise.
 */
BOOL
GSPrivateIsEncodingSupported(NSStringEncoding enc)
{
  return (EntrySupported(enc) != 0) ? YES : NO;
}
|
|
|
|
|
2002-10-22 03:22:30 +00:00
|
|
|
/** Returns the NSStringEncoding that matches the specified
 * character set registry and encoding information.  For instance,
 * for the iso8859-5 character set, the registry is iso8859 and
 * the encoding is 5, and the returned NSStringEncoding is
 * NSISOCyrillicStringEncoding.  If there is no specific encoding,
 * use @"0".  Returns GSUndefinedEncoding if there is no match.<br />
 * The lookup itself is done by [GSMimeDocument+encodingFromCharset:],
 * using "registry-encoding" as the character set name, or just the
 * registry when encoding is empty or @"0".
 */
NSStringEncoding
GSEncodingForRegistry (NSString *registry, NSString *encoding)
{
  BOOL specific;

  specific = ([encoding length] > 0
    && [encoding isEqualToString: @"0"] == NO) ? YES : NO;
  if (specific == NO)
    {
      return [GSMimeDocument encodingFromCharset: registry];
    }
  return [GSMimeDocument encodingFromCharset:
    [NSString stringWithFormat: @"%@-%@", registry, encoding]];
}
|
|
|
|
|
|
|
|
/** Try to deduce the string encoding from the locale string
 * clocale.  This function looks in the Locale.encodings file
 * installed as part of GNUstep Base if the encoding cannot be
 * deduced from the clocale string itself.  If clocale isn't set or
 * no match can be found, returns GSUndefinedEncoding.
 */
/* It would be really nice if this could be used in +defaultCStringEncoding, but
 * there are too many dependencies on other parts of the library to
 * make this practical (even if everything possible was written in C,
 * we'd still need some way to find the Locale.encodings file).
 */
NSStringEncoding
GSEncodingFromLocale(const char *clocale)
{
  NSStringEncoding encoding = GSUndefinedEncoding;
  const char *dot;
  NSBundle *gbundle;
  NSString *table;
  NSDictionary *dict;
  NSString *encodstr;
  const char *wanted;
  unsigned count;

  if (clocale == NULL
    || strcmp(clocale, "C") == 0
    || strcmp(clocale, "POSIX") == 0)
    {
      /* Don't make any assumptions.  Let caller handle that */
      return encoding;
    }

  dot = strchr (clocale, '.');
  if (dot != NULL)
    {
      /* Locale contains the 'codeset' section.  Parse it and see
       * if we know what encoding this corresponds to.  The character
       * set name handed on is the first '-' separated component of the
       * lowercased codeset, joined to the last component if there is
       * more than one.
       */
      NSString *codeset;
      NSArray *parts;
      NSString *charset;

      codeset = [[NSString stringWithUTF8String: dot + 1] lowercaseString];
      parts = [codeset componentsSeparatedByString: @"-"];
      charset = [parts objectAtIndex: 0];
      if ([parts count] > 1)
        {
          charset = [NSString stringWithFormat: @"%@-%@",
            charset, [parts lastObject]];
        }
      return [GSMimeDocument encodingFromCharset: charset];
    }

  /* No codeset ... look up the locale in our table of encodings */
#ifdef GNUSTEP
  gbundle = [NSBundle bundleForLibrary: @"gnustep-base"];
#else
  gbundle = [NSBundle bundleForClass: NSClassFromString(@"GSXMLNode")];
#endif
  table = [gbundle pathForResource: @"Locale"
                            ofType: @"encodings"
                       inDirectory: @"Languages"];
  if (table == nil)
    {
      return encoding;
    }

  dict = [NSDictionary dictionaryWithContentsOfFile: table];
  encodstr = [dict objectForKey: [NSString stringWithUTF8String: clocale]];
  if (encodstr == nil)
    {
      return GSUndefinedEncoding;
    }

  /* Find the matching encoding by name; the scan stops at the table's
   * terminating entry, whose encoding value is zero.
   */
  wanted = [encodstr lossyCString];
  for (count = 0; str_encoding_table[count].enc; count++)
    {
      if (strcmp(str_encoding_table[count].ename, wanted) == 0)
        {
          break;
        }
    }
  if (str_encoding_table[count].enc)
    {
      encoding = str_encoding_table[count].enc;
    }
  if (encoding == GSUndefinedEncoding)
    {
      NSLog(@"No known GNUstep encoding for %s = %@",
        clocale, encodstr);
    }
  return encoding;
}
|
|
|
|
|
2002-12-31 11:48:36 +00:00
|
|
|
/**
 * Returns the lower case version of ch, or ch itself where the case
 * mapping table holds no (non-zero) mapping for it.<br />
 * Uses direct access into a two-level table to map cases: the first
 * index is ch / 256 and the second is ch % 256.  This is less space
 * efficient (but still not bad) than a single table and a linear search,
 * but it reduces the number of conditional statements to just one.
 */
unichar
uni_tolower(unichar ch)
{
  unsigned page = ch / 256;
  unsigned slot = ch % 256;
  unichar mapped = gs_tolower_map[page][slot];

  if (mapped == 0)
    {
      return ch;
    }
  return mapped;
}
|
2005-02-22 11:22:44 +00:00
|
|
|
|
2002-12-31 11:48:36 +00:00
|
|
|
/**
 * Returns the upper case version of ch, or ch itself where the case
 * mapping table holds no (non-zero) mapping for it.<br />
 * Uses direct access into a two-level table to map cases: the first
 * index is ch / 256 and the second is ch % 256.  This is less space
 * efficient (but still not bad) than a single table and a linear search,
 * but it reduces the number of conditional statements to just one.
 */
unichar
uni_toupper(unichar ch)
{
  unsigned page = ch / 256;
  unsigned slot = ch % 256;
  unichar mapped = gs_toupper_map[page][slot];

  if (mapped == 0)
    {
      return ch;
    }
  return mapped;
}
|
|
|
|
|
|
|
|
/*
 * Look up u in uni_cop_table (binary search on the code field) and return
 * the cop value of the matching entry, or zero if there is none.
 * Characters below the first code in the table are rejected at once.
 *
 * NOTE(review): the search starts with uni_cop_table_size as its upper
 * index, so the element at that index may be read -- confirm against
 * unicode/cop.h that the table really has an element there.
 */
unsigned char
GSPrivateUniCop(unichar u)
{
  unichar low = 0;
  unichar high;

  if (u < uni_cop_table[0].code)
    {
      return 0;	// Special case for latin1
    }

  high = uni_cop_table_size;
  while (low <= high)
    {
      unichar middle;
      unichar probe;

      if (low == high)
        {
          /* Only one candidate left ... it matches or nothing does. */
          if (uni_cop_table[low].code == u)
            {
              return uni_cop_table[low].cop;
            }
          return 0;
        }

      middle = (low + high) / 2;
      probe = uni_cop_table[middle].code;
      if (probe == u)
        {
          return uni_cop_table[middle].cop;
        }
      if (probe < u)
        {
          low = middle + 1;
        }
      else
        {
          high = middle - 1;
        }
    }
  return 0;
}
|
|
|
|
|
2006-10-29 09:17:05 +00:00
|
|
|
/*
 * Public name for GSPrivateUniCop(); returns exactly what that returns.
 */
unsigned char
uni_cop(unichar u)
{
  unsigned char cop = GSPrivateUniCop(u);

  return cop;
}
|
|
|
|
|
1999-09-13 04:11:39 +00:00
|
|
|
/*
 * Returns YES if u is to be treated as a non-spacing character: either
 * it lies in the range 0xdc00 to 0xdfff, or GSPrivateUniCop() gives a
 * non-zero value for it.
 */
BOOL
uni_isnonsp(unichar u)
{
  /*
   * Treating the second half of a surrogate pair (0xdc00 to 0xdfff) as
   * non-spacing is a convenient solution to a number of issues with
   * UTF-16
   */
  if (u >= 0xdc00 && u <= 0xdfff)
    {
      return YES;
    }

  // FIXME check is uni_cop good for this
  return GSPrivateUniCop(u) ? YES : NO;
}
|
|
|
|
|
1999-09-13 04:11:39 +00:00
|
|
|
/*
 * Look up u in uni_dec_table (binary search on the code field) and return
 * the decomp pointer of the matching entry, or a null pointer if there is
 * none.  Characters below the first code in the table are rejected at once.
 *
 * NOTE(review): the search starts with uni_dec_table_size as its upper
 * index, so the element at that index may be read -- confirm against
 * unicode/decomp.h that the table really has an element there.
 */
unichar*
uni_is_decomp(unichar u)
{
  unichar low = 0;
  unichar high;

  if (u < uni_dec_table[0].code)
    {
      return 0;	// Special case for latin1
    }

  high = uni_dec_table_size;
  while (low <= high)
    {
      unichar middle;
      unichar probe;

      if (low == high)
        {
          /* Only one candidate left ... it matches or nothing does. */
          if (uni_dec_table[low].code == u)
            {
              return uni_dec_table[low].decomp;
            }
          return 0;
        }

      middle = (low + high) / 2;
      probe = uni_dec_table[middle].code;
      if (probe == u)
        {
          return uni_dec_table[middle].decomp;
        }
      if (probe < u)
        {
          low = middle + 1;
        }
      else
        {
          high = middle - 1;
        }
    }
  return 0;
}
|
|
|
|
|
2006-03-26 10:59:57 +00:00
|
|
|
/**
 * Function to check a block of data for validity as a unicode string and
 * say whether it contains solely ASCII or solely Latin1 data.<br />
 * Any leading BOM must already have been removed and the data must already
 * be in native byte order.<br />
 * Either of isASCII and isLatin1 may be a null pointer if the caller is
 * not interested in that result.<br />
 * Returns the number of characters which were found valid: that is length
 * if everything is valid, otherwise the index of the first invalid
 * character (a non-character, a second half of a surrogate pair with no
 * first half before it, or a first half with no second half after it).
 */
unsigned
GSUnicode(const unichar *chars, unsigned length,
  BOOL *isASCII, BOOL *isLatin1)
{
  unsigned pos = 0;

  if (isASCII) *isASCII = YES;
  if (isLatin1) *isLatin1 = YES;

  while (pos < length)
    {
      unichar c = chars[pos];

      if (c > 127)
        {
          if (isASCII) *isASCII = NO;
        }
      if (c <= 255)
        {
          /* Nothing in the Latin1 range can be invalid. */
          pos++;
          continue;
        }
      if (isLatin1) *isLatin1 = NO;

      if (c == 0xfffe || c == 0xffff
        || (c >= 0xfdd0 && c <= 0xfdef))
        {
          return pos;	// Non-characters.
        }
      if (c >= 0xdc00 && c <= 0xdfff)
        {
          return pos;	// Second half of a surrogate pair.
        }
      if (c >= 0xd800 && c <= 0xdbff)
        {
          unichar next;

          // First half of a surrogate pair.
          if (pos + 1 >= length)
            {
              return pos;	// Second half missing
            }
          next = chars[pos + 1];
          if (next < 0xdc00 || next > 0xdfff)
            {
              return pos;	// Second half missing
            }
          pos += 2;		// Step past both halves
          continue;
        }
      pos++;
    }
  return pos;
}
|
2002-03-14 13:58:52 +00:00
|
|
|
|
2009-03-09 15:11:51 +00:00
|
|
|
/*
 * GROW() ... make room for more output in GSToUnicode().
 *
 * The macro is expanded inside a loop in a function which provides the
 * variables dst, zone, slen, buf, ptr, bsize, dpos, extra and result, and
 * a label called done.
 *
 * - If dst is null the output is being discarded, so the stack buffer is
 *   simply re-used.
 * - Otherwise, if zone is null we may not allocate, so the conversion
 *   fails (result = NO) and control jumps to done.
 * - Otherwise a buffer of at least bsize + BUFSIZ characters (or slen if
 *   that is larger) is obtained, plus extra bytes for a terminator.
 *
 * On allocation failure result is set to NO and the enclosing loop is left
 * with 'break'.  The existing buffer is deliberately kept in ptr in that
 * case: overwriting ptr with the null result of a failed reallocation
 * would leak the old block and leave the caller with a null pointer.
 */
#if     GS_WITH_GC

#define GROW() \
  if (dst == 0) \
    { \
      /* \
       * Data is just being discarded anyway, so we can \
       * reset the offset into the local buffer on the \
       * stack and pretend the buffer has grown. \
       */ \
      ptr = buf - dpos; \
      bsize = dpos + BUFSIZ; \
      if (extra != 0) \
        { \
          bsize--; \
        } \
    } \
  else if (zone == 0) \
    { \
      result = NO; /* No buffer growth possible ... fail. */ \
      goto done; \
    } \
  else \
    { \
      unsigned  grow = slen; \
      unichar   *tmp; \
\
      if (grow < bsize + BUFSIZ) \
        { \
          grow = bsize + BUFSIZ; \
        } \
      grow *= sizeof(unichar); \
\
      if (ptr == buf || ptr == *dst) \
        { \
          /* Not our memory ... allocate afresh and copy. */ \
          tmp = NSAllocateCollectable(grow + extra, 0); \
          if (tmp != 0) \
            { \
              memcpy(tmp, ptr, bsize * sizeof(unichar)); \
            } \
        } \
      else \
        { \
          tmp = NSReallocateCollectable(ptr, grow + extra, 0); \
        } \
      if (tmp == 0) \
        { \
          result = NO; /* Not enough memory ... ptr left intact */ \
          break; \
        } \
      ptr = tmp; \
      bsize = grow / sizeof(unichar); \
    }

#else   /* GS_WITH_GC */

#define GROW() \
  if (dst == 0) \
    { \
      /* \
       * Data is just being discarded anyway, so we can \
       * reset the offset into the local buffer on the \
       * stack and pretend the buffer has grown. \
       */ \
      ptr = buf - dpos; \
      bsize = dpos + BUFSIZ; \
      if (extra != 0) \
        { \
          bsize--; \
        } \
    } \
  else if (zone == 0) \
    { \
      result = NO; /* No buffer growth possible ... fail. */ \
      goto done; \
    } \
  else \
    { \
      unsigned  grow = slen; \
      unichar   *tmp; \
\
      if (grow < bsize + BUFSIZ) \
        { \
          grow = bsize + BUFSIZ; \
        } \
      grow *= sizeof(unichar); \
\
      if (ptr == buf || ptr == *dst) \
        { \
          /* Not our memory ... allocate afresh and copy. */ \
          tmp = NSZoneMalloc(zone, grow + extra); \
          if (tmp != 0) \
            { \
              memcpy(tmp, ptr, bsize * sizeof(unichar)); \
            } \
        } \
      else \
        { \
          tmp = NSZoneRealloc(zone, ptr, grow + extra); \
        } \
      if (tmp == 0) \
        { \
          result = NO; /* Not enough memory ... ptr left intact */ \
          break; \
        } \
      ptr = tmp; \
      bsize = grow / sizeof(unichar); \
    }

#endif  /* GS_WITH_GC */
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
/**
 * Function to convert from 8-bit data to 16-bit unicode characters.
 * <p>The dst argument is a pointer to a pointer to a buffer in which the
 * converted string is to be stored.  If it is a null pointer, this function
 * discards converted data, and is used only to determine the length of the
 * converted string.  If the zone argument is non-null, the function is free
 * to allocate a larger buffer if necessary, and store this new buffer in
 * the dst argument.  It will *NOT* deallocate the original buffer!
 * </p>
 * <p>The size argument is a pointer to the initial size of the destination
 * buffer.  If the function changes the buffer size, this value will be
 * altered to the new size.  This is measured in 16-bit unicode characters,
 * not bytes.
 * </p>
 * <p>The src argument is a pointer to the byte sequence which is
 * to be converted to 16-bit unicode.
 * </p>
 * <p>The slen argument is the length of the byte sequence
 * which is to be converted to 16-bit unicode.
 * This is measured in bytes.
 * </p>
 * <p>The enc argument specifies the encoding type of the 8-bit byte sequence
 * which is to be converted to 16-bit unicode.
 * </p>
 * <p>The zone argument specifies a memory zone in which the function may
 * allocate a buffer to return data in.
 * If this is null, the function will fail if the originally supplied buffer
 * is not big enough (unless dst is a null pointer ... indicating that
 * converted data is to be discarded).<br />
 * If the library is built for garbage collecting, the zone argument is used
 * only as a marker to say whether the function may allocate memory (zone
 * is non-null) or not (zone is null).
 * </p>
 * The options argument controls some special behavior.
 * <list>
 * <item>If GSUniTerminate is set, the function is expected to nul terminate
 * the output string, and will assume that it is safe to place the nul
 * just beyond the end of the stated buffer size.
 * Also, if the function grows the buffer, it will allow for an extra
 * termination character.</item>
 * <item>If GSUniTemporary is set, the function will return the results in
 * an autoreleased buffer rather than in a buffer that the caller must
 * release.</item>
 * <item>If GSUniBOM is set, the function will write the first unicode
 * character as a byte order marker.</item>
 * <item>If GSUniShortOk is set, the function will return a buffer containing
 * any decoded characters even if the whole conversion fails.</item>
 * </list>
 * <p>On return, the function result is a flag indicating success (YES)
 * or failure (NO), and on success, the value stored in size is the number
 * of characters in the converted string.  The converted string itself is
 * stored in the location given by dst.<br />
 * NB. If the value stored in dst has been changed, it is a pointer to
 * allocated memory which the caller is responsible for freeing, and the
 * caller is <em>still</em> responsible for freeing the original buffer.
 * </p>
 * <p>The conversion fails on malformed UTF-8, on non-ASCII bytes in ASCII
 * input, on an encoding for which no converter is available, and when the
 * output does not fit and the buffer may not (or can not) be grown.
 * </p>
 */
BOOL
GSToUnicode(unichar **dst, unsigned int *size, const unsigned char *src,
  unsigned int slen, NSStringEncoding enc, NSZone *zone,
  unsigned int options)
{
  unichar       buf[BUFSIZ];
  unichar       *ptr;
  unsigned      bsize;
  unsigned      dpos = 0;       // Offset into destination buffer.
  unsigned      spos = 0;       // Offset into source buffer.
  unsigned      extra = (options & GSUniTerminate) ? sizeof(unichar) : 0;
  unichar       base = 0;
  unichar       *table = 0;
  BOOL          result = YES;

  /*
   * Ensure we have an initial buffer set up to decode data into.
   * When terminating, one slot of the local buffer is held back for
   * the terminator.
   */
  if (dst == 0 || *size == 0)
    {
      ptr = buf;
      bsize = (extra != 0) ? BUFSIZ - 1 : BUFSIZ;
    }
  else
    {
      ptr = *dst;
      bsize = *size;
    }

  if (options & GSUniBOM)
    {
      while (dpos >= bsize)
        {
          GROW();
        }
      if (result == NO)
        {
          goto done;    // GROW() failed ... no room for the marker.
        }
      ptr[dpos++] = (unichar)0xFEFF;    // Insert byte order marker.
    }

  switch (enc)
    {
      case NSUTF8StringEncoding:
        {
          while (spos < slen)
            {
              unsigned char     c = src[spos];
              unsigned long     u = c;

              if (c > 0x7f)
                {
                  int   i, sle = 0;

                  /*
                   * Calculate the expected sequence length by counting
                   * the leading one bits of the first byte.
                   */
                  while (c & 0x80)
                    {
                      c = c << 1;
                      sle++;
                    }

                  /* legal ? */
                  if ((sle < 2) || (sle > 6))
                    {
                      result = NO;
                      goto done;
                    }

                  /* do we have enough bytes ? */
                  if ((spos + sle) > slen)
                    {
                      result = NO;
                      goto done;
                    }

                  /*
                   * Get the codepoint ... every following byte must be
                   * a continuation byte (10xxxxxx).
                   */
                  for (i = 1; i < sle; i++)
                    {
                      if (src[spos + i] < 0x80 || src[spos + i] >= 0xc0)
                        break;
                      u = (u << 6) | (src[spos + i] & 0x3f);
                    }
                  if (i < sle)
                    {
                      result = NO;
                      goto done;
                    }
                  /* Strip the length marker bits of the first byte. */
                  u = u & ~(0xffffffff << ((5 * sle) + 1));
                  spos += sle;

                  /*
                   * We discard invalid codepoints here.
                   */
                  if (u > 0x10ffff || u == 0xfffe || u == 0xffff
                    || (u >= 0xfdd0 && u <= 0xfdef))
                    {
                      result = NO;      // Invalid character.
                      goto done;
                    }

                  if ((u >= 0xd800) && (u <= 0xdfff))
                    {
                      result = NO;      // Unmatched half of surrogate pair.
                      goto done;
                    }
                }
              else
                {
                  spos++;
                }

              /*
               * Add codepoint as either a single unichar for BMP
               * or as a pair of surrogates for codepoints over 16 bits.
               */
              if (dpos >= bsize)
                {
                  GROW();
                }
              if (u < 0x10000)
                {
                  ptr[dpos++] = u;
                }
              else
                {
                  unichar       ul, uh;

                  u -= 0x10000;
                  ul = u & 0x3ff;
                  uh = (u >> 10) & 0x3ff;

                  ptr[dpos++] = uh + 0xd800;
                  if (dpos >= bsize)
                    {
                      GROW();
                    }
                  ptr[dpos++] = ul + 0xdc00;
                }
            }
        }
        break;

      case NSNonLossyASCIIStringEncoding:
      case NSASCIIStringEncoding:
        while (spos < slen)
          {
            unichar     c = (unichar)((unc)src[spos++]);

            if (c > 127)
              {
                result = NO;    // Non-ascii data found in input.
                goto done;
              }
            if (dpos >= bsize)
              {
                GROW();
              }
            ptr[dpos++] = c;
          }
        break;

      case NSISOLatin1StringEncoding:
        /* Latin1 maps one to one onto the first 256 unicode values. */
        while (spos < slen)
          {
            if (dpos >= bsize)
              {
                GROW();
              }
            ptr[dpos++] = (unichar)((unc)src[spos++]);
          }
        break;

      case NSNEXTSTEPStringEncoding:
        base = Next_conv_base;
        table = Next_char_to_uni_table;
        goto tables;

      case NSISOCyrillicStringEncoding:
        base = Cyrillic_conv_base;
        table = Cyrillic_char_to_uni_table;
        goto tables;

      case NSISOLatin2StringEncoding:
        base = Latin2_conv_base;
        table = Latin2_char_to_uni_table;
        goto tables;

      case NSISOLatin9StringEncoding:
        base = Latin9_conv_base;
        table = Latin9_char_to_uni_table;
        goto tables;

      case NSISOThaiStringEncoding:
        base = Thai_conv_base;
        table = Thai_char_to_uni_table;
        goto tables;

#if 0
      case NSSymbolStringEncoding:
        base = Symbol_conv_base;
        table = Symbol_char_to_uni_table;
        goto tables;
#endif

tables:
        /*
         * Table driven conversion ... bytes below base are copied
         * unchanged, others are looked up in the table.
         */
        while (spos < slen)
          {
            unc c = (unc)src[spos];

            if (dpos >= bsize)
              {
                GROW();
              }
            if (c < base)
              {
                ptr[dpos++] = c;
              }
            else
              {
                ptr[dpos++] = table[c - base];
              }
            spos++;
          }
        break;

      case NSGSM0338StringEncoding:
        while (spos < slen)
          {
            unc c = (unc)src[spos];

            if (dpos >= bsize)
              {
                GROW();
              }

            ptr[dpos] = GSM0338_char_to_uni_table[c];
            /*
             * An escape introduces a two byte sequence, but only look
             * at the following byte if it actually lies within the
             * source data (never read beyond src[slen - 1]).
             */
            if (c == 0x1b && spos + 1 < slen)
              {
                unsigned        i = 0;

                c = (unc)src[spos+1];
                while (i < sizeof(GSM0338_escapes)/sizeof(GSM0338_escapes[0]))
                  {
                    if (GSM0338_escapes[i].to == c)
                      {
                        ptr[dpos] = GSM0338_escapes[i].from;
                        spos++;         // Consume the escaped byte too.
                        break;
                      }
                    i++;
                  }
              }
            dpos++;
            spos++;
          }
        break;

      default:
#ifdef HAVE_ICONV
        {
          struct _strenc_       *encInfo;
          unsigned char         *inbuf;
          unsigned char         *outbuf;
          size_t                inbytesleft;
          size_t                outbytesleft;
          size_t                rval;
          iconv_t               cd;
          const char            *estr = 0;
          BOOL                  flushed = NO;

          if ((encInfo = EntrySupported(enc)) != 0)
            {
              estr = encInfo->iconv;
            }
          /* explicitly check for empty encoding name since some systems
           * have buggy iconv_open() code which succeeds on an empty name.
           */
          if (estr == 0)
            {
              NSLog(@"No iconv for encoding x%02x", (unsigned)enc);
              result = NO;
              goto done;
            }
          if (slen == 0)
            {
              break;    // Nothing to do
            }
          cd = iconv_open(UNICODE_ENC, estr);
          if (cd == (iconv_t)-1)
            {
              NSLog(@"No iconv for encoding %@ tried to use %s",
                GSPrivateEncodingName(enc), estr);
              result = NO;
              goto done;
            }

          /*
           * From here on every exit must close the converter, so no
           * path may jump to done without calling iconv_close() first.
           */
          inbuf = (unsigned char*)src;
          inbytesleft = slen;
          outbuf = (unsigned char*)ptr;
          outbytesleft = bsize * sizeof(unichar);
          do
            {
              if (inbytesleft == 0)
                {
                  flushed = YES; // Flush iconv
                  rval = iconv(cd, 0, 0, (void*)&outbuf, &outbytesleft);
                }
              else
                {
                  rval = iconv(cd,
                    (void*)&inbuf, &inbytesleft, (void*)&outbuf, &outbytesleft);
                }
              dpos = (bsize * sizeof(unichar) - outbytesleft) / sizeof(unichar);
              if (rval == (size_t)-1)
                {
                  if (errno == E2BIG)
                    {
                      unsigned  old = bsize;

                      if (dst != 0 && zone == 0)
                        {
                          /*
                           * GROW() would jump straight to done in this
                           * situation, skipping iconv_close() ... so
                           * fail here, where we can clean up.
                           */
                          result = NO;
                          iconv_close(cd);
                          goto done;
                        }
                      GROW();
                      outbuf = (unsigned char*)&ptr[dpos];
                      outbytesleft += (bsize - old) * sizeof(unichar);
                    }
                  else
                    {
                      result = NO;
                      iconv_close(cd);
                      goto done;
                    }
                }
            } while (!flushed || rval != 0);
          // close the converter
          iconv_close(cd);
        }
#else
        result = NO;
#endif
    }

done:

  /*
   * Post conversion ... set output values.
   * NB. ptr may be null here if GROW() ran out of memory, in which case
   * there is no buffer to terminate or to pass back.
   */
  if (extra != 0 && ptr != 0)
    {
      ptr[dpos] = (unichar)0;
    }
  *size = dpos;
  if (dst != 0 && ptr != 0 && (result == YES || (options & GSUniShortOk)))
    {
      if (options & GSUniTemporary)
        {
          unsigned      bytes = dpos * sizeof(unichar) + extra;
          void          *r;

          /*
           * Temporary string was requested ... make one.
           */
#if     GS_WITH_GC
          r = NSAllocateCollectable(bytes, 0);
          memcpy(r, ptr, bytes);
#else
          r = GSAutoreleasedBuffer(bytes);
          memcpy(r, ptr, bytes);
          /* Release any buffer which GROW() allocated for us. */
          if (ptr != buf && ptr != *dst)
            {
              NSZoneFree(zone, ptr);
            }
#endif
          ptr = r;
          *dst = ptr;
        }
      else if (zone != 0 && (ptr == buf || bsize > dpos))
        {
          unsigned      bytes = dpos * sizeof(unichar) + extra;

          /*
           * Resizing is permitted, try ensure we return a buffer which
           * is just big enough to hold the converted string.
           */
          if (ptr == buf || ptr == *dst)
            {
              unichar   *tmp;

#if     GS_WITH_GC
              tmp = NSAllocateCollectable(bytes, 0);
#else
              tmp = NSZoneMalloc(zone, bytes);
#endif
              if (tmp != 0)
                {
                  memcpy(tmp, ptr, bytes);
                }
              ptr = tmp;
            }
          else
            {
#if     GS_WITH_GC
              ptr = NSReallocateCollectable(ptr, bytes, 0);
#else
              ptr = NSZoneRealloc(zone, ptr, bytes);
#endif
            }
          *dst = ptr;
        }
      else if (ptr == buf)
        {
          /* The local stack buffer must never be passed out. */
          ptr = NULL;
          result = NO;
        }
      else
        {
          *dst = ptr;
        }
    }
#if     !GS_WITH_GC
  else if (ptr != 0 && ptr != buf && dst != 0 && ptr != *dst)
    {
      /* Nothing is being returned ... release what GROW() allocated. */
      NSZoneFree(zone, ptr);
    }
#endif

  if (dst)
    NSCAssert(*dst != buf, @"attempted to pass out pointer to internal buffer");

  return result;
}
|
|
|
|
|
|
|
|
#undef GROW
|
|
|
|
|
|
|
|
|
2009-03-09 15:11:51 +00:00
|
|
|
/*
 * GROW() ... make room for more output bytes in GSFromUnicode().
 *
 * The macro is expanded inside a loop in a function which provides the
 * variables dst, zone, slen, buf, ptr, bsize, dpos, extra and result, and
 * a label called done.  Unlike the variant used for conversion to unicode,
 * all sizes here are measured in bytes.
 *
 * - If dst is null the output is being discarded, so the stack buffer is
 *   simply re-used.
 * - Otherwise, if zone is null we may not allocate, so the conversion
 *   fails (result = NO) and control jumps to done.
 * - Otherwise a buffer of at least bsize + BUFSIZ bytes (or slen if that
 *   is larger) is obtained, plus extra bytes for a terminator.
 *
 * On allocation failure result is set to NO and the enclosing loop is left
 * with 'break'.  The existing buffer is deliberately kept in ptr in that
 * case: overwriting ptr with the null result of a failed reallocation
 * would leak the old block and leave the caller with a null pointer.
 */
#if     GS_WITH_GC

#define GROW() \
  if (dst == 0) \
    { \
      /* \
       * Data is just being discarded anyway, so we can \
       * reset the offset into the local buffer on the \
       * stack and pretend the buffer has grown. \
       */ \
      ptr = buf - dpos; \
      bsize = dpos + BUFSIZ; \
      if (extra != 0) \
        { \
          bsize--; \
        } \
    } \
  else if (zone == 0) \
    { \
      result = NO; /* No buffer growth possible ... fail. */ \
      goto done; \
    } \
  else \
    { \
      unsigned          grow = slen; \
      unsigned char     *tmp; \
\
      if (grow < bsize + BUFSIZ) \
        { \
          grow = bsize + BUFSIZ; \
        } \
\
      if (ptr == buf || ptr == *dst) \
        { \
          /* Not our memory ... allocate afresh and copy. */ \
          tmp = NSAllocateCollectable(grow + extra, 0); \
          if (tmp != 0) \
            { \
              memcpy(tmp, ptr, bsize); \
            } \
        } \
      else \
        { \
          tmp = NSReallocateCollectable(ptr, grow + extra, 0); \
        } \
      if (tmp == 0) \
        { \
          result = NO; /* Not enough memory ... ptr left intact */ \
          break; \
        } \
      ptr = tmp; \
      bsize = grow; \
    }

#else   /* GS_WITH_GC */

#define GROW() \
  if (dst == 0) \
    { \
      /* \
       * Data is just being discarded anyway, so we can \
       * reset the offset into the local buffer on the \
       * stack and pretend the buffer has grown. \
       */ \
      ptr = buf - dpos; \
      bsize = dpos + BUFSIZ; \
      if (extra != 0) \
        { \
          bsize--; \
        } \
    } \
  else if (zone == 0) \
    { \
      result = NO; /* No buffer growth possible ... fail. */ \
      goto done; \
    } \
  else \
    { \
      unsigned          grow = slen; \
      unsigned char     *tmp; \
\
      if (grow < bsize + BUFSIZ) \
        { \
          grow = bsize + BUFSIZ; \
        } \
\
      if (ptr == buf || ptr == *dst) \
        { \
          /* Not our memory ... allocate afresh and copy. */ \
          tmp = NSZoneMalloc(zone, grow + extra); \
          if (tmp != 0) \
            { \
              memcpy(tmp, ptr, bsize); \
            } \
        } \
      else \
        { \
          tmp = NSZoneRealloc(zone, ptr, grow + extra); \
        } \
      if (tmp == 0) \
        { \
          result = NO; /* Not enough memory ... ptr left intact */ \
          break; \
        } \
      ptr = tmp; \
      bsize = grow; \
    }

#endif  /* GS_WITH_GC */
|
2002-09-08 08:53:35 +00:00
|
|
|
|
|
|
|
/*
 * Binary search of the first hi entries of table for the one whose 'from'
 * field equals c.  The search relies on the entries being in ascending
 * order of 'from'.
 * Returns the index of the matching entry, or -1 if there is none.
 */
static inline int chop(unichar c, _ucc_ *table, int hi)
{
  int   lower = 0;
  int   upper = hi;

  while (lower < upper)
    {
      int       mid = (upper + lower) / 2;
      unichar   key = table[mid].from;

      if (key == c)
        {
          return mid;           // Found
        }
      if (key < c)
        {
          lower = mid + 1;      // Target lies above the midpoint.
        }
      else
        {
          upper = mid;          // Target lies below the midpoint.
        }
    }
  return -1;                    // Not found
}
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
/**
|
2003-07-09 02:13:58 +00:00
|
|
|
* Function to convert from 16-bit unicode to 8-bit data.
|
2002-03-14 13:58:52 +00:00
|
|
|
* <p>The dst argument is a pointer to a pointer to a buffer in which the
|
2003-07-09 02:13:58 +00:00
|
|
|
* converted data is to be stored. If it is a null pointer, this function
|
2002-03-14 13:58:52 +00:00
|
|
|
* discards converted data, and is used only to determine the length of the
|
2003-07-09 02:13:58 +00:00
|
|
|
* converted data. If the zone argument is non-null, the function is free
|
2002-03-14 13:58:52 +00:00
|
|
|
* to allocate a larger buffer if necessary, and store this new buffer in
|
|
|
|
* the dst argument. It will *NOT* deallocate the original buffer!
|
|
|
|
* </p>
|
|
|
|
* <p>The size argument is a pointer to the initial size of the destination
|
|
|
|
* buffer. If the function changes the buffer size, this value will be
|
2003-07-09 02:13:58 +00:00
|
|
|
* altered to the new size. This is measured in bytes.
|
2002-03-14 13:58:52 +00:00
|
|
|
* </p>
|
|
|
|
* <p>The src argument is a pointer to the 16-bit unicode string which is
|
|
|
|
* to be converted to 8-bit data.
|
|
|
|
* </p>
|
2003-07-09 02:13:58 +00:00
|
|
|
* <p>The slen argument is the length of the 16-bit unicode string
|
2002-03-14 13:58:52 +00:00
|
|
|
* which is to be converted to 8-bit data.
|
2003-07-09 02:13:58 +00:00
|
|
|
* This is measured in 16-bit characters, not bytes.
|
2002-03-14 13:58:52 +00:00
|
|
|
* </p>
|
2003-07-09 02:13:58 +00:00
|
|
|
* <p>The enc argument specifies the encoding type of the 8-bit byte sequence
|
|
|
|
* which is to be produced from the 16-bit unicode.
|
2002-03-14 13:58:52 +00:00
|
|
|
* </p>
|
|
|
|
* <p>The zone argument specifies a memory zone in which the function may
|
|
|
|
* allocate a buffer to return data in.
|
|
|
|
* If this is null, the function will fail if the originally supplied buffer
|
2002-08-20 15:07:58 +00:00
|
|
|
* is not big enough (unless dst is a null pointer ... indicating that
|
2009-03-09 15:11:51 +00:00
|
|
|
* converted data is to be discarded).<br />
|
|
|
|
* If the library is built for garbage collecting, the zone argument is used
|
|
|
|
* only as a marker to say whether the function may allocate memory (zone
|
|
|
|
* is non-null) or not (zone is null).
|
2002-03-14 13:58:52 +00:00
|
|
|
* </p>
|
2002-03-14 15:27:12 +00:00
|
|
|
* The options argument controls some special behavior.
|
|
|
|
* <list>
|
|
|
|
* <item>If GSUniStrict is set, the function will fail if a character is
|
2003-07-09 02:13:58 +00:00
|
|
|
* encountered in the source which can't be converted. Otherwise, some
|
2002-03-14 15:27:12 +00:00
|
|
|
* approximation or marker will be placed in the destination.</item>
|
2003-07-09 02:13:58 +00:00
|
|
|
* <item>If GSUniTerminate is set, the function is expected to nul terminate
|
|
|
|
* the output data, and will assume that it is safe to place the nul
|
|
|
|
* just beyond the end of the stated buffer size.
|
2002-03-14 15:27:12 +00:00
|
|
|
* Also, if the function grows the buffer, it will allow for an extra
|
2003-07-09 02:13:58 +00:00
|
|
|
* termination byte.</item>
|
2002-03-14 15:27:12 +00:00
|
|
|
* <item>If GSUniTemporary is set, the function will return the results in
|
|
|
|
* an autoreleased buffer rather than in a buffer that the caller must
|
|
|
|
* release.</item>
|
2002-03-16 09:54:50 +00:00
|
|
|
* <item>If GSUniBOM is set, the function will read the first unicode
|
|
|
|
* character as a byte order marker.</item>
|
|
|
|
* <item>If GSUniShortOk is set, the function will return a buffer containing
|
|
|
|
* any decoded characters even if the whole conversion fails.</item>
|
|
|
|
* </list>
|
2002-03-14 13:58:52 +00:00
|
|
|
* <p>On return, the function result is a flag indicating success (YES)
|
|
|
|
* or failure (NO), and on success, the value stored in size is the number
|
2003-07-09 02:13:58 +00:00
|
|
|
* of bytes in the converted data. The converted data itself is
|
|
|
|
* stored in the location given by dst.<br />
|
2002-03-14 13:58:52 +00:00
|
|
|
* NB. If the value stored in dst has been changed, it is a pointer to
|
|
|
|
* allocated memory which the caller is responsible for freeing, and the
|
|
|
|
* caller is <em>still</em> responsible for freeing the original buffer.
|
|
|
|
* </p>
|
|
|
|
*/
|
|
|
|
BOOL
|
2002-03-14 15:27:12 +00:00
|
|
|
GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
|
|
|
unsigned int slen, NSStringEncoding enc, NSZone *zone,
|
|
|
|
unsigned int options)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
|
|
|
unsigned char buf[BUFSIZ];
|
|
|
|
unsigned char *ptr;
|
|
|
|
unsigned bsize;
|
|
|
|
unsigned dpos = 0; // Offset into destination buffer.
|
|
|
|
unsigned spos = 0; // Offset into source buffer.
|
2002-03-14 15:27:12 +00:00
|
|
|
unsigned extra = (options & GSUniTerminate) ? 1 : 0;
|
|
|
|
BOOL strict = (options & GSUniStrict) ? YES : NO;
|
2002-03-14 13:58:52 +00:00
|
|
|
unichar base = 0;
|
|
|
|
_ucc_ *table = 0;
|
|
|
|
unsigned tsize = 0;
|
2002-09-18 09:34:33 +00:00
|
|
|
unsigned char escape = 0;
|
|
|
|
_ucc_ *etable = 0;
|
|
|
|
unsigned etsize = 0;
|
|
|
|
_ucc_ *ltable = 0;
|
|
|
|
unsigned ltsize = 0;
|
2002-03-16 09:54:50 +00:00
|
|
|
BOOL swapped = NO;
|
|
|
|
BOOL result = YES;
|
2003-07-08 01:50:42 +00:00
|
|
|
|
2002-03-16 09:54:50 +00:00
|
|
|
if (options & GSUniBOM)
|
|
|
|
{
|
|
|
|
if (slen == 0)
|
|
|
|
{
|
|
|
|
*size = 0;
|
2002-03-25 14:11:39 +00:00
|
|
|
result = NO; // Missing byte order marker.
|
2002-03-16 09:54:50 +00:00
|
|
|
}
|
2002-03-25 14:11:39 +00:00
|
|
|
else
|
2002-03-16 09:54:50 +00:00
|
|
|
{
|
2002-03-25 14:11:39 +00:00
|
|
|
unichar c;
|
|
|
|
|
|
|
|
c = *src++;
|
|
|
|
slen--;
|
|
|
|
if (c != 0xFEFF)
|
2002-03-16 09:54:50 +00:00
|
|
|
{
|
2002-03-25 14:11:39 +00:00
|
|
|
if (c == 0xFFFE)
|
|
|
|
{
|
|
|
|
swapped = YES;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*size = 0;
|
|
|
|
result = NO; // Illegal byte order marker.
|
|
|
|
}
|
2002-03-16 09:54:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
/*
|
|
|
|
* Ensure we have an initial buffer set up to decode data into.
|
|
|
|
*/
|
|
|
|
if (dst == 0 || *size == 0)
|
|
|
|
{
|
|
|
|
ptr = buf;
|
2002-03-14 15:27:12 +00:00
|
|
|
bsize = (extra != 0) ? BUFSIZ - 1 : BUFSIZ;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ptr = *dst;
|
|
|
|
bsize = *size;
|
|
|
|
}
|
|
|
|
|
2006-05-30 05:52:51 +00:00
|
|
|
if (result == NO)
|
|
|
|
{
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2006-02-01 06:18:41 +00:00
|
|
|
#ifdef HAVE_ICONV
|
|
|
|
if (strict == NO
|
|
|
|
&& enc != NSUTF8StringEncoding
|
|
|
|
&& enc != NSGSM0338StringEncoding)
|
|
|
|
{
|
|
|
|
goto iconv_start; // For lossy conversion
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
switch (enc)
|
|
|
|
{
|
2002-12-17 12:06:39 +00:00
|
|
|
case NSUTF8StringEncoding:
|
|
|
|
{
|
|
|
|
while (spos < slen)
|
|
|
|
{
|
2003-09-10 08:14:45 +00:00
|
|
|
unichar u1, u2;
|
|
|
|
unsigned long u;
|
|
|
|
int sl = 0;
|
2002-12-17 12:06:39 +00:00
|
|
|
|
2003-09-10 08:14:45 +00:00
|
|
|
/* get first unichar */
|
|
|
|
u1 = src[spos++];
|
2002-12-17 12:06:39 +00:00
|
|
|
if (swapped == YES)
|
|
|
|
{
|
2006-05-22 13:01:00 +00:00
|
|
|
u1 = (((u1 & 0xff00) >> 8) + ((u1 & 0x00ff) << 8));
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
2006-05-30 06:30:09 +00:00
|
|
|
// 0xfeff is a zero-width-no-break-space inside text (not a BOM).
|
|
|
|
if (u1 == 0xfffe // unexpected BOM
|
|
|
|
|| u1 == 0xffff // not a character
|
2006-03-26 11:55:56 +00:00
|
|
|
|| (u1 >= 0xfdd0 && u1 <= 0xfdef) // invalid character
|
|
|
|
|| (u1 >= 0xdc00 && u1 <= 0xdfff)) // bad pairing
|
|
|
|
{
|
|
|
|
if (strict)
|
|
|
|
{
|
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2006-03-26 11:55:56 +00:00
|
|
|
}
|
|
|
|
continue; // Skip invalid character.
|
|
|
|
}
|
2002-12-17 12:06:39 +00:00
|
|
|
|
2003-09-11 05:02:26 +00:00
|
|
|
/* possibly get second character and calculate 'u' */
|
2003-09-10 08:14:45 +00:00
|
|
|
if ((u1 >= 0xd800) && (u1 < 0xdc00))
|
|
|
|
{
|
|
|
|
if (spos >= slen)
|
|
|
|
{
|
2006-03-26 11:55:56 +00:00
|
|
|
if (strict)
|
|
|
|
{
|
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2006-03-26 11:55:56 +00:00
|
|
|
}
|
|
|
|
continue; // At end.
|
2003-09-10 08:14:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* get second unichar */
|
|
|
|
u2 = src[spos++];
|
|
|
|
if (swapped == YES)
|
|
|
|
{
|
2006-05-22 13:01:00 +00:00
|
|
|
u2 = (((u2 & 0xff00) >> 8) + ((u2 & 0x00ff) << 8));
|
2003-09-10 08:14:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if ((u2 < 0xdc00) && (u2 > 0xdfff))
|
|
|
|
{
|
2003-09-11 05:02:26 +00:00
|
|
|
spos--;
|
2006-03-26 11:55:56 +00:00
|
|
|
if (strict)
|
|
|
|
{
|
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2006-03-26 11:55:56 +00:00
|
|
|
}
|
|
|
|
continue; // Skip bad half of surrogate pair.
|
2003-09-10 08:14:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* make the full value */
|
|
|
|
u = ((unsigned long)(u1 - 0xd800) * 0x400)
|
|
|
|
+ (u2 - 0xdc00) + 0x10000;
|
|
|
|
}
|
|
|
|
else
|
2002-12-17 12:06:39 +00:00
|
|
|
{
|
2003-09-10 08:14:45 +00:00
|
|
|
u = u1;
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
2003-09-10 08:14:45 +00:00
|
|
|
|
|
|
|
/* calculate the sequence length */
|
|
|
|
if (u <= 0x7f)
|
2002-12-17 12:06:39 +00:00
|
|
|
{
|
2003-09-10 08:14:45 +00:00
|
|
|
sl = 1;
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
2003-09-10 08:14:45 +00:00
|
|
|
else if (u <= 0x7ff)
|
2002-12-17 12:06:39 +00:00
|
|
|
{
|
2003-09-10 08:14:45 +00:00
|
|
|
sl = 2;
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
2003-09-10 08:14:45 +00:00
|
|
|
else if (u <= 0xffff)
|
2002-12-17 12:06:39 +00:00
|
|
|
{
|
2003-09-10 08:14:45 +00:00
|
|
|
sl = 3;
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
2003-09-10 08:14:45 +00:00
|
|
|
else if (u <= 0x1fffff)
|
2002-12-17 12:06:39 +00:00
|
|
|
{
|
2003-09-10 08:14:45 +00:00
|
|
|
sl = 4;
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
2003-09-10 08:14:45 +00:00
|
|
|
else if (u <= 0x3ffffff)
|
|
|
|
{
|
|
|
|
sl = 5;
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
2003-09-10 08:14:45 +00:00
|
|
|
else
|
2002-12-17 12:06:39 +00:00
|
|
|
{
|
2003-09-10 08:14:45 +00:00
|
|
|
sl = 6;
|
|
|
|
}
|
|
|
|
|
2006-02-01 06:18:41 +00:00
|
|
|
/* make sure we have enough space for it */
|
|
|
|
while (dpos + sl >= bsize)
|
|
|
|
{
|
|
|
|
GROW();
|
|
|
|
}
|
|
|
|
|
2003-09-10 08:14:45 +00:00
|
|
|
if (sl == 1)
|
|
|
|
{
|
|
|
|
ptr[dpos++] = u & 0x7f;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2003-09-11 05:02:26 +00:00
|
|
|
int i;
|
2003-09-10 08:14:45 +00:00
|
|
|
unsigned char reversed[8];
|
|
|
|
|
|
|
|
/* split value into reversed array */
|
|
|
|
for (i = 0; i < sl; i++)
|
|
|
|
{
|
|
|
|
reversed[i] = (u & 0x3f);
|
|
|
|
u = u >> 6;
|
|
|
|
}
|
|
|
|
|
|
|
|
ptr[dpos++] = reversed[sl-1] | ((0xff << (8-sl)) & 0xff);
|
|
|
|
/* add bytes into the output sequence */
|
|
|
|
for (i = sl - 2; i >= 0; i--)
|
|
|
|
{
|
|
|
|
ptr[dpos++] = reversed[i] | 0x80;
|
|
|
|
}
|
|
|
|
}
|
2002-12-17 12:06:39 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
case NSNonLossyASCIIStringEncoding:
|
|
|
|
case NSASCIIStringEncoding:
|
|
|
|
base = 128;
|
|
|
|
goto bases;
|
2002-12-17 12:06:39 +00:00
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
case NSISOLatin1StringEncoding:
|
2004-04-14 16:23:58 +00:00
|
|
|
case NSUnicodeStringEncoding:
|
2002-03-14 13:58:52 +00:00
|
|
|
base = 256;
|
|
|
|
goto bases;
|
|
|
|
|
|
|
|
bases:
|
2002-03-16 09:54:50 +00:00
|
|
|
if (strict == NO)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
|
|
|
while (spos < slen)
|
|
|
|
{
|
|
|
|
unichar u = src[spos++];
|
|
|
|
|
2002-03-16 09:54:50 +00:00
|
|
|
if (swapped == YES)
|
|
|
|
{
|
2006-05-22 13:01:00 +00:00
|
|
|
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
|
2002-03-16 09:54:50 +00:00
|
|
|
}
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
if (dpos >= bsize)
|
|
|
|
{
|
|
|
|
GROW();
|
|
|
|
}
|
2002-03-18 08:31:39 +00:00
|
|
|
if (u < base)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2003-07-28 16:44:24 +00:00
|
|
|
ptr[dpos++] = (unsigned char)u;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2006-02-01 06:18:41 +00:00
|
|
|
ptr[dpos++] = '?';
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
while (spos < slen)
|
|
|
|
{
|
|
|
|
unichar u = src[spos++];
|
|
|
|
|
2002-03-16 09:54:50 +00:00
|
|
|
if (swapped == YES)
|
|
|
|
{
|
2006-05-22 13:01:00 +00:00
|
|
|
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
|
2002-03-16 09:54:50 +00:00
|
|
|
}
|
2002-03-14 13:58:52 +00:00
|
|
|
if (dpos >= bsize)
|
|
|
|
{
|
|
|
|
GROW();
|
|
|
|
}
|
2002-03-18 08:31:39 +00:00
|
|
|
if (u < base)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2003-07-28 16:44:24 +00:00
|
|
|
ptr[dpos++] = (unsigned char)u;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2002-03-16 09:54:50 +00:00
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case NSNEXTSTEPStringEncoding:
|
2003-06-07 23:33:58 +00:00
|
|
|
base = Next_conv_base;
|
2002-03-14 13:58:52 +00:00
|
|
|
table = Next_uni_to_char_table;
|
|
|
|
tsize = Next_uni_to_char_table_size;
|
|
|
|
goto tables;
|
|
|
|
|
|
|
|
case NSISOCyrillicStringEncoding:
|
2003-06-07 23:33:58 +00:00
|
|
|
base = Cyrillic_conv_base;
|
2002-03-14 13:58:52 +00:00
|
|
|
table = Cyrillic_uni_to_char_table;
|
|
|
|
tsize = Cyrillic_uni_to_char_table_size;
|
|
|
|
goto tables;
|
|
|
|
|
|
|
|
case NSISOLatin2StringEncoding:
|
2003-06-07 23:33:58 +00:00
|
|
|
base = Latin2_conv_base;
|
2002-03-14 13:58:52 +00:00
|
|
|
table = Latin2_uni_to_char_table;
|
|
|
|
tsize = Latin2_uni_to_char_table_size;
|
|
|
|
goto tables;
|
|
|
|
|
2003-01-20 09:58:45 +00:00
|
|
|
case NSISOLatin9StringEncoding:
|
2003-06-07 23:33:58 +00:00
|
|
|
base = Latin9_conv_base;
|
2003-01-20 09:58:45 +00:00
|
|
|
table = Latin9_uni_to_char_table;
|
|
|
|
tsize = Latin9_uni_to_char_table_size;
|
|
|
|
goto tables;
|
|
|
|
|
2002-10-22 14:29:34 +00:00
|
|
|
case NSISOThaiStringEncoding:
|
2003-06-07 23:33:58 +00:00
|
|
|
base = Thai_conv_base;
|
2002-10-22 14:29:34 +00:00
|
|
|
table = Thai_uni_to_char_table;
|
|
|
|
tsize = Thai_uni_to_char_table_size;
|
|
|
|
goto tables;
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
#if 0
|
|
|
|
case NSSymbolStringEncoding:
|
2003-06-07 23:33:58 +00:00
|
|
|
base = Symbol_conv_base;
|
2002-03-14 13:58:52 +00:00
|
|
|
table = Symbol_uni_to_char_table;
|
|
|
|
tsize = Symbol_uni_to_char_table_size;
|
|
|
|
goto tables;
|
|
|
|
#endif
|
|
|
|
|
2002-09-18 09:34:33 +00:00
|
|
|
case NSGSM0338StringEncoding:
|
|
|
|
base = 0;
|
|
|
|
table = GSM0338_uni_to_char_table;
|
|
|
|
tsize = GSM0338_tsize;
|
|
|
|
escape = 0x1b;
|
|
|
|
etable = GSM0338_escapes;
|
|
|
|
etsize = GSM0338_esize;
|
2002-04-16 13:48:14 +00:00
|
|
|
if (strict == NO)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-18 09:34:33 +00:00
|
|
|
ltable = GSM0338_lossy;
|
|
|
|
ltsize = GSM0338_lsize;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
2002-09-18 09:34:33 +00:00
|
|
|
goto tables;
|
2002-03-14 13:58:52 +00:00
|
|
|
|
2002-09-18 09:34:33 +00:00
|
|
|
tables:
|
2002-03-14 13:58:52 +00:00
|
|
|
while (spos < slen)
|
|
|
|
{
|
|
|
|
unichar u = src[spos++];
|
2002-09-18 09:34:33 +00:00
|
|
|
int i;
|
2002-03-16 09:54:50 +00:00
|
|
|
|
2002-09-18 09:34:33 +00:00
|
|
|
/* Swap byte order if necessary */
|
2002-03-16 09:54:50 +00:00
|
|
|
if (swapped == YES)
|
|
|
|
{
|
2006-05-22 13:01:00 +00:00
|
|
|
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
|
2002-03-16 09:54:50 +00:00
|
|
|
}
|
2002-03-14 13:58:52 +00:00
|
|
|
|
2002-09-18 09:34:33 +00:00
|
|
|
/* Grow output buffer to make room if necessary */
|
2002-03-14 13:58:52 +00:00
|
|
|
if (dpos >= bsize)
|
|
|
|
{
|
|
|
|
GROW();
|
|
|
|
}
|
|
|
|
|
2002-09-18 09:34:33 +00:00
|
|
|
if (u < base)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-18 09:34:33 +00:00
|
|
|
/*
|
|
|
|
* The character set has a lower section whose contents
|
|
|
|
* are identical to unicode, so no mapping is needed.
|
|
|
|
*/
|
2003-07-28 16:44:24 +00:00
|
|
|
ptr[dpos++] = (unsigned char)u;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
2002-09-18 09:34:33 +00:00
|
|
|
else if (table != 0 && (i = chop(u, table, tsize)) >= 0)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-18 09:34:33 +00:00
|
|
|
/*
|
|
|
|
* The character mapping is found in a basic table.
|
|
|
|
*/
|
|
|
|
ptr[dpos++] = table[i].to;
|
|
|
|
}
|
|
|
|
else if (etable != 0 && (i = chop(u, etable, etsize)) >= 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The character mapping is found in a table of simple
|
|
|
|
* escape sequences consisting of an escape byte followed
|
|
|
|
* by another single byte.
|
|
|
|
*/
|
|
|
|
ptr[dpos++] = escape;
|
|
|
|
if (dpos >= bsize)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-18 09:34:33 +00:00
|
|
|
GROW();
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
2002-09-18 09:34:33 +00:00
|
|
|
ptr[dpos++] = etable[i].to;
|
|
|
|
}
|
|
|
|
else if (ltable != 0 && (i = chop(u, ltable, ltsize)) >= 0)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The character is found in a lossy mapping table.
|
|
|
|
*/
|
|
|
|
ptr[dpos++] = ltable[i].to;
|
|
|
|
}
|
|
|
|
else if (strict == NO)
|
|
|
|
{
|
|
|
|
/*
|
2006-02-01 06:18:41 +00:00
|
|
|
* The default lossy mapping generates a question mark.
|
2002-09-18 09:34:33 +00:00
|
|
|
*/
|
2006-02-01 06:18:41 +00:00
|
|
|
ptr[dpos++] = '?';
|
2002-09-18 09:34:33 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* No mapping has been found.
|
|
|
|
*/
|
|
|
|
result = NO;
|
|
|
|
spos = slen;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
#ifdef HAVE_ICONV
|
2006-02-01 06:18:41 +00:00
|
|
|
iconv_start:
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2008-07-11 09:10:46 +00:00
|
|
|
struct _strenc_ *encInfo;
|
2002-03-14 13:58:52 +00:00
|
|
|
iconv_t cd;
|
2003-07-28 16:44:24 +00:00
|
|
|
unsigned char *inbuf;
|
|
|
|
unsigned char *outbuf;
|
2002-03-14 13:58:52 +00:00
|
|
|
size_t inbytesleft;
|
|
|
|
size_t outbytesleft;
|
2002-03-16 09:54:50 +00:00
|
|
|
size_t rval;
|
2006-02-01 06:18:41 +00:00
|
|
|
const char *estr = 0;
|
2003-07-01 08:33:22 +00:00
|
|
|
BOOL done = NO;
|
2002-03-14 13:58:52 +00:00
|
|
|
|
2008-07-11 09:10:46 +00:00
|
|
|
if ((encInfo = EntrySupported(enc)) != 0)
|
2006-02-01 06:18:41 +00:00
|
|
|
{
|
|
|
|
if (strict == NO)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Try to transliterate where no direct conversion
|
|
|
|
* is available.
|
|
|
|
*/
|
2008-07-11 09:10:46 +00:00
|
|
|
estr = encInfo->lossy;
|
2006-02-01 06:18:41 +00:00
|
|
|
}
|
|
|
|
if (estr == 0)
|
|
|
|
{
|
2008-07-11 09:10:46 +00:00
|
|
|
estr = encInfo->iconv;
|
2006-02-01 06:18:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2002-11-20 15:50:46 +00:00
|
|
|
/* explicitly check for empty encoding name since some systems
|
|
|
|
* have buggy iconv_open() code which succeeds on an empty name.
|
|
|
|
*/
|
2002-11-20 13:50:55 +00:00
|
|
|
if (estr == 0)
|
|
|
|
{
|
|
|
|
NSLog(@"No iconv for encoding x%02x", enc);
|
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2002-11-20 13:50:55 +00:00
|
|
|
}
|
2003-07-01 08:33:22 +00:00
|
|
|
if (slen == 0)
|
|
|
|
{
|
|
|
|
break; // Nothing to convert.
|
|
|
|
}
|
2002-11-20 13:50:55 +00:00
|
|
|
cd = iconv_open(estr, UNICODE_ENC);
|
2002-03-14 13:58:52 +00:00
|
|
|
if (cd == (iconv_t)-1)
|
|
|
|
{
|
2005-02-22 11:22:44 +00:00
|
|
|
NSLog(@"No iconv for encoding %@ tried to use %s",
|
2006-10-20 10:56:27 +00:00
|
|
|
GSPrivateEncodingName(enc), estr);
|
2002-03-16 09:54:50 +00:00
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
|
2003-07-28 16:44:24 +00:00
|
|
|
inbuf = (unsigned char*)src;
|
2002-03-14 13:58:52 +00:00
|
|
|
inbytesleft = slen * sizeof(unichar);
|
2003-07-28 16:44:24 +00:00
|
|
|
outbuf = (unsigned char*)ptr;
|
2002-03-14 13:58:52 +00:00
|
|
|
outbytesleft = bsize;
|
2003-07-08 01:50:42 +00:00
|
|
|
do
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2003-07-01 08:33:22 +00:00
|
|
|
if (inbytesleft == 0)
|
|
|
|
{
|
2003-07-08 01:50:42 +00:00
|
|
|
done = YES; // Flush buffer
|
2003-07-28 16:44:24 +00:00
|
|
|
rval = iconv(cd, 0, 0, (void*)&outbuf, &outbytesleft);
|
2003-07-01 08:33:22 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
rval = iconv(cd,
|
2003-07-28 16:44:24 +00:00
|
|
|
(void*)&inbuf, &inbytesleft, (void*)&outbuf, &outbytesleft);
|
2003-07-01 08:33:22 +00:00
|
|
|
}
|
2002-03-20 22:37:22 +00:00
|
|
|
dpos = bsize - outbytesleft;
|
2002-09-08 08:53:35 +00:00
|
|
|
if (rval != 0)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-08 08:53:35 +00:00
|
|
|
if (rval == (size_t)-1)
|
2002-03-20 17:30:01 +00:00
|
|
|
{
|
2002-09-08 08:53:35 +00:00
|
|
|
if (errno == E2BIG)
|
|
|
|
{
|
|
|
|
unsigned old = bsize;
|
2002-03-20 17:30:01 +00:00
|
|
|
|
2002-09-08 08:53:35 +00:00
|
|
|
GROW();
|
2003-07-28 16:44:24 +00:00
|
|
|
outbuf = (unsigned char*)&ptr[dpos];
|
2002-09-08 08:53:35 +00:00
|
|
|
outbytesleft += (bsize - old);
|
|
|
|
}
|
|
|
|
else if (errno == EILSEQ)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-08 08:53:35 +00:00
|
|
|
if (strict == YES)
|
|
|
|
{
|
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2002-09-08 08:53:35 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If we are allowing lossy conversion, we replace any
|
2006-02-01 06:18:41 +00:00
|
|
|
* unconvertable character with a question mark.
|
2002-09-08 08:53:35 +00:00
|
|
|
*/
|
|
|
|
if (outbytesleft > 0)
|
|
|
|
{
|
2006-02-01 06:18:41 +00:00
|
|
|
*outbuf++ = '?';
|
2002-09-08 08:53:35 +00:00
|
|
|
outbytesleft--;
|
|
|
|
inbuf += sizeof(unichar);
|
|
|
|
inbytesleft -= sizeof(unichar);
|
|
|
|
}
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
2002-09-08 08:53:35 +00:00
|
|
|
else
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-08 08:53:35 +00:00
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
}
|
2002-09-08 08:53:35 +00:00
|
|
|
else if (strict == YES)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-09-08 08:53:35 +00:00
|
|
|
/*
|
|
|
|
* A positive return from iconv indicates some
|
|
|
|
* irreversible (ie lossy) conversions took place,
|
|
|
|
* so if we are doing strict conversions we must fail.
|
|
|
|
*/
|
2002-03-16 09:54:50 +00:00
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
}
|
2003-07-08 01:50:42 +00:00
|
|
|
} while (!done || rval != 0);
|
2002-03-14 13:58:52 +00:00
|
|
|
// close the converter
|
|
|
|
iconv_close(cd);
|
|
|
|
}
|
2003-07-08 01:50:42 +00:00
|
|
|
#else
|
2002-03-16 09:54:50 +00:00
|
|
|
result = NO;
|
2006-05-30 05:52:51 +00:00
|
|
|
goto done;
|
2005-02-22 11:22:44 +00:00
|
|
|
#endif
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
|
2006-05-30 05:52:51 +00:00
|
|
|
done:
|
|
|
|
|
2002-03-14 13:58:52 +00:00
|
|
|
/*
|
|
|
|
* Post conversion ... set output values.
|
|
|
|
*/
|
2002-03-14 15:27:12 +00:00
|
|
|
if (extra != 0)
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
|
|
|
ptr[dpos] = (unsigned char)0;
|
|
|
|
}
|
|
|
|
*size = dpos;
|
2002-03-16 09:54:50 +00:00
|
|
|
if (dst != 0 && (result == YES || (options & GSUniShortOk)))
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
2002-03-14 15:27:12 +00:00
|
|
|
if (options & GSUniTemporary)
|
|
|
|
{
|
|
|
|
unsigned bytes = dpos + extra;
|
|
|
|
void *r;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Temporary string was requested ... make one.
|
|
|
|
*/
|
2009-03-09 15:11:51 +00:00
|
|
|
#if GS_WITH_GC
|
|
|
|
r = NSAllocateCollectable(bytes, 0);
|
|
|
|
memcpy(r, ptr, bytes);
|
|
|
|
#else
|
2003-03-02 07:47:18 +00:00
|
|
|
r = GSAutoreleasedBuffer(bytes);
|
2002-03-14 15:27:12 +00:00
|
|
|
memcpy(r, ptr, bytes);
|
2002-06-27 13:18:47 +00:00
|
|
|
if (ptr != buf && (dst == 0 || ptr != *dst))
|
2002-03-14 15:27:12 +00:00
|
|
|
{
|
|
|
|
NSZoneFree(zone, ptr);
|
|
|
|
}
|
2009-03-09 15:11:51 +00:00
|
|
|
#endif
|
2002-03-14 15:27:12 +00:00
|
|
|
ptr = r;
|
2003-07-09 02:13:58 +00:00
|
|
|
*dst = ptr;
|
2002-03-14 15:27:12 +00:00
|
|
|
}
|
2002-05-15 05:28:43 +00:00
|
|
|
else if (zone != 0 && (ptr == buf || bsize > dpos))
|
2002-03-14 13:58:52 +00:00
|
|
|
{
|
|
|
|
unsigned bytes = dpos + extra;
|
|
|
|
|
2002-03-14 15:27:12 +00:00
|
|
|
/*
|
|
|
|
* Resizing is permitted - try ensure we return a buffer
|
|
|
|
* which is just big enough to hold the converted string.
|
|
|
|
*/
|
2002-03-14 13:58:52 +00:00
|
|
|
if (ptr == buf || ptr == *dst)
|
|
|
|
{
|
|
|
|
unsigned char *tmp;
|
|
|
|
|
2009-03-09 15:11:51 +00:00
|
|
|
#if GS_WITH_GC
|
|
|
|
tmp = NSAllocateCollectable(bytes, 0);
|
|
|
|
#else
|
2002-03-14 13:58:52 +00:00
|
|
|
tmp = NSZoneMalloc(zone, bytes);
|
2009-03-09 15:11:51 +00:00
|
|
|
#endif
|
2002-03-14 13:58:52 +00:00
|
|
|
if (tmp != 0)
|
|
|
|
{
|
|
|
|
memcpy(tmp, ptr, bytes);
|
|
|
|
}
|
|
|
|
ptr = tmp;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2009-03-09 15:11:51 +00:00
|
|
|
#if GS_WITH_GC
|
|
|
|
ptr = NSReallocateCollectable(ptr, bytes, 0);
|
|
|
|
#else
|
2002-03-14 13:58:52 +00:00
|
|
|
ptr = NSZoneRealloc(zone, ptr, bytes);
|
2009-03-09 15:11:51 +00:00
|
|
|
#endif
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
2003-07-09 02:13:58 +00:00
|
|
|
*dst = ptr;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
2003-05-05 13:36:25 +00:00
|
|
|
else if (ptr == buf)
|
|
|
|
{
|
|
|
|
ptr = NULL;
|
|
|
|
result = NO;
|
|
|
|
}
|
2003-07-09 13:57:43 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
*dst = ptr;
|
|
|
|
}
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
2009-03-09 15:11:51 +00:00
|
|
|
#if !GS_WITH_GC
|
2002-06-27 13:18:47 +00:00
|
|
|
else if (ptr != buf && dst != 0 && ptr != *dst)
|
2002-03-16 09:54:50 +00:00
|
|
|
{
|
|
|
|
NSZoneFree(zone, ptr);
|
|
|
|
}
|
2009-03-09 15:11:51 +00:00
|
|
|
#endif
|
2003-05-05 13:36:25 +00:00
|
|
|
|
2006-02-01 06:18:41 +00:00
|
|
|
if (dst)
|
|
|
|
NSCAssert(*dst != buf, @"attempted to pass out pointer to internal buffer");
|
|
|
|
|
2002-03-16 09:54:50 +00:00
|
|
|
return result;
|
2002-03-14 13:58:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#undef GROW
|
|
|
|
|
2006-10-09 14:00:01 +00:00
|
|
|
|
2006-10-20 10:56:27 +00:00
|
|
|
|
|
|
|
/**
 * Returns the cached, zero-terminated list of supported string encodings,
 * building it on first use.<br />
 * The list is built by asking GSPrivateIsEncodingSupported() about every
 * value from 0 to encTableSize inclusive and recording those for which it
 * answers YES.  The result is stored in _availableEncodings and the same
 * pointer is returned on every subsequent call.<br />
 * Returns a null pointer only if the list could not be allocated.
 */
NSStringEncoding*
GSPrivateAvailableEncodings()
{
  if (_availableEncodings == 0)
    {
      GSSetupEncodingTable();
      [GS_INITIALIZED_LOCK(local_lock, GSLazyLock) lock];
      /* Check again now that we hold the lock ... another thread may
       * have built the list while we were waiting for it.
       */
      if (_availableEncodings == 0)
        {
          NSStringEncoding      *encodings;
          unsigned              pos;
          unsigned              i;

          /*
           * Now build up a list of supported encodings ... in the
           * format needed to support [NSString+availableStringEncodings]
           *
           * The loop below may store up to (encTableSize + 1) values,
           * so we allocate one additional slot to guarantee that the
           * zero terminator always lies inside the buffer.
           */
          encodings = objc_malloc(sizeof(NSStringEncoding) * (encTableSize+2));
          if (encodings != 0)
            {
              pos = 0;
              for (i = 0; i < encTableSize+1; i++)
                {
                  if (GSPrivateIsEncodingSupported(i) == YES)
                    {
                      encodings[pos++] = i;
                    }
                }
              encodings[pos] = 0;
              _availableEncodings = encodings;
            }
        }
      [local_lock unlock];
    }
  return _availableEncodings;
}
|
|
|
|
|
2006-10-20 10:56:27 +00:00
|
|
|
/**
 * Returns the default C string encoding (defEnc), working it out on the
 * first call.<br />
 * The steps, performed while holding local_lock, are:
 * <list>
 *   <item>If the native encoding (natEnc) is not yet known and
 *     nl_langinfo(CODESET) is available, map the codeset name reported
 *     by the system locale onto an NSStringEncoding value.</item>
 *   <item>If the GNUSTEP_STRING_ENCODING environment variable is set,
 *     look it up (case insensitively) against the ename and iconv names
 *     in str_encoding_table; an unknown name produces a warning on
 *     stderr and selects NSISOLatin1StringEncoding.</item>
 *   <item>Otherwise use the native encoding, or
 *     NSISOLatin1StringEncoding if that is not known either.</item>
 *   <item>If the chosen encoding is reported as unsupported by
 *     GSPrivateIsEncodingSupported(), warn on stderr and use
 *     NSISOLatin1StringEncoding instead.</item>
 *   <item>If natEnc is still undefined, set it to the chosen default.</item>
 * </list>
 */
NSStringEncoding
GSPrivateDefaultCStringEncoding()
{
  if (defEnc == GSUndefinedEncoding)
    {
      char              *encoding;
#if HAVE_LANGINFO_CODESET
      char              encbuf[BUFSIZ];
#endif
      unsigned int      count;

      GSSetupEncodingTable();

      [GS_INITIALIZED_LOCK(local_lock, GSLazyLock) lock];
      /* Another thread may have set the default while we waited. */
      if (defEnc != GSUndefinedEncoding)
        {
          [local_lock unlock];
          return defEnc;
        }

      if (natEnc == GSUndefinedEncoding)
        {
          /* Encoding not set */
#if HAVE_LANGINFO_CODESET
          /* Take it from the system locale information.
           * The name is copied into a local buffer while holding
           * gnustep_global_lock, then explicitly nul terminated since
           * strncpy() does not terminate a truncated copy.
           */
          [gnustep_global_lock lock];
          strncpy(encbuf, nl_langinfo(CODESET), sizeof(encbuf)-1);
          [gnustep_global_lock unlock];
          encbuf[sizeof(encbuf)-1] = '\0';
          encoding = encbuf;

          /*
           * First handle the fallback response from nl_langinfo() ...
           * if we are getting the default value we can't assume that
           * the user has set anything up at all, so we must use the
           * OpenStep/GNUstep default encoding ... latin1, even though
           * the nl_langinfo() stuff would say default is ascii.
           */
          if (strcmp(encoding, "ANSI_X3.4-1968") == 0 /* glibc */
            || strcmp(encoding, "ISO_646.IRV:1983") == 0 /* glibc */
            || strcmp(encoding, "646") == 0 /* Solaris NetBSD */)
            natEnc = NSISOLatin1StringEncoding;
          else if (strcmp(encoding, "EUC-JP") == 0 /* glibc */
            /* HP-UX IRIX OSF/1 Solaris NetBSD */
            || strcmp(encoding, "eucJP") == 0
            || strcmp(encoding, "IBM-eucJP") == 0 /* AIX */)
            natEnc = NSJapaneseEUCStringEncoding;
          else if (strcmp(encoding, "UTF-8") == 0 /* glibc AIX OSF/1 Solaris */
            || strcmp(encoding, "utf8") == 0 /* HP-UX */)
            natEnc = NSUTF8StringEncoding;
          else if (strcmp(encoding, "ISO-8859-1") == 0 /* glibc */
            /* AIX IRIX OSF/1 Solaris NetBSD */
            || strcmp(encoding, "ISO8859-1") == 0
            || strcmp(encoding, "iso88591") == 0 /* HP-UX */)
            natEnc = NSISOLatin1StringEncoding;
          else if (strcmp(encoding, "IBM-932") == 0 /* AIX */
            || strcmp(encoding, "SJIS") == 0 /* HP-UX OSF/1 NetBSD */
            || strcmp(encoding, "PCK") == 0 /* Solaris */)
            natEnc = NSShiftJISStringEncoding;
          else if (strcmp(encoding, "ISO-8859-2") == 0 /* glibc */
            /* AIX IRIX OSF/1 Solaris NetBSD */
            || strcmp(encoding, "ISO8859-2") == 0
            || strcmp(encoding, "iso88592") == 0 /* HP-UX */)
            natEnc = NSISOLatin2StringEncoding;
          else if (strcmp(encoding, "CP1251") == 0 /* glibc */
            || strcmp(encoding, "ansi-1251") == 0 /* Solaris */)
            natEnc = NSWindowsCP1251StringEncoding;
          else if (strcmp(encoding, "CP1252") == 0 /*  */
            || strcmp(encoding, "IBM-1252") == 0 /* AIX */)
            natEnc = NSWindowsCP1252StringEncoding;
          else if (strcmp(encoding, "ISO-8859-5") == 0 /* glibc */
            /* AIX IRIX OSF/1 Solaris NetBSD */
            || strcmp(encoding, "ISO8859-5") == 0
            || strcmp(encoding, "iso88595") == 0 /* HP-UX */)
            natEnc = NSISOCyrillicStringEncoding;
          else if (strcmp(encoding, "KOI8-R") == 0 /* glibc */
            || strcmp(encoding, "koi8-r") == 0 /* Solaris */)
            natEnc = NSKOI8RStringEncoding;
          else if (strcmp(encoding, "ISO-8859-3") == 0 /* glibc */
            || strcmp(encoding, "ISO8859-3") == 0 /* Solaris */)
            natEnc = NSISOLatin3StringEncoding;
          else if (strcmp(encoding, "ISO-8859-4") == 0 /*  */
            || strcmp(encoding, "ISO8859-4") == 0 /* OSF/1 Solaris NetBSD */)
            natEnc = NSISOLatin4StringEncoding;
          else if (strcmp(encoding, "ISO-8859-6") == 0 /* glibc */
            || strcmp(encoding, "ISO8859-6") == 0 /* AIX Solaris */
            || strcmp(encoding, "iso88596") == 0 /* HP-UX */)
            natEnc = NSISOArabicStringEncoding;
          else if (strcmp(encoding, "ISO-8859-7") == 0 /* glibc */
            || strcmp(encoding, "ISO8859-7") == 0 /* AIX IRIX OSF/1 Solaris */
            || strcmp(encoding, "iso88597") == 0 /* HP-UX */)
            natEnc = NSISOGreekStringEncoding;
          else if (strcmp(encoding, "ISO-8859-8") == 0 /* glibc */
            || strcmp(encoding, "ISO8859-8") == 0 /* AIX OSF/1 Solaris */
            || strcmp(encoding, "iso88598") == 0 /* HP-UX */)
            natEnc = NSISOHebrewStringEncoding;
          else if (strcmp(encoding, "ISO-8859-9") == 0 /* glibc */
            || strcmp(encoding, "ISO8859-9") == 0 /* AIX IRIX OSF/1 Solaris */
            || strcmp(encoding, "iso88599") == 0 /* HP-UX */)
            natEnc = NSISOLatin5StringEncoding;
          else if (strcmp(encoding, "ISO-8859-10") == 0 /*  */
            || strcmp(encoding, "ISO8859-10") == 0 /*  */)
            natEnc = NSISOLatin6StringEncoding;
          else if (strcmp(encoding, "TIS-620") == 0 /* glibc AIX */
            || strcmp(encoding, "tis620") == 0 /* HP-UX */
            || strcmp(encoding, "TIS620.2533") == 0 /* Solaris */
            || strcmp(encoding, "TACTIS") == 0 /* OSF/1 */)
            natEnc = NSISOThaiStringEncoding;
          else if (strcmp(encoding, "ISO-8859-13") == 0 /* glibc */
            || strcmp(encoding, "ISO8859-13") == 0 /*  */
            || strcmp(encoding, "IBM-921") == 0 /* AIX */)
            natEnc = NSISOLatin7StringEncoding;
          else if (strcmp(encoding, "ISO-8859-14") == 0 /* glibc */
            || strcmp(encoding, "ISO8859-14") == 0 /*  */)
            natEnc = NSISOLatin8StringEncoding;
          else if (strcmp(encoding, "ISO-8859-15") == 0 /* glibc */
            /* AIX OSF/1 Solaris NetBSD */
            || strcmp(encoding, "ISO8859-15") == 0
            || strcmp(encoding, "iso885915") == 0 /* HP-UX */)
            natEnc = NSISOLatin9StringEncoding;
          else if (strcmp(encoding, "GB2312") == 0 /* glibc */
            || strcmp(encoding, "gb2312") == 0 /* Solaris */
            || strcmp(encoding, "eucCN") == 0 /* IRIX NetBSD */
            || strcmp(encoding, "IBM-eucCN") == 0 /* AIX */
            || strcmp(encoding, "hp15CN") == 0 /* HP-UX */)
            natEnc = NSGB2312StringEncoding;
          else if (strcmp(encoding, "BIG5") == 0 /* glibc Solaris NetBSD */
            || strcmp(encoding, "big5") == 0 /* AIX HP-UX OSF/1 */)
            natEnc = NSBIG5StringEncoding;
          else if (strcmp(encoding, "EUC-KR") == 0 /* glibc */
            || strcmp(encoding, "eucKR") == 0 /* HP-UX IRIX OSF/1 NetBSD */
            || strcmp(encoding, "IBM-eucKR") == 0 /* AIX */
            || strcmp(encoding, "5601") == 0 /* Solaris */)
            natEnc = NSKoreanEUCStringEncoding;
#endif
        }

      /* An explicit setting in the environment overrides the locale. */
      encoding = getenv("GNUSTEP_STRING_ENCODING");
      if (encoding != 0)
        {
          /* Scan the table (terminated by an entry whose enc is zero)
           * for an entry whose name or iconv name matches.
           */
          count = 0;
          while (str_encoding_table[count].enc
            && strcasecmp(str_encoding_table[count].ename, encoding)
            && strcasecmp(str_encoding_table[count].iconv, encoding))
            {
              count++;
            }
          if (str_encoding_table[count].enc)
            {
              defEnc = str_encoding_table[count].enc;
            }
          else
            {
              fprintf(stderr,
                "WARNING: %s - encoding not supported.\n", encoding);
              fprintf(stderr,
                " NSISOLatin1StringEncoding set as default.\n");
              defEnc = NSISOLatin1StringEncoding;
            }
        }
      if (defEnc == GSUndefinedEncoding)
        {
          defEnc = natEnc;
        }
      if (defEnc == GSUndefinedEncoding)
        {
          defEnc = NSISOLatin1StringEncoding;
        }
      else if (GSPrivateIsEncodingSupported(defEnc) == NO)
        {
          /* The environment variable may not have been set (in which
           * case the default came from the locale and 'encoding' is a
           * null pointer), so we must not pass it directly to a %s
           * conversion.
           */
          const char    *name = encoding;

          if (name == 0)
            {
              name = "native locale encoding";
            }
          fprintf(stderr, "WARNING: %s - encoding not implemented as "
            "default c string encoding.\n", name);
          fprintf(stderr,
            " NSISOLatin1StringEncoding set as default.\n");
          defEnc = NSISOLatin1StringEncoding;
        }

      if (natEnc == GSUndefinedEncoding)
        {
          natEnc = defEnc;
        }

      [local_lock unlock];
    }
  return defEnc;
}
|
|
|
|
|
2006-10-20 10:56:27 +00:00
|
|
|
/**
 * Returns the name of the supplied encoding as an NSString built from
 * the ename field of the table entry located by EntrySupported().<br />
 * Returns the literal string "Unknown encoding" when EntrySupported()
 * finds no entry for the encoding.
 */
NSString*
GSPrivateEncodingName(NSStringEncoding encoding)
{
  struct _strenc_       *encInfo = EntrySupported(encoding);

  /* EntrySupported() returns a pointer, so test it against the null
   * pointer rather than against the BOOL constant NO.
   */
  if (encInfo == 0)
    {
      return @"Unknown encoding";
    }
  return [NSString stringWithUTF8String: encInfo->ename];
}
|
|
|
|
|
2006-10-20 10:56:27 +00:00
|
|
|
/**
 * Returns the eightBit flag of the table entry located by
 * EntrySupported() for the supplied encoding, or NO when
 * EntrySupported() finds no entry for it.
 */
BOOL
GSPrivateIsByteEncoding(NSStringEncoding encoding)
{
  struct _strenc_       *encInfo = EntrySupported(encoding);

  /* EntrySupported() returns a pointer, so test it against the null
   * pointer rather than against the BOOL constant NO.
   */
  if (encInfo == 0)
    {
      return NO;
    }
  return encInfo->eightBit;
}
|
|
|
|
|
2007-03-14 16:04:13 +00:00
|
|
|
/**
 * Returns the native C string encoding (natEnc).<br />
 * When natEnc has not been determined yet, this calls
 * GSPrivateDefaultCStringEncoding() first, relying on that function
 * to assign natEnc as part of its own setup.
 */
NSStringEncoding
GSPrivateNativeCStringEncoding()
{
  if (natEnc != GSUndefinedEncoding)
    {
      return natEnc;    // Already known ... nothing more to do.
    }

  /* The return value is not needed here; the call is made purely so
   * that the encoding gets set up.
   */
  GSPrivateDefaultCStringEncoding();
  return natEnc;
}
|
2006-10-09 14:00:01 +00:00
|
|
|
|