mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-22 16:33:29 +00:00
Another attempt to improve character set detection
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@37059 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
bb57e65aa2
commit
a319576293
5 changed files with 157 additions and 123 deletions
|
@ -1,3 +1,11 @@
|
|||
2013-09-10 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* configure.ac: Check for another unicode header (unicode/ucnv.h)
|
||||
* Headers/GNUstepBase/config.h.in: regenerate
|
||||
* configure: regenerate
|
||||
* Source/Additions/Unicode.m: If ICU is available, use it to obtain
|
||||
default C-string encoding.
|
||||
|
||||
2013-09-09 Pirmin Braun <pb@intars.de>
|
||||
|
||||
* Source/Additions/GSMime.m: When creating a child parser, pass on
|
||||
|
|
|
@ -670,6 +670,9 @@
|
|||
/* Define to 1 if you have the <unicode/ucal.h> header file. */
|
||||
#undef HAVE_UNICODE_UCAL_H
|
||||
|
||||
/* Define to 1 if you have the <unicode/ucnv.h> header file. */
|
||||
#undef HAVE_UNICODE_UCNV_H
|
||||
|
||||
/* Define to 1 if you have the <unicode/ucol.h> header file. */
|
||||
#undef HAVE_UNICODE_UCOL_H
|
||||
|
||||
|
|
|
@ -54,6 +54,10 @@
|
|||
#if HAVE_LANGINFO_CODESET
|
||||
#include <langinfo.h>
|
||||
#endif
|
||||
#if defined(HAVE_UNICODE_UCNV_H)
|
||||
#include <unicode/ucnv.h>
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct {unichar from; unsigned char to;} _ucc_;
|
||||
|
||||
|
@ -2544,7 +2548,7 @@ GSPrivateDefaultCStringEncoding()
|
|||
{
|
||||
if (defEnc == GSUndefinedEncoding)
|
||||
{
|
||||
char *encoding;
|
||||
const char *encoding = 0;
|
||||
#if HAVE_LANGINFO_CODESET
|
||||
char encbuf[BUFSIZ];
|
||||
#endif
|
||||
|
@ -2562,128 +2566,146 @@ GSPrivateDefaultCStringEncoding()
|
|||
if (natEnc == GSUndefinedEncoding)
|
||||
{
|
||||
/* Encoding not set */
|
||||
#if HAVE_LANGINFO_CODESET
|
||||
char *old;
|
||||
/* Take it from the system locale information. */
|
||||
[gnustep_global_lock lock];
|
||||
/* Initialise locale system by setting current locale from
|
||||
* environment and then resetting it. Must be done before
|
||||
* any call to nl_langinfo()
|
||||
*/
|
||||
if (0 != (old = setlocale(LC_CTYPE, "")))
|
||||
{
|
||||
setlocale(LC_CTYPE, old);
|
||||
}
|
||||
strncpy(encbuf, nl_langinfo(CODESET), sizeof(encbuf)-1);
|
||||
encbuf[sizeof(encbuf)-1] = '\0';
|
||||
[gnustep_global_lock unlock];
|
||||
encoding = encbuf;
|
||||
#if GS_USE_ICU == 1
|
||||
|
||||
/*
|
||||
* First handle the fallback response from nl_langinfo() ...
|
||||
* if we are getting the default value we can't assume that
|
||||
* the user has set anything up at all, so we must use the
|
||||
* OpenStep/GNUstep default encoding ... latin1, even though
|
||||
* the nl_langinfo() stuff would say default is ascii.
|
||||
*/
|
||||
if (strcmp(encoding, "ANSI_X3.4-1968") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO_646.IRV:1983") == 0 /* glibc */
|
||||
|| strcmp(encoding, "646") == 0 /* Solaris NetBSD */)
|
||||
natEnc = NSISOLatin1StringEncoding;
|
||||
else if (strcmp(encoding, "EUC-JP") == 0 /* glibc */
|
||||
/* HP-UX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "eucJP") == 0
|
||||
|| strcmp(encoding, "IBM-eucJP") == 0 /* AIX */)
|
||||
natEnc = NSJapaneseEUCStringEncoding;
|
||||
else if (strcmp(encoding, "UTF-8") == 0 /* glibc AIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "utf8") == 0 /* HP-UX */)
|
||||
natEnc = NSUTF8StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-1") == 0 /* glibc */
|
||||
/* AIX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-1") == 0
|
||||
|| strcmp(encoding, "iso88591") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin1StringEncoding;
|
||||
else if (strcmp(encoding, "IBM-932") == 0 /* AIX */
|
||||
|| strcmp(encoding, "SJIS") == 0 /* HP-UX OSF/1 NetBSD */
|
||||
|| strcmp(encoding, "PCK") == 0 /* Solaris */)
|
||||
natEnc = NSShiftJISStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-2") == 0 /* glibc */
|
||||
/* AIX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-2") == 0
|
||||
|| strcmp(encoding, "iso88592") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin2StringEncoding;
|
||||
else if (strcmp(encoding, "CP1251") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ansi-1251") == 0 /* Solaris */)
|
||||
natEnc = NSWindowsCP1251StringEncoding;
|
||||
else if (strcmp(encoding, "CP1252") == 0 /* */
|
||||
|| strcmp(encoding, "IBM-1252") == 0 /* AIX */)
|
||||
natEnc = NSWindowsCP1252StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-5") == 0 /* glibc */
|
||||
/* AIX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-5") == 0
|
||||
|| strcmp(encoding, "iso88595") == 0 /* HP-UX */)
|
||||
natEnc = NSISOCyrillicStringEncoding;
|
||||
else if (strcmp(encoding, "KOI8-R") == 0 /* glibc */
|
||||
|| strcmp(encoding, "koi8-r") == 0 /* Solaris */)
|
||||
natEnc = NSKOI8RStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-3") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-3") == 0 /* Solaris */)
|
||||
natEnc = NSISOLatin3StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-4") == 0 /* */
|
||||
|| strcmp(encoding, "ISO8859-4") == 0 /* OSF/1 Solaris NetBSD */)
|
||||
natEnc = NSISOLatin4StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-6") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-6") == 0 /* AIX Solaris */
|
||||
|| strcmp(encoding, "iso88596") == 0 /* HP-UX */)
|
||||
natEnc = NSISOArabicStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-7") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-7") == 0 /* AIX IRIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "iso88597") == 0 /* HP-UX */)
|
||||
natEnc = NSISOGreekStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-8") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-8") == 0 /* AIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "iso88598") == 0 /* HP-UX */)
|
||||
natEnc = NSISOHebrewStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-9") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-9") == 0 /* AIX IRIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "iso88599") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin5StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-10") == 0 /* */
|
||||
|| strcmp(encoding, "ISO8859-10") == 0 /* */)
|
||||
natEnc = NSISOLatin6StringEncoding;
|
||||
else if (strcmp(encoding, "TIS-620") == 0 /* glibc AIX */
|
||||
|| strcmp(encoding, "tis620") == 0 /* HP-UX */
|
||||
|| strcmp(encoding, "TIS620.2533") == 0 /* Solaris */
|
||||
|| strcmp(encoding, "TACTIS") == 0 /* OSF/1 */)
|
||||
natEnc = NSISOThaiStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-13") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-13") == 0 /* */
|
||||
|| strcmp(encoding, "IBM-921") == 0 /* AIX */)
|
||||
natEnc = NSISOLatin7StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-14") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-14") == 0 /* */)
|
||||
natEnc = NSISOLatin8StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-15") == 0 /* glibc */
|
||||
/* AIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-15") == 0
|
||||
|| strcmp(encoding, "iso885915") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin9StringEncoding;
|
||||
else if (strcmp(encoding, "GB2312") == 0 /* glibc */
|
||||
|| strcmp(encoding, "gb2312") == 0 /* Solaris */
|
||||
|| strcmp(encoding, "eucCN") == 0 /* IRIX NetBSD */
|
||||
|| strcmp(encoding, "IBM-eucCN") == 0 /* AIX */
|
||||
|| strcmp(encoding, "hp15CN") == 0 /* HP-UX */)
|
||||
natEnc = NSGB2312StringEncoding;
|
||||
else if (strcmp(encoding, "BIG5") == 0 /* glibc Solaris NetBSD */
|
||||
|| strcmp(encoding, "big5") == 0 /* AIX HP-UX OSF/1 */)
|
||||
natEnc = NSBIG5StringEncoding;
|
||||
else if (strcmp(encoding, "EUC-KR") == 0 /* glibc */
|
||||
|| strcmp(encoding, "eucKR") == 0 /* HP-UX IRIX OSF/1 NetBSD */
|
||||
|| strcmp(encoding, "IBM-eucKR") == 0 /* AIX */
|
||||
|| strcmp(encoding, "5601") == 0 /* Solaris */)
|
||||
natEnc = NSKoreanEUCStringEncoding;
|
||||
const char *defaultName;
|
||||
UErrorCode err = U_ZERO_ERROR;
|
||||
|
||||
defaultName = ucnv_getDefaultName ();
|
||||
encoding = ucnv_getStandardName (defaultName, "MIME", &err);
|
||||
if (0 == encoding)
|
||||
{
|
||||
encoding = ucnv_getStandardName (defaultName, "IANA", &err);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#if HAVE_LANGINFO_CODESET
|
||||
if (0 == encoding)
|
||||
{
|
||||
char *old;
|
||||
/* Take it from the system locale information. */
|
||||
[gnustep_global_lock lock];
|
||||
/* Initialise locale system by setting current locale from
|
||||
* environment and then resetting it. Must be done before
|
||||
* any call to nl_langinfo()
|
||||
*/
|
||||
if (0 != (old = setlocale(LC_CTYPE, "")))
|
||||
{
|
||||
setlocale(LC_CTYPE, old);
|
||||
}
|
||||
strncpy(encbuf, nl_langinfo(CODESET), sizeof(encbuf)-1);
|
||||
encbuf[sizeof(encbuf)-1] = '\0';
|
||||
[gnustep_global_lock unlock];
|
||||
encoding = encbuf;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (0 != encoding)
|
||||
{
|
||||
/*
|
||||
* First handle the fallback response from nl_langinfo() ...
|
||||
* if we are getting the default value we can't assume that
|
||||
* the user has set anything up at all, so we must use the
|
||||
* OpenStep/GNUstep default encoding ... latin1, even though
|
||||
* the nl_langinfo() stuff would say default is ascii.
|
||||
*/
|
||||
if (strcmp(encoding, "ANSI_X3.4-1968") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO_646.IRV:1983") == 0 /* glibc */
|
||||
|| strcmp(encoding, "646") == 0 /* Solaris NetBSD */)
|
||||
natEnc = NSISOLatin1StringEncoding;
|
||||
else if (strcmp(encoding, "EUC-JP") == 0 /* glibc */
|
||||
/* HP-UX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "eucJP") == 0
|
||||
|| strcmp(encoding, "IBM-eucJP") == 0 /* AIX */)
|
||||
natEnc = NSJapaneseEUCStringEncoding;
|
||||
else if (strcmp(encoding, "UTF-8") == 0 /* glibc AIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "utf8") == 0 /* HP-UX */)
|
||||
natEnc = NSUTF8StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-1") == 0 /* glibc */
|
||||
/* AIX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-1") == 0
|
||||
|| strcmp(encoding, "iso88591") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin1StringEncoding;
|
||||
else if (strcmp(encoding, "IBM-932") == 0 /* AIX */
|
||||
|| strcmp(encoding, "SJIS") == 0 /* HP-UX OSF/1 NetBSD */
|
||||
|| strcmp(encoding, "PCK") == 0 /* Solaris */)
|
||||
natEnc = NSShiftJISStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-2") == 0 /* glibc */
|
||||
/* AIX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-2") == 0
|
||||
|| strcmp(encoding, "iso88592") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin2StringEncoding;
|
||||
else if (strcmp(encoding, "CP1251") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ansi-1251") == 0 /* Solaris */)
|
||||
natEnc = NSWindowsCP1251StringEncoding;
|
||||
else if (strcmp(encoding, "CP1252") == 0 /* */
|
||||
|| strcmp(encoding, "IBM-1252") == 0 /* AIX */)
|
||||
natEnc = NSWindowsCP1252StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-5") == 0 /* glibc */
|
||||
/* AIX IRIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-5") == 0
|
||||
|| strcmp(encoding, "iso88595") == 0 /* HP-UX */)
|
||||
natEnc = NSISOCyrillicStringEncoding;
|
||||
else if (strcmp(encoding, "KOI8-R") == 0 /* glibc */
|
||||
|| strcmp(encoding, "koi8-r") == 0 /* Solaris */)
|
||||
natEnc = NSKOI8RStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-3") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-3") == 0 /* Solaris */)
|
||||
natEnc = NSISOLatin3StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-4") == 0 /* */
|
||||
|| strcmp(encoding, "ISO8859-4") == 0 /* OSF/1 Solaris NetBSD */)
|
||||
natEnc = NSISOLatin4StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-6") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-6") == 0 /* AIX Solaris */
|
||||
|| strcmp(encoding, "iso88596") == 0 /* HP-UX */)
|
||||
natEnc = NSISOArabicStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-7") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-7") == 0 /* AIX IRIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "iso88597") == 0 /* HP-UX */)
|
||||
natEnc = NSISOGreekStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-8") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-8") == 0 /* AIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "iso88598") == 0 /* HP-UX */)
|
||||
natEnc = NSISOHebrewStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-9") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-9") == 0 /* AIX IRIX OSF/1 Solaris */
|
||||
|| strcmp(encoding, "iso88599") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin5StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-10") == 0 /* */
|
||||
|| strcmp(encoding, "ISO8859-10") == 0 /* */)
|
||||
natEnc = NSISOLatin6StringEncoding;
|
||||
else if (strcmp(encoding, "TIS-620") == 0 /* glibc AIX */
|
||||
|| strcmp(encoding, "tis620") == 0 /* HP-UX */
|
||||
|| strcmp(encoding, "TIS620.2533") == 0 /* Solaris */
|
||||
|| strcmp(encoding, "TACTIS") == 0 /* OSF/1 */)
|
||||
natEnc = NSISOThaiStringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-13") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-13") == 0 /* */
|
||||
|| strcmp(encoding, "IBM-921") == 0 /* AIX */)
|
||||
natEnc = NSISOLatin7StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-14") == 0 /* glibc */
|
||||
|| strcmp(encoding, "ISO8859-14") == 0 /* */)
|
||||
natEnc = NSISOLatin8StringEncoding;
|
||||
else if (strcmp(encoding, "ISO-8859-15") == 0 /* glibc */
|
||||
/* AIX OSF/1 Solaris NetBSD */
|
||||
|| strcmp(encoding, "ISO8859-15") == 0
|
||||
|| strcmp(encoding, "iso885915") == 0 /* HP-UX */)
|
||||
natEnc = NSISOLatin9StringEncoding;
|
||||
else if (strcmp(encoding, "GB2312") == 0 /* glibc */
|
||||
|| strcmp(encoding, "gb2312") == 0 /* Solaris */
|
||||
|| strcmp(encoding, "eucCN") == 0 /* IRIX NetBSD */
|
||||
|| strcmp(encoding, "IBM-eucCN") == 0 /* AIX */
|
||||
|| strcmp(encoding, "hp15CN") == 0 /* HP-UX */)
|
||||
natEnc = NSGB2312StringEncoding;
|
||||
else if (strcmp(encoding, "BIG5") == 0 /* glibc Solaris NetBSD */
|
||||
|| strcmp(encoding, "big5") == 0 /* AIX HP-UX OSF/1 */)
|
||||
natEnc = NSBIG5StringEncoding;
|
||||
else if (strcmp(encoding, "EUC-KR") == 0 /* glibc */
|
||||
|| strcmp(encoding, "eucKR") == 0 /* HP-UX IRIX OSF/1 NetBSD */
|
||||
|| strcmp(encoding, "IBM-eucKR") == 0 /* AIX */
|
||||
|| strcmp(encoding, "5601") == 0 /* Solaris */)
|
||||
natEnc = NSKoreanEUCStringEncoding;
|
||||
}
|
||||
}
|
||||
|
||||
encoding = getenv("GNUSTEP_STRING_ENCODING");
|
||||
if (encoding != 0)
|
||||
|
|
3
configure
vendored
3
configure
vendored
|
@ -26090,7 +26090,8 @@ $as_echo "yes" >&6; }
|
|||
|
||||
|
||||
|
||||
for ac_header in unicode/uloc.h unicode/ulocdata.h unicode/ucol.h unicode/ucurr.h unicode/uregex.h unicode/ucal.h unicode/unorm2.h unicode/unum.h unicode/udat.h unicode/udatpg.h unicode/ustring.h unicode/usearch.h
|
||||
|
||||
for ac_header in unicode/uloc.h unicode/ulocdata.h unicode/ucol.h unicode/ucurr.h unicode/uregex.h unicode/ucal.h unicode/unorm2.h unicode/unum.h unicode/udat.h unicode/udatpg.h unicode/ustring.h unicode/usearch.h unicode/ucnv.h
|
||||
do
|
||||
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
|
||||
if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
|
||||
|
|
|
@ -3204,7 +3204,7 @@ if test $enable_icu = yes; then
|
|||
|
||||
if test "$have_icu" = "yes"; then
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_CHECK_HEADERS(unicode/uloc.h unicode/ulocdata.h unicode/ucol.h unicode/ucurr.h unicode/uregex.h unicode/ucal.h unicode/unorm2.h unicode/unum.h unicode/udat.h unicode/udatpg.h unicode/ustring.h unicode/usearch.h)
|
||||
AC_CHECK_HEADERS(unicode/uloc.h unicode/ulocdata.h unicode/ucol.h unicode/ucurr.h unicode/uregex.h unicode/ucal.h unicode/unorm2.h unicode/unum.h unicode/udat.h unicode/udatpg.h unicode/ustring.h unicode/usearch.h unicode/ucnv.h)
|
||||
LIBS="$LIBS $ICU_LIBS"
|
||||
LDFLAGS="$LDFLAGS $ICU_LDFLAGS"
|
||||
LDIR_FLAGS="$LDIR_FLAGS $ICU_LDFLAGS"
|
||||
|
|
Loading…
Reference in a new issue