mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-22 08:26:27 +00:00
Unicode.m: New functions to guess encoding from locale.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@14831 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
b6b810294b
commit
0662b0c200
6 changed files with 231 additions and 41 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
|||
2002-10-21 Adam Fedor <fedor@gnu.org>
|
||||
|
||||
* Source/GSLocale.m (GSLanguageFromLocale): Implement regardless
|
||||
of HAVE_LOCALE_H.
|
||||
|
||||
* Source/Unicode.m (GSEncodingForRegistry): New (from
|
||||
[GSFontInfo +encodingForRegistry:encoding:]).
|
||||
(GSEncodingFromLocale): New.
|
||||
(GetDefEncoding): Rearrange encoding check.
|
||||
* Resources/Languages/Locale.encodings: New file.
|
||||
|
||||
2002-10-21 Adam Fedor <fedor@gnu.org>
|
||||
|
||||
* Documentation/coding-standards.texi: Update.
|
||||
|
|
|
@ -38,6 +38,9 @@
|
|||
|
||||
GS_EXPORT NSStringEncoding *GetAvailableEncodings();
|
||||
GS_EXPORT NSStringEncoding GetDefEncoding();
|
||||
GS_EXPORT NSStringEncoding GSEncodingFromLocale(const char *clocale);
|
||||
GS_EXPORT NSStringEncoding GSEncodingForRegistry(NSString *registry,
|
||||
NSString *encoding);
|
||||
GS_EXPORT NSString* GetEncodingName(NSStringEncoding encoding);
|
||||
|
||||
GS_EXPORT unichar chartouni(unsigned char c);
|
||||
|
|
|
@ -45,7 +45,8 @@ Languages/Russian \
|
|||
Languages/Slovak \
|
||||
Languages/UkraineRussian
|
||||
|
||||
LANGUAGE_INSTALL_FILES = $(LANGUAGE_FILES) Languages/Locale.aliases
|
||||
LANGUAGE_INSTALL_FILES = $(LANGUAGE_FILES) Languages/Locale.aliases \
|
||||
Languages/Locale.encodings
|
||||
|
||||
-include GNUmakefile.local
|
||||
|
||||
|
|
3
Resources/Languages/Locale.encodings
Normal file
3
Resources/Languages/Locale.encodings
Normal file
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
th_TH = NSThaiStringEncoding;
|
||||
}
|
|
@ -211,6 +211,21 @@ GSDomainFromDefaultLocale(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
#else /* HAVE_LOCALE_H */
|
||||
NSString *
|
||||
GSSetLocale(NSString *locale)
|
||||
{
|
||||
return nil;
|
||||
}
|
||||
|
||||
NSDictionary *
|
||||
GSDomainFromDefaultLocale(void)
|
||||
{
|
||||
return nil;
|
||||
}
|
||||
|
||||
#endif /* !HAVE_LOCALE_H */
|
||||
|
||||
NSString *
|
||||
GSLanguageFromLocale(NSString *locale)
|
||||
{
|
||||
|
@ -243,26 +258,3 @@ GSLanguageFromLocale(NSString *locale)
|
|||
|
||||
return language;
|
||||
}
|
||||
|
||||
#else /* HAVE_LOCALE_H */
|
||||
NSString *
|
||||
GSSetLocale(NSString *locale)
|
||||
{
|
||||
return nil;
|
||||
}
|
||||
|
||||
NSDictionary *
|
||||
GSDomainFromDefaultLocale(void)
|
||||
{
|
||||
return nil;
|
||||
}
|
||||
|
||||
NSString *
|
||||
GSLanguageFromLocale(NSString *locale)
|
||||
{
|
||||
return nil;
|
||||
}
|
||||
|
||||
#endif /* !HAVE_LOCALE_H */
|
||||
|
||||
|
||||
|
|
214
Source/Unicode.m
214
Source/Unicode.m
|
@ -28,11 +28,16 @@
|
|||
*/
|
||||
|
||||
#include <config.h>
|
||||
#include <Foundation/NSArray.h>
|
||||
#include <Foundation/NSBundle.h>
|
||||
#include <Foundation/NSDictionary.h>
|
||||
#include <Foundation/NSString.h>
|
||||
#include <Foundation/NSLock.h>
|
||||
#include <Foundation/NSPathUtilities.h>
|
||||
#include <base/Unicode.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef struct {unichar from; unsigned char to;} _ucc_;
|
||||
|
||||
|
@ -315,6 +320,183 @@ NSStringEncoding *GetAvailableEncodings()
|
|||
return _availableEncodings;
|
||||
}
|
||||
|
||||
/** Returns the NSStringEncoding that matches the specified
|
||||
* character set registry and encoding information. For instance,
|
||||
* for the iso8859-5 character set, the registry is iso8859 and
|
||||
* the encoding is 5, and the returned NSStringEncoding is
|
||||
* NSISOCyrillicStringEncoding. If there is no specific encoding,
|
||||
* use @"0".
|
||||
*/
|
||||
NSStringEncoding
|
||||
GSEncodingForRegistry (NSString *registry, NSString *encoding)
|
||||
{
|
||||
if ([registry isEqualToString: @"iso8859"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"1"])
|
||||
return NSISOLatin1StringEncoding;
|
||||
else if ([encoding isEqualToString: @"2"])
|
||||
return NSISOLatin2StringEncoding;
|
||||
else if ([encoding isEqualToString: @"3"])
|
||||
return NSISOLatin3StringEncoding;
|
||||
else if ([encoding isEqualToString: @"4"])
|
||||
return NSISOLatin4StringEncoding;
|
||||
else if ([encoding isEqualToString: @"5"])
|
||||
return NSISOCyrillicStringEncoding;
|
||||
else if ([encoding isEqualToString: @"6"])
|
||||
return NSISOArabicStringEncoding;
|
||||
else if ([encoding isEqualToString: @"7"])
|
||||
return NSISOGreekStringEncoding;
|
||||
else if ([encoding isEqualToString: @"8"])
|
||||
return NSISOHebrewStringEncoding;
|
||||
// Other latin encodings are currently not supported
|
||||
}
|
||||
else if ([registry isEqualToString: @"iso10646"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"1"])
|
||||
return NSUnicodeStringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString: @"microsoft"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"symbol"])
|
||||
return NSSymbolStringEncoding;
|
||||
else if ([encoding isEqualToString: @"cp1250"])
|
||||
return NSWindowsCP1250StringEncoding;
|
||||
else if ([encoding isEqualToString: @"cp1251"])
|
||||
return NSWindowsCP1251StringEncoding;
|
||||
else if ([encoding isEqualToString: @"cp1252"])
|
||||
return NSWindowsCP1252StringEncoding;
|
||||
else if ([encoding isEqualToString: @"cp1253"])
|
||||
return NSWindowsCP1253StringEncoding;
|
||||
else if ([encoding isEqualToString: @"cp1254"])
|
||||
return NSWindowsCP1254StringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString: @"apple"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"roman"])
|
||||
return NSMacOSRomanStringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString: @"jisx0201.1976"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"0"])
|
||||
return NSShiftJISStringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString: @"iso646.1991"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"irv"])
|
||||
return NSASCIIStringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString: @"koi8"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"r"])
|
||||
return NSKOI8RStringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString: @"gb2312.1980"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"0"])
|
||||
return NSGB2312StringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString: @"big5"])
|
||||
{
|
||||
if ([encoding isEqualToString: @"0"])
|
||||
return NSBIG5StringEncoding;
|
||||
}
|
||||
else if ([registry isEqualToString:@"utf8"]
|
||||
|| [registry isEqualToString:@"utf-8"] )
|
||||
{
|
||||
return NSUTF8StringEncoding;
|
||||
}
|
||||
|
||||
return NSASCIIStringEncoding;
|
||||
}
|
||||
|
||||
/** Try to deduce the string encoding from the locale string
|
||||
* clocale. This function looks in the Locale.encodings file
|
||||
* installed as part of GNUstep Base if the encoding cannot be
|
||||
* deduced from the clocale string itself. If clocale isn't set or
|
||||
* no match can be found, returns GSUndefinedEncoding.
|
||||
*/
|
||||
/* It would be really nice if this could be used in GetDefEncoding, but
|
||||
* there are too many dependancies on other parts of the library to
|
||||
* make this practical (even if everything possible was written in C,
|
||||
* we'd still need some way to find the Locale.encodings file).
|
||||
*/
|
||||
NSStringEncoding
|
||||
GSEncodingFromLocale(const char *clocale)
|
||||
{
|
||||
NSStringEncoding encoding;
|
||||
NSString *encodstr;
|
||||
|
||||
if (clocale == NULL || strcmp(clocale, "C") == 0
|
||||
|| strcmp(clocale, "POSIX") == 0)
|
||||
{
|
||||
/* Don't make any assumptions. Let caller handle that */
|
||||
return GSUndefinedEncoding;
|
||||
}
|
||||
|
||||
if (strchr (clocale, '.') != NULL)
|
||||
{
|
||||
/* Locale contains the 'codeset' section. Parse it and see
|
||||
if we know what encoding this cooresponds to */
|
||||
NSString *registry;
|
||||
NSArray *array;
|
||||
char *s;
|
||||
s = strchr (clocale, '.');
|
||||
registry = [[NSString stringWithCString: s+1] lowercaseString];
|
||||
array = [registry componentsSeparatedByString: @"-"];
|
||||
registry = [array objectAtIndex: 0];
|
||||
if ([array count] > 1)
|
||||
{
|
||||
encodstr = [array lastObject];
|
||||
}
|
||||
else
|
||||
{
|
||||
encodstr = @"0";
|
||||
}
|
||||
|
||||
encoding = GSEncodingForRegistry(registry, encodstr);
|
||||
if (encoding == NSASCIIStringEncoding)
|
||||
encoding = GSUndefinedEncoding;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Look up the locale in our table of encodings */
|
||||
NSString *table;
|
||||
|
||||
table = [NSBundle pathForGNUstepResource: @"Locale"
|
||||
ofType: @"encodings"
|
||||
inDirectory: @"Resources/Languages"];
|
||||
if (table != nil)
|
||||
{
|
||||
int count;
|
||||
NSDictionary *dict;
|
||||
|
||||
dict = [NSDictionary dictionaryWithContentsOfFile: table];
|
||||
encodstr = [dict objectForKey:
|
||||
[NSString stringWithCString: clocale]];
|
||||
|
||||
/* Find the matching encoding */
|
||||
count = 0;
|
||||
while (str_encoding_table[count].enc
|
||||
&& strcmp(str_encoding_table[count].ename,
|
||||
[encodstr lossyCString]))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (str_encoding_table[count].enc)
|
||||
{
|
||||
encoding = str_encoding_table[count].enc;
|
||||
}
|
||||
if (encoding == GSUndefinedEncoding)
|
||||
{
|
||||
NSLog(@"No known GNUstep encoding for %s = %@",
|
||||
clocale, encodstr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return encoding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the default encoding
|
||||
*/
|
||||
|
@ -340,36 +522,34 @@ GetDefEncoding()
|
|||
{
|
||||
count = 0;
|
||||
while (str_encoding_table[count].enc
|
||||
&& strcmp(str_encoding_table[count].ename, encoding))
|
||||
&& strcmp(str_encoding_table[count].ename, encoding))
|
||||
{
|
||||
count++;
|
||||
}
|
||||
if (str_encoding_table[count].enc)
|
||||
{
|
||||
defEnc = str_encoding_table[count].enc;
|
||||
if (GSEncodingSupported(defEnc) == NO)
|
||||
{
|
||||
fprintf(stderr, "WARNING: %s - encoding not implemented as "
|
||||
"default c string encoding.\n", encoding);
|
||||
fprintf(stderr,
|
||||
"NSISOLatin1StringEncoding set as default.\n");
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
}
|
||||
else /* encoding not found */
|
||||
else
|
||||
{
|
||||
fprintf(stderr,
|
||||
"WARNING: %s - encoding not supported.\n", encoding);
|
||||
fprintf(stderr, "NSISOLatin1StringEncoding set as default.\n");
|
||||
"WARNING: %s - encoding not supported.\n", encoding);
|
||||
fprintf(stderr,
|
||||
" NSISOLatin1StringEncoding set as default.\n");
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
}
|
||||
else /* environment var not found */
|
||||
if (defEnc == GSUndefinedEncoding)
|
||||
{
|
||||
/* shouldn't be required. It really should be in UserDefaults - asf */
|
||||
//fprintf(stderr, "WARNING: GNUSTEP_STRING_ENCODING environment");
|
||||
//fprintf(stderr, " variable not found.\n");
|
||||
//fprintf(stderr, "NSISOLatin1StringEncoding set as default.\n");
|
||||
/* Encoding not set */
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
else if (GSEncodingSupported(defEnc) == NO)
|
||||
{
|
||||
fprintf(stderr, "WARNING: %s - encoding not implemented as "
|
||||
"default c string encoding.\n", encoding);
|
||||
fprintf(stderr,
|
||||
" NSISOLatin1StringEncoding set as default.\n");
|
||||
defEnc = NSISOLatin1StringEncoding;
|
||||
}
|
||||
[gnustep_global_lock unlock];
|
||||
|
|
Loading…
Reference in a new issue