From f6b9dbb990e6e8e335f242aa33c533d82deb2ee2 Mon Sep 17 00:00:00 2001 From: Richard Frith-Macdonald Date: Sun, 7 Apr 2002 18:56:08 +0000 Subject: [PATCH] More character encoding fixes ... correct language files. git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@13390 72102866-910b-0410-8b05-ffd578937521 --- ChangeLog | 15 ++++- Resources/Languages/French | 12 ++-- Resources/Languages/German | 6 +- Resources/Languages/Italian | 4 +- Resources/Languages/README | 19 +++++++ Source/NSLog.m | 8 ++- Tools/cvtenc.m | 110 ++++++++++++++++++++++++++++++++++-- 7 files changed, 155 insertions(+), 19 deletions(-) create mode 100644 Resources/Languages/README diff --git a/ChangeLog b/ChangeLog index 4667be217..13364a27a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,23 @@ +2002-04-07 Richard Frith-Macdonald + + * Source/GSString.m: Tidy initialisers for case where the default + C-string encoding is not usable internally. + * Source/NSLog.m: Try default cString encoding. + * Tools/cvtenc.m: Add -EscapeIn and -EscapeOut flags to handle \u + escapes in files. + * Resources/Languages/Italian: Correct to use \u escapes + * Resources/Languages/French: ditto. + * Resources/Languages/German: ditto. + * Resources/Languages/README: New file to tell editors that they + should use \u escapes ... and suggesting use of the cvtenc tool + to make the job easy. + 2002-04-06 Richard Frith-Macdonald * Source/NSString.m: Fix error in unicode BOM handling Write plist unicode escapes using the conventional \u rather than \U, but read in either form. * Source/GSString.m: Fix error in length of unicode data produced. - Tidy initilisers. * Tools/cvtenc.m: Made easy to use and added instructions. 
Sat Apr 6 02:42:43 2002 Nicola Pero diff --git a/Resources/Languages/French b/Resources/Languages/French index a18236780..9f20795d0 100644 --- a/Resources/Languages/French +++ b/Resources/Languages/French @@ -2,7 +2,7 @@ { NSLanguageName = "French"; NSLanguageCode = FRE; - NSFormalName = "Français"; + NSFormalName = "Fran\u00e7ais"; NSParentContext = "Default"; NSCurrencySymbol = "EUR"; @@ -18,11 +18,11 @@ NSDateFormatString = "%A %d %B %Y"; NSDateTimeOrdering = DMYH; NSEarlierTimeDesignations = (avant); - NSHourNameDesignations = ((0, minuit), (12, midi), (10, matin), (14, "après-midi" ), (19, soir)); - NSLaterTimeDesignations = ("après"); - NSMonthNameArray = (Janvier, "Février", Mars, Avril, Mai, Juin, Juillet, "Août", Septembre, Octobre, Novembre, "Décembre"); + NSHourNameDesignations = ((0, minuit), (12, midi), (10, matin), (14, "apr\u00e8s-midi" ), (19, soir)); + NSLaterTimeDesignations = ("apr\u00e8s"); + NSMonthNameArray = (Janvier, "F\u00e9vrier", Mars, Avril, Mai, Juin, Juillet, "Ao\u00fbt", Septembre, Octobre, Novembre, "D\u00e9cembre"); NSNextDayDesignations = (demain); - NSNextNextDayDesignations = ("après demain"); + NSNextNextDayDesignations = ("apr\u00e8s demain"); NSPriorDayDesignations = (hier); NSShortDateFormatString = "%d/%m/%Y"; /* 20/11/1974 */ NSShortMonthNameArray = (Jan, Fev, Mar, Avr, Mai, Jun, Jui, Aou, Sep, Oct, Nov, Dec); @@ -32,7 +32,7 @@ NSTimeDateFormatString = "%a %d %b %H:%M:%S %z %Y"; /* Gio 22 Feb 15:33:42 +0100 2001 */ NSTimeFormatString = "%H:%M:%S"; /* 21:21:08 */ NSWeekDayNameArray = (Dimanche, Lundi, Mardi, Mercredi, Jeudi, Vendredi, Samedi); - NSYearMonthWeekDesignations = ("année", mois, semaine); + NSYearMonthWeekDesignations = ("ann\u00e9e", mois, semaine); } diff --git a/Resources/Languages/German b/Resources/Languages/German index 3f079df42..b8834f6b7 100644 --- a/Resources/Languages/German +++ b/Resources/Languages/German @@ -13,10 +13,10 @@ NSEarlierTimeDesignations = (vor, letzten, letzte, vorige, vorherige, 
davor); NSHourNameDesignations = ((0, Mitternacht), (12, Mittag, mittags), (10, morgens), (14, nachmittags), (19, abends)); NSInternationalCurrencyString = EUR; - NSLaterTimeDesignations = ("später"); - NSMonthNameArray = (Januar, Februar, "März", April, Mai, Juni, Juli, August, September, Oktober, November, Dezember); + NSLaterTimeDesignations = ("sp\u00e4ter"); + NSMonthNameArray = (Januar, Februar, "M\u00e4rz", April, Mai, Juni, Juli, August, September, Oktober, November, Dezember); NSNextDayDesignations = (morgen); - NSNextNextDayDesignations = ("nächsten Tag"); /* (as in: am nächsten Tag) */ + NSNextNextDayDesignations = ("n\u00e4chsten Tag"); /* (as in: am n\u00e4chsten Tag) */ NSPriorDayDesignations = (gestern); NSShortDateFormatString = "%d.%m.%Y"; /* "07.12.95" for example */ NSShortMonthNameArray = (Jan, Feb, Mrz, Apr, Mai, Jun, Jul, Aug, Sep, Okt, Nov, Dez); diff --git a/Resources/Languages/Italian b/Resources/Languages/Italian index e31588005..5b1c8b194 100644 --- a/Resources/Languages/Italian +++ b/Resources/Languages/Italian @@ -15,7 +15,7 @@ NSThousandsSeparator = "'"; /* not sure but better than `.' 
*/ NSAMPMDesignation = (AM, PM); - NSDateFormatString = "%A %d %B %Y"; /* Giovedì 22 Febbraio 2001 */ + NSDateFormatString = "%A %d %B %Y"; /* Gioved\u00ec 22 Febbraio 2001 */ NSDateTimeOrdering = DMYH; NSEarlierTimeDesignations = (prima, passato, fa); NSHourNameDesignations = ((0, mezzanotte), (12, mezzogiorno), (10, mattina), (14, pomeriggio), (19, sera)); @@ -31,7 +31,7 @@ NSThisDayDesignations = (oggi); NSTimeDateFormatString = "%a %d %b %H:%M:%S %z %Y"; /* Gio 22 Feb 15:33:42 +0100 2001 */ NSTimeFormatString = "%H:%M:%S"; /* 21:21:08 */ - NSWeekDayNameArray = (Domenica, "Lunedì", "Martedì", "Mercoledì", "Giovedì", "Venerdì", Sabato); + NSWeekDayNameArray = (Domenica, "Luned\u00ec", "Marted\u00ec", "Mercoled\u00ec", "Gioved\u00ec", "Venerd\u00ec", Sabato); NSYearMonthWeekDesignations = (anno, mese, settimana); } diff --git a/Resources/Languages/README b/Resources/Languages/README new file mode 100644 index 000000000..b7218885d --- /dev/null +++ b/Resources/Languages/README @@ -0,0 +1,19 @@ + +Language files which contain non-ascii characters should either be unicode +files or should encode those characters as \u escape sequences so that they +are usable on systems where the default C-String encoding is not the same +as the one on which the language files were created! + +Since it is generally not easy to edit unicode directly, or enter the +correct unicode escape sequences, it is recommended that you use the +cvtenc tool to perform conversions of the files before and after editing +in the default encoding used by your system. 
+ +For example, to edit the French language file so that it contains an ascii +property list with \u escape sequences, you could do - + +cvtenc -EscapeIn yes French > tmpfile +vi tmpfile +cvtenc -EscapeOut yes tmpfile > French +rm tmpfile + diff --git a/Source/NSLog.m b/Source/NSLog.m index 0c17b3c25..eba349504 100644 --- a/Source/NSLog.m +++ b/Source/NSLog.m @@ -52,9 +52,13 @@ _NSLog_standard_printf_handler (NSString* message) NSData *d; const char *buf; unsigned len; + static NSStringEncoding enc = 0; - d = [message dataUsingEncoding: NSASCIIStringEncoding - allowLossyConversion: NO]; + if (enc == 0) + { + enc = [NSString defaultCStringEncoding]; + } + d = [message dataUsingEncoding: enc allowLossyConversion: NO]; if (d == nil) { d = [message dataUsingEncoding: NSUTF8StringEncoding diff --git a/Tools/cvtenc.m b/Tools/cvtenc.m index 4254d9a90..85d96029f 100644 --- a/Tools/cvtenc.m +++ b/Tools/cvtenc.m @@ -30,6 +30,14 @@ #include #include +#include + +#define inrange(ch,min,max) ((ch)>=(min) && (ch)<=(max)) +#define char2num(ch) \ +inrange(ch,'0','9') \ +? ((ch)-0x30) \ +: (inrange(ch,'a','f') \ +? 
((ch)-0x57) : ((ch)-0x37)) int main(int argc, char** argv, char **env) @@ -39,6 +47,8 @@ main(int argc, char** argv, char **env) NSArray *args; unsigned i; BOOL found = NO; + BOOL eIn; + BOOL eOut; NSString *n; NSStringEncoding enc = 0; @@ -56,6 +66,8 @@ main(int argc, char** argv, char **env) args = [proc arguments]; + eIn = [[NSUserDefaults standardUserDefaults] boolForKey: @"EscapeIn"]; + eOut = [[NSUserDefaults standardUserDefaults] boolForKey: @"EscapeOut"]; n = [[NSUserDefaults standardUserDefaults] stringForKey: @"Encoding"]; if (n == nil) { @@ -93,7 +105,7 @@ main(int argc, char** argv, char **env) { NSString *file = [args objectAtIndex: i]; - if ([file isEqual: @"-Encoding"] == YES) + if ([file hasPrefix: @"-"] == YES) { i++; continue; @@ -111,7 +123,7 @@ main(int argc, char** argv, char **env) else { unsigned l = [myData length]; - const unichar *b = (const unichar*)[myData bytes]; + const unichar *b = (const unichar*)[myData bytes]; NSStringEncoding iEnc; NSStringEncoding oEnc; NSString *myString; @@ -136,8 +148,92 @@ main(int argc, char** argv, char **env) } else { - myData = [myString dataUsingEncoding: oEnc - allowLossyConversion: NO]; + if (eIn == YES) + { + unsigned l = [myString length]; + unichar *u; + NSZone *z = NSDefaultMallocZone(); + unsigned i = 0; + unsigned o = 0; + + u = NSZoneMalloc(z, sizeof(unichar)*l); + [myString getCharacters: u]; + + while (i < l) + { + unichar c = u[i++]; + + if (c == '\\' && i <= l - 6) + { + c = u[i++]; + + if (c == 'u' || c == 'U') + { + unichar v; + + v = 0; + c = u[i++]; + v |= char2num(c); + + v <<= 4; + c = u[i++]; + v |= char2num(c); + + v <<= 4; + c = u[i++]; + v |= char2num(c); + + v <<= 4; + c = u[i++]; + v |= char2num(c); + + c = v; + } + else + { + u[o++] = '\\'; + } + } + u[o++] = c; + } + + RELEASE(myString); + myString = [[NSString alloc] initWithCharactersNoCopy: u + length: o freeWhenDone: YES]; + } + if (eOut == YES) + { + unsigned l = [myString length]; + unichar *u; + char *c; + NSZone *z = 
NSDefaultMallocZone(); + unsigned o = 0; + unsigned i; + + u = NSZoneMalloc(z, sizeof(unichar)*l); + c = NSZoneMalloc(z, 6*l); + [myString getCharacters: u]; + for (i = 0; i < l; i++) + { + if (u[i] < 128) + { + c[o++] = u[i]; + } + else + { + sprintf(&c[o], "\\u%04x", u[i]); + o += 6; + } + } + NSZoneFree(z, u); + myData = [[NSData alloc] initWithBytesNoCopy: c + length: o]; + } + else + { + myData = [myString dataUsingEncoding: oEnc + allowLossyConversion: NO]; + } RELEASE(myString); if (myData == nil) { @@ -168,7 +264,11 @@ main(int argc, char** argv, char **env) @"It reads the file, and writes it to STDOUT after converting it\n" @"to unicode from C string encoding or vice versa.\n" @"You can supply a '-Encoding name' option to specify the C string\n" - @"encoding to be used, if you don't want to use the default."); + @"encoding to be used, if you don't want to use the default.\n" + @"You can supply a '-EscapeIn YES' option to specify that input\n" + @"should be parsed for \\u escape sequences (as in property lists).\n" + @"You can supply a '-EscapeOut YES' option to specify that output\n" + @"should be ascii with \\u escape sequences (for property lists).\n"); } [pool release]; return 0;