minor improvement to charset conversion tool.

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@31220 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-MacDonald 2010-08-31 12:14:07 +00:00
parent ed56c6bbc1
commit e67a8b6e5b
3 changed files with 71 additions and 10 deletions

View file

@ -1,3 +1,8 @@
2010-08-31 Richard Frith-Macdonald <rfm@gnu.org>
* Tools/cvtenc.m: Add -Unicode option
* Tools/cvtenc.1: update
2010-08-30 Richard Frith-Macdonald <rfm@gnu.org>
* Source/NSThread.m: try to make inter-thread notification via

View file

@ -16,9 +16,10 @@ cvtenc \- file encoding converter
.SH DESCRIPTION
.P
Converts a file encoded in a specified or default non-unicode encoding to
unicode, or, if the file is already in unicode, converts it to a specified
or default non-unicode encoding. The converted text is printed to standard
out.
unicode (UTF16), or, if the file is already in unicode, converts it to a
specified or default non-unicode encoding.
The converted text is printed to standard out.
If the filename is a hyphen then this tool reads from standard input.
.SH OPTIONS
.IP "\fB\-Encoding\fR \fI encoding\fR" 4
@ -34,11 +35,18 @@ escape sequences (as in property lists).
Specify '-EscapeOut YES' (the default is 'NO') to generate \\u escape
sequences (as in property lists) in the output. Note, this might produce
unexpected results for some encodings.
.IP "\fB\-Unicode\fR \fIIN|OUT\fR" 4
Specify '-Unicode IN' (or '-Unicode OUT') to control the direction of
conversion rather than having the tool guess it on the basis of the content
it reads. Using '-Unicode IN' means that the tool reads UTF16 data and
writes C-String data, while using '-Unicode OUT' means that the tool reads
C-String data and writes UTF16 data.
.SH HISTORY
.RS 0
Written in 2002.
Minor update in 2010.
.P
This manual page first appeared in gnustep-base 1.9.2 (March 2004).
.P

View file

@ -28,6 +28,8 @@
#import "Foundation/NSUserDefaults.h"
#import "Foundation/NSFileHandle.h"
#import "Foundation/NSAutoreleasePool.h"
#import "GNUstepBase/NSString+GNUstepBase.h"
#import "GNUstepBase/GSMime.h"
#ifdef NeXT_Foundation_LIBRARY
#import "GNUstepBase/Additions.h"
#endif
@ -83,7 +85,7 @@ main(int argc, char** argv, char **env)
{
enc = [NSString defaultCStringEncoding];
}
else
else if (0 == (enc = [GSMimeDocument encodingFromCharset: n]))
{
const NSStringEncoding *e;
NSMutableString *names;
@ -94,7 +96,7 @@ main(int argc, char** argv, char **env)
{
NSString *name = [NSString localizedNameOfStringEncoding: *e];
[names appendFormat: @" %@\n", name];
[names appendFormat: @" '%@'\n", name];
if ([n isEqual: name] == YES)
{
enc = *e;
@ -105,12 +107,34 @@ main(int argc, char** argv, char **env)
if (enc == 0)
{
NSLog(@"defaults: unable to find encoding '%@'!\n"
@"Known encoding names are -\n%@", n, names);
@"Localised encoding names are -\n%@", n, names);
[pool release];
exit(EXIT_SUCCESS);
}
}
n = [[NSUserDefaults standardUserDefaults] stringForKey: @"Unicode"];
n = [[n stringByTrimmingSpaces] lowercaseString];
if ([n length] > 0)
{
if ([n isEqual: @"in"] || [n isEqual: @"i"])
{
n = @"i";
}
else if ([n isEqual: @"out"] || [n isEqual: @"o"])
{
n = @"o";
}
else
{
n = nil;
}
}
else
{
n = nil;
}
for (i = 1; found == NO && i < [args count]; i++)
{
NSString *file = [args objectAtIndex: i];
@ -138,13 +162,30 @@ main(int argc, char** argv, char **env)
NSStringEncoding oEnc;
NSString *myString;
if (l > 1 && (*b == 0xFFFE || *b == 0xFEFF))
if (nil == n)
{
if (l > 1 && (*b == 0xFFFE || *b == 0xFEFF))
{
iEnc = NSUnicodeStringEncoding;
oEnc = enc;
}
else
{
iEnc = enc;
oEnc = NSUnicodeStringEncoding;
}
}
else if ([n isEqualToString: @"i"])
{
/* Unicode (UTF16) in
*/
iEnc = NSUnicodeStringEncoding;
oEnc = enc;
}
else
{
/* Unicode (UTF16) out
*/
iEnc = enc;
oEnc = NSUnicodeStringEncoding;
}
@ -277,13 +318,20 @@ main(int argc, char** argv, char **env)
{
NSLog(@"\nThis utility expects a filename as an argument.\n"
@"It reads the file, and writes it to STDOUT after converting it\n"
@"to unicode from C string encoding or vice versa.\n"
@"You can supply a '-Encoding name' option to specify the C string\n"
@"to unicode (UTF16) from C-string encoding or vice versa.\n"
@"You can use '-' as the filename argument to read from STDIN.\n"
@"You can supply a '-Encoding name' option to specify the C-string\n"
@"encoding to be used, if you don't want to use the default.\n"
@"If you supply an unknown encoding the tool will print a list\n"
@"of all the known encodings.\n"
@"You can supply a '-EscapeIn YES' option to specify that input\n"
@"should be parsed for \\U escape sequences (as in property lists).\n"
@"You can supply a '-EscapeOut YES' option to specify that output\n"
@"should be ascii with \\U escape sequences (for property lists).\n");
@"should be ascii with \\U escape sequences (for property lists).\n"
@"You can supply a '-Unicode in/out' option to specify that the\n"
@"conversion is from/to unicode (UTF16). This suppresses the normal\n"
@"behavior of guessing the direction of conversion from the content\n"
@"of the incoming data.\n");
}
[pool release];
return 0;