Improve character conversion code a little.

git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@14481 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
Richard Frith-Macdonald 2002-09-18 09:34:33 +00:00
parent 6ef88fbcd8
commit 61c5624b4c
4 changed files with 111 additions and 103 deletions

View file

@ -1,3 +1,11 @@
2002-09-17 Richard Frith-Macdonald <rfm@gnu.org>
* Source/Unicode.m: Restructure conversion from unicode slightly,
to make it clearer and more readable and to include handling of
lossy conversions.
* Headers/gnustep/unicode/gsm0338.h: Added table for lossy conversion
from unicode.
2002-09-16 Richard Frith-Macdonald <rfm@gnu.org>
* Tools/AGSParser.m: Bugfix ... look for source files more

View file

@ -271,17 +271,39 @@ _ucc_ GSM0338_uni_to_char_table[] =
/*
 * Characters which are written as a two byte escape sequence.
 * The first value in each row is the unicode character; the second is
 * the byte which is emitted immediately after the escape byte.
 *
 * Each character appears exactly once and the rows are in ascending
 * unicode order.
 * NOTE(review): chop() presumably does a binary search which depends on
 * that ordering - confirm before adding or reordering rows.
 */
_ucc_ GSM0338_escapes[] =
{
  {0x000C,0x0A},	/* Form feed	*/
  {0x005B,0x3C},	/* '['		*/
  {0x005C,0x2F},	/* '\\'		*/
  {0x005D,0x3E},	/* ']'		*/
  {0x005E,0x14},	/* '^'		*/
  {0x007B,0x28},	/* '{'		*/
  {0x007C,0x40},	/* '|'		*/
  {0x007D,0x29},	/* '}'		*/
  {0x007E,0x3D},	/* '~'		*/
  {0x20AC,0x65}		/* Euro symbol	*/
};
/* Number of rows in GSM0338_escapes. */
#define GSM0338_esize (sizeof(GSM0338_escapes)/sizeof(_ucc_))
/*
 * Fallback (lossy) substitutions: each row maps a unicode character to
 * a single byte which merely resembles it.
 *
 * Several of the characters listed here also have an exact escape
 * sequence, so those rows ought to be redundant.  They are kept so that
 * two flavours of the GSM alphabet can be supported: the official one,
 * and a reduced one for delivering data to phones which do not
 * understand escape sequences.
 *
 * Rows are in ascending unicode order.
 * NOTE(review): chop() presumably does a binary search which depends on
 * that ordering - confirm before adding or reordering rows.
 */
_ucc_ GSM0338_lossy[] =
{
  { 0x005B, 0x3C },	/* '['  becomes '<'				*/
  { 0x005C, 0x2F },	/* '\\' becomes '/'				*/
  { 0x005D, 0x3E },	/* ']'  becomes '>'				*/
  { 0x005E, 0x14 },	/* '^'  becomes capital lambda			*/
  { 0x0060, 0x27 },	/* '`'  becomes '\''				*/
  { 0x007B, 0x28 },	/* '{'  becomes '('				*/
  { 0x007C, 0x40 },	/* '|'  becomes inverted exclamation mark	*/
  { 0x007D, 0x29 },	/* '}'  becomes ')'				*/
  { 0x007E, 0x3D }	/* '~'  becomes '='				*/
};
/* Number of rows in GSM0338_lossy. */
#define GSM0338_lsize (sizeof(GSM0338_lossy) / sizeof(_ucc_))

View file

@ -1135,6 +1135,11 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
unichar base = 0;
_ucc_ *table = 0;
unsigned tsize = 0;
unsigned char escape = 0;
_ucc_ *etable = 0;
unsigned etsize = 0;
_ucc_ *ltable = 0;
unsigned ltsize = 0;
BOOL swapped = NO;
BOOL result = YES;
@ -1270,120 +1275,90 @@ bases:
goto tables;
#endif
tables:
case NSGSM0338StringEncoding:
base = 0;
table = GSM0338_uni_to_char_table;
tsize = GSM0338_tsize;
escape = 0x1b;
etable = GSM0338_escapes;
etsize = GSM0338_esize;
if (strict == NO)
{
while (spos < slen)
{
unichar u = src[spos++];
if (swapped == YES)
{
u = ((u & 0xff00 >> 8) + ((u & 0x00ff) << 8));
}
if (dpos >= bsize)
{
GROW();
}
if (u < base)
{
ptr[dpos++] = (char)u;
}
else
{
int i = chop(u, table, tsize);
if (i < 0)
{
ptr[dpos++] = '*';
}
else
{
ptr[dpos++] = table[i].to;
}
}
}
ltable = GSM0338_lossy;
ltsize = GSM0338_lsize;
}
else
{
while (spos < slen)
{
unichar u = src[spos++];
goto tables;
if (swapped == YES)
{
u = ((u & 0xff00 >> 8) + ((u & 0x00ff) << 8));
}
if (dpos >= bsize)
{
GROW();
}
if (u < base)
{
ptr[dpos++] = (char)u;
}
else
{
int i = chop(u, table, tsize);
if (i < 0)
{
result = NO;
spos = slen;
break;
}
ptr[dpos++] = table[i].to;
}
}
}
break;
case NSGSM0338StringEncoding:
tables:
while (spos < slen)
{
unichar u = src[spos++];
int i;
int i;
/* Swap byte order if necessary */
if (swapped == YES)
  {
    /*
     * The mask must be parenthesised: '>>' binds more tightly than '&',
     * so 'u & 0xff00 >> 8' means 'u & 0x00ff', which discards the high
     * byte and duplicates the low byte instead of exchanging them.
     */
    u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
  }
/* Grow output buffer to make room if necessary */
if (dpos >= bsize)
{
GROW();
}
i = chop(u, GSM0338_uni_to_char_table, GSM0338_tsize);
if (i >= 0)
if (u < base)
{
ptr[dpos] = GSM0338_uni_to_char_table[i].to;
/*
* The character set has a lower section whose contents
* are identical to unicode, so no mapping is needed.
*/
ptr[dpos++] = (char)u;
}
else if (table != 0 && (i = chop(u, table, tsize)) >= 0)
{
/*
* The character mapping is found in a basic table.
*/
ptr[dpos++] = table[i].to;
}
else if (etable != 0 && (i = chop(u, etable, etsize)) >= 0)
{
/*
* The character mapping is found in a table of simple
* escape sequences consisting of an escape byte followed
* by another single byte.
*/
ptr[dpos++] = escape;
if (dpos >= bsize)
{
GROW();
}
ptr[dpos++] = etable[i].to;
}
else if (ltable != 0 && (i = chop(u, ltable, ltsize)) >= 0)
{
/*
* The character is found in a lossy mapping table.
*/
ptr[dpos++] = ltable[i].to;
}
else if (strict == NO)
{
/*
* The default lossy mapping generates an asterisk.
*/
ptr[dpos++] = '*';
}
else
{
i = chop(u, GSM0338_escapes, GSM0338_esize);
if (i >= 0)
{
ptr[dpos++] = 0x1b;
if (dpos >= bsize)
{
GROW();
}
ptr[dpos] = GSM0338_escapes[i].to;
}
else if (strict == YES)
{
result = NO;
break;
}
else
{
ptr[dpos] = '*';
}
/*
* No mapping has been found.
*/
result = NO;
spos = slen;
break;
}
dpos++;
}
break;

View file

@ -35,8 +35,9 @@ int main()
NSMutableString *f1 = [NSMutableString stringWithString: @"ab"];
NSStringEncoding *encs;
{
unichar buf[] = { '\243' };
#if 0
{ // GSM test
unichar buf[] = { 163, '[', ']', '{', '}', '\\', '^', '|', '~', '_' };
NSString *str = [NSString stringWithCharacters: buf
length: sizeof(buf)/sizeof(unichar)];
NSData *gsm = [str dataUsingEncoding: NSGSM0338StringEncoding];
@ -44,6 +45,8 @@ int main()
NSLog(@"GSM: %*.*s", [gsm length], [gsm length], [gsm bytes]);
return 0;
}
#endif
NS_DURING
[fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: us1];
NS_HANDLER