mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-23 09:04:13 +00:00
Improve character conversion code a little.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@14481 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
6ef88fbcd8
commit
61c5624b4c
4 changed files with 111 additions and 103 deletions
|
@ -1,3 +1,11 @@
|
|||
2002-09-17 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/Unicode.m: Restructure conversion from unicode slightly,
|
||||
to make it clearer and more readable and to include handling of
|
||||
lossy conversions.
|
||||
* Headers/gnustep/unicode/gsm0338.h: Added table for lossy conversion
|
||||
from unicode.
|
||||
|
||||
2002-09-16 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Tools/AGSParser.m: Bugfix ... look for source files more
|
||||
|
|
|
@ -271,17 +271,39 @@ _ucc_ GSM0338_uni_to_char_table[] =
|
|||
|
||||
_ucc_ GSM0338_escapes[] =
|
||||
{
|
||||
{0x000C,0x0A},
|
||||
{0x005B,0x3C},
|
||||
{0x005C,0x2F},
|
||||
{0x005D,0x3E},
|
||||
{0x005E,0x14},
|
||||
{0x007B,0x28},
|
||||
{0x007C,0x40},
|
||||
{0x007D,0x29},
|
||||
{0x007E,0x3D},
|
||||
{0x20AC,0x65}
|
||||
{0x000C,0x0A}, /* Form feed */
|
||||
{0x005B,0x3C}, /* '[' */
|
||||
{0x005C,0x2F}, /* '\\' */
|
||||
{0x005D,0x3E}, /* ']' */
|
||||
{0x005E,0x14}, /* '^' */
|
||||
{0x007B,0x28}, /* '{' */
|
||||
{0x007C,0x40}, /* '|' */
|
||||
{0x007D,0x29}, /* '}' */
|
||||
{0x007E,0x3D}, /* '~' */
|
||||
{0x20AC,0x65} /* Euro symbol */
|
||||
};
|
||||
|
||||
#define GSM0338_esize (sizeof(GSM0338_escapes)/sizeof(_ucc_))
|
||||
|
||||
/*
|
||||
* Some of these conversions should not be needed because they are
|
||||
* already handled by escape sequences ... I put them here so we can
|
||||
* support two varieties of the GSM alphabet: the official one, and
|
||||
* a cut down version suitable for use when delivering data to phones
|
||||
* which don't support escape sequences.
|
||||
*/
|
||||
_ucc_ GSM0338_lossy[] =
|
||||
{
|
||||
{0x005B,0x3C}, /* '[' => '<' */
|
||||
{0x005C,0x2F}, /* '\\' => '/' */
|
||||
{0x005D,0x3E}, /* ']' => '>' */
|
||||
{0x005E,0x14}, /* '^' => lambda */
|
||||
{0x0060,0x27}, /* '`' => '\'' */
|
||||
{0x007B,0x28}, /* '{' => '(' */
|
||||
{0x007C,0x40}, /* '|' => inverted '!' (GSM 0x40), not the letter 'i' */
|
||||
{0x007D,0x29}, /* '}' => ')' */
|
||||
{0x007E,0x3D} /* '~' => '=' */
|
||||
};
|
||||
|
||||
#define GSM0338_lsize (sizeof(GSM0338_lossy)/sizeof(_ucc_))
|
||||
|
||||
|
|
157
Source/Unicode.m
157
Source/Unicode.m
|
@ -1135,6 +1135,11 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src,
|
|||
unichar base = 0;
|
||||
_ucc_ *table = 0;
|
||||
unsigned tsize = 0;
|
||||
unsigned char escape = 0;
|
||||
_ucc_ *etable = 0;
|
||||
unsigned etsize = 0;
|
||||
_ucc_ *ltable = 0;
|
||||
unsigned ltsize = 0;
|
||||
BOOL swapped = NO;
|
||||
BOOL result = YES;
|
||||
|
||||
|
@ -1270,120 +1275,90 @@ bases:
|
|||
goto tables;
|
||||
#endif
|
||||
|
||||
tables:
|
||||
case NSGSM0338StringEncoding:
|
||||
base = 0;
|
||||
table = GSM0338_uni_to_char_table;
|
||||
tsize = GSM0338_tsize;
|
||||
escape = 0x1b;
|
||||
etable = GSM0338_escapes;
|
||||
etsize = GSM0338_esize;
|
||||
if (strict == NO)
|
||||
{
|
||||
while (spos < slen)
|
||||
{
|
||||
unichar u = src[spos++];
|
||||
|
||||
if (swapped == YES)
|
||||
{
|
||||
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8)); /* mask the high byte before shifting: '>>' binds tighter than '&' */
|
||||
}
|
||||
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
}
|
||||
if (u < base)
|
||||
{
|
||||
ptr[dpos++] = (char)u;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = chop(u, table, tsize);
|
||||
|
||||
if (i < 0)
|
||||
{
|
||||
ptr[dpos++] = '*';
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr[dpos++] = table[i].to;
|
||||
}
|
||||
}
|
||||
}
|
||||
ltable = GSM0338_lossy;
|
||||
ltsize = GSM0338_lsize;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (spos < slen)
|
||||
{
|
||||
unichar u = src[spos++];
|
||||
goto tables;
|
||||
|
||||
if (swapped == YES)
|
||||
{
|
||||
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8)); /* mask the high byte before shifting: '>>' binds tighter than '&' */
|
||||
}
|
||||
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
}
|
||||
if (u < base)
|
||||
{
|
||||
ptr[dpos++] = (char)u;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = chop(u, table, tsize);
|
||||
|
||||
if (i < 0)
|
||||
{
|
||||
result = NO;
|
||||
spos = slen;
|
||||
break;
|
||||
}
|
||||
ptr[dpos++] = table[i].to;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case NSGSM0338StringEncoding:
|
||||
tables:
|
||||
while (spos < slen)
|
||||
{
|
||||
unichar u = src[spos++];
|
||||
int i;
|
||||
int i;
|
||||
|
||||
/* Swap byte order if necessary */
|
||||
if (swapped == YES)
|
||||
{
|
||||
u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8)); /* mask the high byte before shifting: '>>' binds tighter than '&' */
|
||||
}
|
||||
|
||||
/* Grow output buffer to make room if necessary */
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
}
|
||||
|
||||
i = chop(u, GSM0338_uni_to_char_table, GSM0338_tsize);
|
||||
if (i >= 0)
|
||||
if (u < base)
|
||||
{
|
||||
ptr[dpos] = GSM0338_uni_to_char_table[i].to;
|
||||
/*
|
||||
* The character set has a lower section whose contents
|
||||
* are identical to unicode, so no mapping is needed.
|
||||
*/
|
||||
ptr[dpos++] = (char)u;
|
||||
}
|
||||
else if (table != 0 && (i = chop(u, table, tsize)) >= 0)
|
||||
{
|
||||
/*
|
||||
* The character mapping is found in a basic table.
|
||||
*/
|
||||
ptr[dpos++] = table[i].to;
|
||||
}
|
||||
else if (etable != 0 && (i = chop(u, etable, etsize)) >= 0)
|
||||
{
|
||||
/*
|
||||
* The character mapping is found in a table of simple
|
||||
* escape sequences consisting of an escape byte followed
|
||||
* by another single byte.
|
||||
*/
|
||||
ptr[dpos++] = escape;
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
}
|
||||
ptr[dpos++] = etable[i].to;
|
||||
}
|
||||
else if (ltable != 0 && (i = chop(u, ltable, ltsize)) >= 0)
|
||||
{
|
||||
/*
|
||||
* The character is found in a lossy mapping table.
|
||||
*/
|
||||
ptr[dpos++] = ltable[i].to;
|
||||
}
|
||||
else if (strict == NO)
|
||||
{
|
||||
/*
|
||||
* The default lossy mapping generates an asterisk.
|
||||
*/
|
||||
ptr[dpos++] = '*';
|
||||
}
|
||||
else
|
||||
{
|
||||
i = chop(u, GSM0338_escapes, GSM0338_esize);
|
||||
if (i >= 0)
|
||||
{
|
||||
ptr[dpos++] = 0x1b;
|
||||
if (dpos >= bsize)
|
||||
{
|
||||
GROW();
|
||||
}
|
||||
ptr[dpos] = GSM0338_escapes[i].to;
|
||||
}
|
||||
else if (strict == YES)
|
||||
{
|
||||
result = NO;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr[dpos] = '*';
|
||||
}
|
||||
/*
|
||||
* No mapping has been found.
|
||||
*/
|
||||
result = NO;
|
||||
spos = slen;
|
||||
break;
|
||||
}
|
||||
dpos++;
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
@ -35,8 +35,9 @@ int main()
|
|||
NSMutableString *f1 = [NSMutableString stringWithString: @"ab"];
|
||||
NSStringEncoding *encs;
|
||||
|
||||
{
|
||||
unichar buf[] = { '\243' };
|
||||
#if 0
|
||||
{ // GSM test
|
||||
unichar buf[] = { 163, '[', ']', '{', '}', '\\', '^', '|', '~', '_' };
|
||||
NSString *str = [NSString stringWithCharacters: buf
|
||||
length: sizeof(buf)/sizeof(unichar)];
|
||||
NSData *gsm = [str dataUsingEncoding: NSGSM0338StringEncoding];
|
||||
|
@ -44,6 +45,8 @@ int main()
|
|||
NSLog(@"GSM: %*.*s", [gsm length], [gsm length], [gsm bytes]);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
NS_DURING
|
||||
[fo replaceCharactersInRange: [fo rangeOfString: @"xx"] withString: us1];
|
||||
NS_HANDLER
|
||||
|
|
Loading…
Reference in a new issue