mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-25 17:51:01 +00:00
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@6438 72102866-910b-0410-8b05-ffd578937521
354 lines
7.4 KiB
Objective-C
354 lines
7.4 KiB
Objective-C
/* Support functions for Unicode implementation
|
|
Copyright (C) 1997 Free Software Foundation, Inc.
|
|
|
|
Written by: Stevo Crvenkovski < stevo@btinternet.com >
|
|
Date: March 1997
|
|
|
|
This file is part of the GNUstep Base Library.
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Library General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Library General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Library General Public
|
|
License along with this library; if not, write to the Free
|
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA.
|
|
*/
|
|
|
|
#include <config.h>
|
|
#include <Foundation/NSString.h>
|
|
|
|
struct _ucc_ {unichar from; char to;};
|
|
|
|
#include "unicode/cyrillic.h"
|
|
#include "unicode/latin2.h"
|
|
#include "unicode/nextstep.h"
|
|
#include "unicode/caseconv.h"
|
|
#include "unicode/cop.h"
|
|
#include "unicode/decomp.h"
|
|
|
|
#define FALSE 0
|
|
#define TRUE 1
|
|
|
|
typedef unsigned char unc;
|
|
|
|
unichar
|
|
encode_chartouni(char c, NSStringEncoding enc)
|
|
{
|
|
/* All that I could find in Next documentation
|
|
on NSNonLossyASCIIStringEncoding was << forthcoming >>. */
|
|
if ((enc == NSNonLossyASCIIStringEncoding)
|
|
|| (enc == NSASCIIStringEncoding)
|
|
|| (enc == NSISOLatin1StringEncoding))
|
|
{
|
|
return (unichar)((unc)c);
|
|
}
|
|
if ((enc == NSNEXTSTEPStringEncoding))
|
|
{
|
|
if ((unc)c < Next_conv_base)
|
|
return (unichar)((unc)c);
|
|
else
|
|
return(Next_char_to_uni_table[(unc)c - Next_conv_base]);
|
|
}
|
|
if ((enc == NSCyrillicStringEncoding))
|
|
{
|
|
if ((unc)c < Cyrillic_conv_base)
|
|
return (unichar)((unc)c);
|
|
else
|
|
return(Cyrillic_char_to_uni_table[(unc)c - Cyrillic_conv_base]);
|
|
}
|
|
if ((enc == NSISOLatin2StringEncoding))
|
|
{
|
|
if ((unc)c < Latin2_conv_base)
|
|
return (unichar)((unc)c);
|
|
else
|
|
return(Latin2_char_to_uni_table[(unc)c - Latin2_conv_base]);
|
|
}
|
|
|
|
#if 0
|
|
if ((enc == NSSymbolStringEncoding))
|
|
{
|
|
if ((unc)c < Symbol_conv_base)
|
|
return (unichar)((unc)c);
|
|
else
|
|
return(Symbol_char_to_uni_table[(unc)c - Symbol_conv_base]);
|
|
}
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
char
|
|
encode_unitochar(unichar u, NSStringEncoding enc)
|
|
{
|
|
int res;
|
|
int i =0;
|
|
|
|
if ((enc == NSNonLossyASCIIStringEncoding)
|
|
|| (enc == NSASCIIStringEncoding))
|
|
{
|
|
if (u <128)
|
|
return (char)u;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
if ((enc == NSISOLatin1StringEncoding))
|
|
{
|
|
if (u <256)
|
|
return (char)u;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
if ((enc == NSNEXTSTEPStringEncoding))
|
|
{
|
|
if (u <(unichar)Next_conv_base)
|
|
return (char)u;
|
|
else
|
|
{
|
|
while (((res = u-Next_uni_to_char_table[i++].from) > 0)
|
|
&& (i < Next_uni_to_char_table_size));
|
|
return res?0:Next_uni_to_char_table[--i].to;
|
|
}
|
|
}
|
|
|
|
if ((enc == NSCyrillicStringEncoding))
|
|
{
|
|
if (u <(unichar)Cyrillic_conv_base)
|
|
return (char)u;
|
|
else
|
|
{
|
|
while (((res = u-Cyrillic_uni_to_char_table[i++].from) > 0)
|
|
&& (i < Cyrillic_uni_to_char_table_size));
|
|
return res ? 0 : Cyrillic_uni_to_char_table[--i].to;
|
|
}
|
|
}
|
|
|
|
if ((enc == NSISOLatin2StringEncoding))
|
|
{
|
|
if (u <(unichar)Latin2_conv_base)
|
|
return (char)u;
|
|
else
|
|
{
|
|
while (((res = u-Latin2_uni_to_char_table[i++].from) > 0)
|
|
&& (i < Latin2_uni_to_char_table_size));
|
|
return res ? 0 : Latin2_uni_to_char_table[--i].to;
|
|
}
|
|
}
|
|
|
|
#if 0
|
|
if ((enc == NSSymbolStringEncoding))
|
|
{
|
|
if (u <(unichar)Symbol_conv_base)
|
|
return (char)u;
|
|
else
|
|
{
|
|
while (((res = u-Symbol_uni_to_char_table[i++].from) > 0)
|
|
&& (i < Symbol_uni_to_char_table_size));
|
|
return res ? '*' : Symbol_uni_to_char_table[--i].to;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
unichar
|
|
chartouni(char c)
|
|
{
|
|
static NSStringEncoding enc = GSUndefinedEncoding;
|
|
|
|
if (enc == 0)
|
|
enc = [NSString defaultCStringEncoding];
|
|
return encode_chartouni(c, enc);
|
|
}
|
|
|
|
char
|
|
unitochar(unichar u)
|
|
{
|
|
unc res;
|
|
static NSStringEncoding enc = GSUndefinedEncoding;
|
|
|
|
if (enc == 0)
|
|
enc = [NSString defaultCStringEncoding];
|
|
if ((res = encode_unitochar(u, enc)))
|
|
return res;
|
|
else
|
|
return '*';
|
|
}
|
|
|
|
int
|
|
strtoustr(unichar * u1,const char *s1,int size)
|
|
{
|
|
int count;
|
|
|
|
for (count = 0; (count < size) && (s1[count] != 0); count++)
|
|
u1[count] = chartouni(s1[count]);
|
|
return count;
|
|
}
|
|
|
|
int
|
|
ustrtostr(char *s2,unichar *u1,int size)
|
|
{
|
|
int count;
|
|
|
|
for (count = 0; (count < size) && (u1[count] != (unichar)0); count++)
|
|
s2[count] = unitochar(u1[count]);
|
|
return(count);
|
|
}
|
|
|
|
int
|
|
encode_strtoustr(unichar * u1,const char *s1,int size, NSStringEncoding enc)
|
|
{
|
|
int count;
|
|
|
|
for (count = 0; (count < size) && (s1[count] != 0); count++)
|
|
u1[count] = encode_chartouni(s1[count], enc);
|
|
return count;
|
|
}
|
|
|
|
int
|
|
encode_ustrtostr(char *s2,unichar *u1,int size, NSStringEncoding enc)
|
|
{
|
|
int count;
|
|
|
|
for (count = 0; (count < size) && (u1[count] != (unichar)0); count++)
|
|
s2[count] = encode_unitochar(u1[count], enc);
|
|
return(count);
|
|
}
|
|
|
|
/* Be carefull if you use this. Unicode arrays returned by
|
|
-getCharacters methods are not zero terminated */
|
|
int
|
|
uslen (unichar *u)
|
|
{
|
|
int len = 0;
|
|
|
|
while (u[len] != 0)
|
|
{
|
|
if (u[++len] == 0)
|
|
return len;
|
|
++len;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
unichar
|
|
uni_tolower(unichar ch)
|
|
{
|
|
int res;
|
|
int count = 0;
|
|
|
|
while (((res = ch - t_tolower[count++][0]) > 0) && (count < t_len_tolower));
|
|
return res ? ch : t_tolower[--count][1];
|
|
}
|
|
|
|
unichar
|
|
uni_toupper(unichar ch)
|
|
{
|
|
int res;
|
|
int count = 0;
|
|
|
|
while (((res = ch - t_toupper[count++][0]) > 0) && (count < t_len_toupper));
|
|
return res ? ch : t_toupper[--count][1];
|
|
}
|
|
|
|
unsigned char
|
|
uni_cop(unichar u)
|
|
{
|
|
unichar count,first,last,comp;
|
|
BOOL notfound;
|
|
|
|
first = 0;
|
|
last = uni_cop_table_size;
|
|
notfound = TRUE;
|
|
count =0;
|
|
|
|
if (u > (unichar)0x0080) // no nonspacing in ascii
|
|
{
|
|
while (notfound && (first <= last))
|
|
{
|
|
if (!(first == last))
|
|
{
|
|
count = (first + last) / 2;
|
|
comp = uni_cop_table[count].code;
|
|
if (comp < u)
|
|
first = count+1;
|
|
else
|
|
if (comp > u)
|
|
last = count-1;
|
|
else
|
|
notfound = FALSE;
|
|
}
|
|
else /* first == last */
|
|
{
|
|
if (u == uni_cop_table[first].code)
|
|
return uni_cop_table[first].cop;
|
|
return 0;
|
|
} /* else */
|
|
} /* while notfound ...*/
|
|
return notfound ? 0 : uni_cop_table[count].cop;
|
|
}
|
|
else /* u is ascii */
|
|
return 0;
|
|
}
|
|
|
|
BOOL
|
|
uni_isnonsp(unichar u)
|
|
{
|
|
#define TRUE 1
|
|
#define FALSE 0
|
|
// check is uni_cop good for this
|
|
if (uni_cop(u))
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
}
|
|
|
|
unichar*
|
|
uni_is_decomp(unichar u)
|
|
{
|
|
unichar count,first,last,comp;
|
|
BOOL notfound;
|
|
|
|
first = 0;
|
|
last = uni_dec_table_size;
|
|
notfound = TRUE;
|
|
count =0;
|
|
|
|
if (u > (unichar)0x0080) // no composites in ascii
|
|
{
|
|
while (notfound && (first <= last))
|
|
{
|
|
if (!(first == last))
|
|
{
|
|
count = (first + last) / 2;
|
|
comp = uni_dec_table[count].code;
|
|
if (comp < u)
|
|
first = count+1;
|
|
else
|
|
if (comp > u)
|
|
last = count-1;
|
|
else
|
|
notfound = FALSE;
|
|
}
|
|
else /* first == last */
|
|
{
|
|
if (u == uni_dec_table[first].code)
|
|
return uni_dec_table[first].decomp;
|
|
return 0;
|
|
} /* else */
|
|
} /* while notfound ...*/
|
|
return notfound ? 0 : uni_dec_table[count].decomp;
|
|
}
|
|
else /* u is ascii */
|
|
return 0;
|
|
}
|
|
|