/* Support functions for Unicode implementation
   Function to determine default c string encoding for
   GNUstep based on GNUSTEP_STRING_ENCODING environment variable.

   Copyright (C) 1997 Free Software Foundation, Inc.

   Written by: Stevo Crvenkovski < stevo@btinternet.com >
   Date: March 1997
   Merged with GetDefEncoding.m and iconv by: Fred Kiefer
   Date: September 2000

   This file is part of the GNUstep Base Library.

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA.
*/

/*
 * NOTE(review): in the copy of this file that was reviewed, the header
 * names on the #include lines had been lost (only bare "#include" tokens
 * remained).  The names below are reconstructed from what the code uses
 * (HAVE_ICONV/WORDS_BIGENDIAN, NSString/NSStringEncoding/unichar, fprintf,
 * getenv, strcmp, iconv) - confirm them against the repository.
 */
#include <config.h>
#include <Foundation/NSString.h>
#include <base/Unicode.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One entry of a sorted unicode -> 8 bit character conversion table.  */
struct _ucc_ {unichar from; char to;};

#include "unicode/cyrillic.h"
#include "unicode/latin2.h"
#include "unicode/nextstep.h"
#include "unicode/caseconv.h"
#include "unicode/cop.h"
#include "unicode/decomp.h"

#ifdef HAVE_ICONV
#include <iconv.h>
#include <errno.h>

// The rest of the GNUstep code stores UNICODE in internal byte order,
// so we do the same. This should be UCS-2-INTERNAL for libiconv
#ifdef WORDS_BIGENDIAN
#define UNICODE_ENC "UNICODEBIG"
#else
#define UNICODE_ENC "UNICODELITTLE"
#endif

#endif

typedef unsigned char unc;

/* Cached default C string encoding; GSUndefinedEncoding until first use.  */
static NSStringEncoding defEnc = GSUndefinedEncoding;

#ifdef HAVE_ICONV
// FIXME: We should check dynamically which encodings are found on this
// computer, as different implementations of iconv will support different
// encodings.
static NSStringEncoding _availableEncodings[] = {
  NSASCIIStringEncoding,
  NSNEXTSTEPStringEncoding,
  NSJapaneseEUCStringEncoding,
  NSUTF8StringEncoding,
  NSISOLatin1StringEncoding,
//  NSSymbolStringEncoding,
//  NSNonLossyASCIIStringEncoding,
  NSShiftJISStringEncoding,
  NSISOLatin2StringEncoding,
  NSUnicodeStringEncoding,
  NSWindowsCP1251StringEncoding,
  NSWindowsCP1252StringEncoding,
  NSWindowsCP1253StringEncoding,
  NSWindowsCP1254StringEncoding,
  NSWindowsCP1250StringEncoding,
  NSISO2022JPStringEncoding,
  NSMacOSRomanStringEncoding,
//  NSProprietaryStringEncoding,
// GNUstep additions
  NSCyrillicStringEncoding,
  NSKOI8RStringEncoding,
  NSISOLatin3StringEncoding,
  NSISOLatin4StringEncoding,
  NSArabicStringEncoding,
  NSGreekStringEncoding,
  NSHebrewStringEncoding,
  NSGB2312StringEncoding,
  0
};
#else
// Uncomment when implemented
static NSStringEncoding _availableEncodings[] = {
  NSASCIIStringEncoding,
  NSNEXTSTEPStringEncoding,
//  NSJapaneseEUCStringEncoding,
//  NSUTF8StringEncoding,
  NSISOLatin1StringEncoding,
//  NSSymbolStringEncoding,
//  NSNonLossyASCIIStringEncoding,
//  NSShiftJISStringEncoding,
  NSISOLatin2StringEncoding,
  NSUnicodeStringEncoding,
//  NSWindowsCP1251StringEncoding,
//  NSWindowsCP1252StringEncoding,
//  NSWindowsCP1253StringEncoding,
//  NSWindowsCP1254StringEncoding,
//  NSWindowsCP1250StringEncoding,
//  NSISO2022JPStringEncoding,
//  NSMacOSRomanStringEncoding,
//  NSProprietaryStringEncoding,
// GNUstep additions
  NSCyrillicStringEncoding,
//  NSKOI8RStringEncoding,
//  NSISOLatin3StringEncoding,
//  NSISOLatin4StringEncoding,
//  NSArabicStringEncoding,
//  NSGreekStringEncoding,
//  NSHebrewStringEncoding,
//  NSGB2312StringEncoding,
  0
};
#endif

/*
 * Maps every known encoding to its symbolic name.  The table is
 * terminated by an entry whose enc is 0; that entry's name is what
 * GetEncodingName() reports for an encoding it does not know.
 */
struct _strenc_ {NSStringEncoding enc; char *ename;};
const struct _strenc_ str_encoding_table[]=
{
  {NSASCIIStringEncoding,"NSASCIIStringEncoding"},
  {NSNEXTSTEPStringEncoding,"NSNEXTSTEPStringEncoding"},
  {NSJapaneseEUCStringEncoding, "NSJapaneseEUCStringEncoding"},
  {NSUTF8StringEncoding,"NSUTF8StringEncoding"},
  {NSISOLatin1StringEncoding,"NSISOLatin1StringEncoding"},
  {NSSymbolStringEncoding,"NSSymbolStringEncoding"},
  {NSNonLossyASCIIStringEncoding,"NSNonLossyASCIIStringEncoding"},
  {NSShiftJISStringEncoding,"NSShiftJISStringEncoding"},
  {NSISOLatin2StringEncoding,"NSISOLatin2StringEncoding"},
  {NSUnicodeStringEncoding, "NSUnicodeStringEncoding"},
  {NSWindowsCP1251StringEncoding,"NSWindowsCP1251StringEncoding"},
  {NSWindowsCP1252StringEncoding,"NSWindowsCP1252StringEncoding"},
  {NSWindowsCP1253StringEncoding,"NSWindowsCP1253StringEncoding"},
  {NSWindowsCP1254StringEncoding,"NSWindowsCP1254StringEncoding"},
  {NSWindowsCP1250StringEncoding,"NSWindowsCP1250StringEncoding"},
  /* No trailing blank in the name: it is compared with strcmp() against
     the value of GNUSTEP_STRING_ENCODING, so a padded name can never
     match.  */
  {NSISO2022JPStringEncoding,"NSISO2022JPStringEncoding"},
  {NSMacOSRomanStringEncoding, "NSMacOSRomanStringEncoding"},
  {NSProprietaryStringEncoding, "NSProprietaryStringEncoding"},
// GNUstep additions
  {NSCyrillicStringEncoding,"NSCyrillicStringEncoding"},
  {NSKOI8RStringEncoding, "NSKOI8RStringEncoding"},
  {NSISOLatin3StringEncoding, "NSISOLatin3StringEncoding"},
  {NSISOLatin4StringEncoding, "NSISOLatin4StringEncoding"},
  {NSArabicStringEncoding, "NSArabicStringEncoding"},
  {NSGreekStringEncoding, "NSGreekStringEncoding"},
  {NSHebrewStringEncoding, "NSHebrewStringEncoding"},
  {NSGB2312StringEncoding, "NSGB2312StringEncoding"},
  {0, "Unknown encoding"}
};

/*
 * Returns the zero terminated list of encodings this file can convert.
 * The list is a static array; callers must not free it.
 */
NSStringEncoding *GetAvailableEncodings()
{
  // FIXME: This should check which iconv definitions are available and
  // add them to the available encodings
  return _availableEncodings;
}

/*
 * Determines the default C string encoding from the environment variable
 * GNUSTEP_STRING_ENCODING, which must hold one of the names listed in
 * str_encoding_table.  Falls back to NSASCIIStringEncoding (with a warning
 * on stderr) when the name is unknown, names an encoding that cannot be
 * used for C strings (Unicode, Symbol) or names an encoding that is not in
 * the list returned by GetAvailableEncodings().  A missing variable
 * silently selects NSASCIIStringEncoding.
 *
 * The result is also stored in the file level cache used by chartouni(),
 * unitochar(), strtoustr() and ustrtostr().
 */
NSStringEncoding GetDefEncoding()
{
  const char            *name = getenv("GNUSTEP_STRING_ENCODING");
  NSStringEncoding      *available = GetAvailableEncodings();
  NSStringEncoding      ret = NSASCIIStringEncoding;

  /* A missing environment variable is not reported.  This shouldn't be
     required.  It really should be in UserDefaults - asf  */
  if (name != 0)
    {
      unsigned int      pos = 0;
      NSStringEncoding  wanted;

      /* Stops either on the matching name or on the terminating entry.  */
      while (str_encoding_table[pos].enc != 0
        && strcmp(str_encoding_table[pos].ename, name) != 0)
        {
          pos++;
        }
      wanted = str_encoding_table[pos].enc;

      if (wanted == 0)
        {
          /* encoding not found */
          fprintf(stderr, "WARNING: %s - encoding not supported.\n", name);
          fprintf(stderr, "NSASCIIStringEncoding set as default.\n");
        }
      else if (wanted == NSUnicodeStringEncoding
        || wanted == NSSymbolStringEncoding)
        {
          fprintf(stderr, "WARNING: %s - encoding not supported as default c string encoding.\n", name);
          fprintf(stderr, "NSASCIIStringEncoding set as default.\n");
        }
      else
        {
          /* encoding should be supported but is it implemented? */
          BOOL          implemented = NO;
          unsigned int  i;

          for (i = 0; available[i] != 0; i++)
            {
              if (available[i] == wanted)
                {
                  implemented = YES;
                  break;
                }
            }
          if (implemented)
            {
              ret = wanted;
            }
          else
            {
              fprintf(stderr, "WARNING: %s - encoding not yet implemented.\n", name);
              fprintf(stderr, "NSASCIIStringEncoding set as default.\n");
            }
        }
    }

  // Cache the encoding
  defEnc = ret;

  return ret;
}

/*
 * Returns the symbolic name of encoding (e.g. "NSUTF8StringEncoding"), or
 * "Unknown encoding" if it is not listed in str_encoding_table.
 */
NSString*
GetEncodingName(NSStringEncoding encoding)
{
  unsigned int  pos = 0;

  while (str_encoding_table[pos].enc != 0
    && str_encoding_table[pos].enc != encoding)
    {
      pos++;
    }
  return [NSString stringWithCString: str_encoding_table[pos].ename];
}

#ifdef HAVE_ICONV

/*
 * Returns the character set name handed to iconv_open() for enc, or an
 * empty string if there is no name for enc in this list.
 */
static const char *
iconv_stringforencoding(NSStringEncoding enc)
{
  switch (enc)
    {
      case NSASCIIStringEncoding:
        return "ASCII";
      case NSNEXTSTEPStringEncoding:
        return "NEXTSTEP";
      case NSISOLatin1StringEncoding:
        return "ISO-8859-1";
      case NSISOLatin2StringEncoding:
        return "ISO-8859-2";
      case NSUnicodeStringEncoding:
        return UNICODE_ENC;
      case NSJapaneseEUCStringEncoding:
        return "EUC-JP";
      case NSUTF8StringEncoding:
        return "UTF-8";
      case NSShiftJISStringEncoding:
        return "SHIFT-JIS";
      case NSWindowsCP1250StringEncoding:
        return "CP1250";
      case NSWindowsCP1251StringEncoding:
        return "CP1251";
      case NSWindowsCP1252StringEncoding:
        return "CP1252";
      case NSWindowsCP1253StringEncoding:
        return "CP1253";
      case NSWindowsCP1254StringEncoding:
        return "CP1254";
      case NSISO2022JPStringEncoding:
        return "ISO-2022-JP";
      case NSMacOSRomanStringEncoding:
        return "MACINTOSH";

      // GNUstep extensions
      case NSKOI8RStringEncoding:
        return "KOI8-R";
      case NSISOLatin3StringEncoding:
        return "ISO-8859-3";
      case NSISOLatin4StringEncoding:
        return "ISO-8859-4";
      case NSCyrillicStringEncoding:
        return "ISO-8859-5";
      case NSArabicStringEncoding:
        return "ISO-8859-6";
      case NSGreekStringEncoding:
        return "ISO-8859-7";
      case NSHebrewStringEncoding:
        return "ISO-8859-8";
      case NSGB2312StringEncoding:
        return "EUC-CN";
      default:
        return "";
    }
}

/*
 * Converts size1 bytes at s1 from encoding enc to unicode using iconv.
 *   u2    - output buffer with room for size2 unichars
 *   s1    - input bytes
 * Returns the number of unichars written to u2, or 0 if the converter
 * could not be opened or the conversion failed (including the case where
 * the output buffer is too small).
 */
int
iconv_strtoustr(unichar *u2, int size2, const char *s1, int size1,
  NSStringEncoding enc)
{
  iconv_t       conv;
  char          *out = (char*)u2;
  size_t        inLeft;
  size_t        outLeft;
  size_t        result;

  if (size1 < 0 || size2 < 0)
    {
      return 0;
    }
  conv = iconv_open(UNICODE_ENC, iconv_stringforencoding(enc));
  if (conv == (iconv_t)-1)
    {
      return 0;
    }

  /* iconv() wants size_t counters; handing it the addresses of int
     variables is wrong wherever size_t is wider than int.  */
  inLeft = (size_t)size1;
  outLeft = sizeof(unichar) * (size_t)size2;

  /* The (void*) cast lets the call compile both with implementations
     that declare the input buffer as 'char **' and with those that
     declare it as 'const char **'.  */
  result = iconv(conv, (void*)&s1, &inLeft, &out, &outLeft);

  // close the converter
  iconv_close(conv);
  if (result == (size_t)-1)
    {
      return 0;
    }

  /* Report unichars, not bytes, so that encode_strtoustr() returns the
     same kind of count for iconv encodings as for the built-in ones.  */
  return (int)((out - (char*)u2) / sizeof(unichar));
}

/*
 * Converts size1 unichars at u1 to encoding enc using iconv.
 *   s2    - output buffer with room for size2 bytes
 * Returns the number of bytes written to s2, or 0 if the converter could
 * not be opened or the conversion failed.
 */
int
iconv_ustrtostr(char *s2, int size2, unichar *u1, int size1,
  NSStringEncoding enc)
{
  iconv_t       conv;
  const char    *in = (const char*)u1;
  char          *out = s2;
  size_t        inLeft;
  size_t        outLeft;
  size_t        result;

  if (size1 < 0 || size2 < 0)
    {
      return 0;
    }
  conv = iconv_open(iconv_stringforencoding(enc), UNICODE_ENC);
  if (conv == (iconv_t)-1)
    {
      return 0;
    }

  inLeft = sizeof(unichar) * (size_t)size1;
  outLeft = (size_t)size2;
  result = iconv(conv, (void*)&in, &inLeft, &out, &outLeft);

  // close the converter
  iconv_close(conv);
  if (result == (size_t)-1)
    {
      return 0;
    }

  return (int)(out - s2);
}
#endif

/* Results of builtin_unitochar().  */
enum {
  CONV_NOTBUILTIN = -1, /* Encoding is not handled by the tables here.  */
  CONV_NOMAP = 0,       /* Character has no representation.             */
  CONV_OK = 1           /* Character converted.                         */
};

/*
 * Looks u up in a unicode -> char table of size entries.  The search
 * walks the table from the start and gives up at the first entry whose
 * code is greater than u, so it relies on the entries being in ascending
 * order of 'from'.
 * Returns YES and stores the character in *to if u was found.
 */
static BOOL
ucc_find(const struct _ucc_ *table, unsigned int size, unichar u, char *to)
{
  unsigned int  i;

  for (i = 0; i < size; i++)
    {
      if (table[i].from == u)
        {
          *to = table[i].to;
          return YES;
        }
      if (table[i].from > u)
        {
          break;
        }
    }
  return NO;
}

/*
 * Converts the byte c in encoding enc to unicode using only the
 * conversions built into this file.
 * Returns NO (leaving *to untouched) if enc is not a built-in encoding.
 *
 * All that I could find in Next documentation on
 * NSNonLossyASCIIStringEncoding was << forthcoming >>.
 */
static BOOL
builtin_chartouni(unc c, NSStringEncoding enc, unichar *to)
{
  switch (enc)
    {
      case NSNonLossyASCIIStringEncoding:
      case NSASCIIStringEncoding:
      case NSISOLatin1StringEncoding:
      case NSUnicodeStringEncoding:
        *to = (unichar)c;
        return YES;

      case NSNEXTSTEPStringEncoding:
        if (c < Next_conv_base)
          *to = (unichar)c;
        else
          *to = Next_char_to_uni_table[c - Next_conv_base];
        return YES;

      case NSCyrillicStringEncoding:
        if (c < Cyrillic_conv_base)
          *to = (unichar)c;
        else
          *to = Cyrillic_char_to_uni_table[c - Cyrillic_conv_base];
        return YES;

      case NSISOLatin2StringEncoding:
        if (c < Latin2_conv_base)
          *to = (unichar)c;
        else
          *to = Latin2_char_to_uni_table[c - Latin2_conv_base];
        return YES;

#if 0
      case NSSymbolStringEncoding:
        if (c < Symbol_conv_base)
          *to = (unichar)c;
        else
          *to = Symbol_char_to_uni_table[c - Symbol_conv_base];
        return YES;
#endif

      default:
        return NO;
    }
}

/*
 * Converts the unicode character u to a byte in encoding enc using only
 * the conversions built into this file.
 * Returns CONV_OK and stores the byte in *to, CONV_NOMAP if u can not be
 * represented in enc, or CONV_NOTBUILTIN if enc is not handled here.
 * NSUnicodeStringEncoding is treated like Latin1 (only codes below 256
 * convert), as the byte oriented interfaces always did.
 */
static int
builtin_unitochar(unichar u, NSStringEncoding enc, char *to)
{
  const struct _ucc_    *table;
  unsigned int          size;
  unichar               base;

  switch (enc)
    {
      case NSNonLossyASCIIStringEncoding:
      case NSASCIIStringEncoding:
        if (u >= 128)
          return CONV_NOMAP;
        *to = (char)u;
        return CONV_OK;

      case NSISOLatin1StringEncoding:
      case NSUnicodeStringEncoding:
        if (u >= 256)
          return CONV_NOMAP;
        *to = (char)u;
        return CONV_OK;

      case NSNEXTSTEPStringEncoding:
        base = (unichar)Next_conv_base;
        table = Next_uni_to_char_table;
        size = Next_uni_to_char_table_size;
        break;

      case NSCyrillicStringEncoding:
        base = (unichar)Cyrillic_conv_base;
        table = Cyrillic_uni_to_char_table;
        size = Cyrillic_uni_to_char_table_size;
        break;

      case NSISOLatin2StringEncoding:
        base = (unichar)Latin2_conv_base;
        table = Latin2_uni_to_char_table;
        size = Latin2_uni_to_char_table_size;
        break;

#if 0
      case NSSymbolStringEncoding:
        base = (unichar)Symbol_conv_base;
        table = Symbol_uni_to_char_table;
        size = Symbol_uni_to_char_table_size;
        break;
#endif

      default:
        return CONV_NOTBUILTIN;
    }

  /* Table driven encodings: codes below the base map to themselves,
     everything else has to be found in the table.  */
  if (u < base)
    {
      *to = (char)u;
      return CONV_OK;
    }
  return ucc_find(table, size, u, to) ? CONV_OK : CONV_NOMAP;
}

/*
 * Converts the single byte c in encoding enc to unicode.
 * Returns 0 if enc is not supported or the conversion fails.
 */
unichar
encode_chartouni(char c, NSStringEncoding enc)
{
  unichar       u;

  if (builtin_chartouni((unc)c, enc, &u))
    {
      return u;
    }
#ifdef HAVE_ICONV
  if (iconv_strtoustr(&u, 1, &c, 1, enc) > 0)
    {
      return u;
    }
#endif
  return 0;
}

/*
 * Converts the unicode character u to a byte in encoding enc.
 * Returns '*' if u can not be represented or enc is not supported.
 * For multibyte results from iconv only the first byte is returned.
 */
char
encode_unitochar(unichar u, NSStringEncoding enc)
{
  char  c;

  switch (builtin_unitochar(u, enc, &c))
    {
      case CONV_OK:
        return c;
      case CONV_NOMAP:
        return '*';
      default:
        break;
    }
#ifdef HAVE_ICONV
  {
    char        buf[4];

    if (iconv_ustrtostr(buf, 4, &u, 1, enc) > 0)
      {
        return buf[0];
      }
  }
#endif
  return '*';
}

/*
 * Converts the unicode character u to encoding enc, failing rather than
 * substituting a placeholder.
 * Returns 0 if u can not be represented or enc is not supported (note
 * that a successfully converted NUL is therefore indistinguishable from
 * failure).  For NSUnicodeStringEncoding the character itself is
 * returned.  Single byte results are returned as values 0..255; they are
 * deliberately not sign extended, so bytes >= 0x80 do not turn into huge
 * unsigned values on platforms where char is signed.
 */
unsigned
encode_unitochar_strict(unichar u, NSStringEncoding enc)
{
  char  c;

  if (enc == NSUnicodeStringEncoding)
    {
      return u;
    }
  switch (builtin_unitochar(u, enc, &c))
    {
      case CONV_OK:
        return (unc)c;
      case CONV_NOMAP:
        return 0;
      default:
        break;
    }
#ifdef HAVE_ICONV
  {
    unsigned char       buf[4];
    int                 r = iconv_ustrtostr((char*)buf, 4, &u, 1, enc);

    if (r == 2)
      {
        /* A two byte result is packed into one value as if the bytes
           were a 16 bit integer in host byte order.  */
#ifdef WORDS_BIGENDIAN
        return 256*buf[0] + buf[1];
#else
        return 256*buf[1] + buf[0];
#endif
      }
    if (r > 0)
      {
        return buf[0];
      }
  }
#endif
  return 0;
}

/*
 * Converts at most size bytes of the C string s1 (stopping early at a
 * NUL byte for the built-in encodings) from encoding enc into u1.
 * Returns the number of unichars stored, 0 on failure or if enc is not
 * supported.  u1 must have room for size unichars.
 */
int
encode_strtoustr(unichar *u1, const char *s1, int size, NSStringEncoding enc)
{
  unichar       probe;
  int           count;

  /* Byte 0 converts in every built-in encoding, so a failure here can
     only mean that enc is not one of them.  */
  if (!builtin_chartouni(0, enc, &probe))
    {
#ifdef HAVE_ICONV
      return iconv_strtoustr(u1, size, s1, size, enc);
#else
      return 0;
#endif
    }

  for (count = 0; (count < size) && (s1[count] != 0); count++)
    {
      builtin_chartouni((unc)s1[count], enc, &u1[count]);
    }
  return count;
}

/*
 * Converts at most size unichars of u1 (stopping early at a NUL unichar
 * for the built-in encodings) into bytes of encoding enc stored in s2.
 * Characters without a representation are replaced by '*'.
 * Returns the number of bytes stored, 0 on failure or if enc is not
 * supported.
 */
int
encode_ustrtostr(char *s2, unichar *u1, int size, NSStringEncoding enc)
{
  char  probe;
  int   count;

  /* NUL converts in every built-in encoding, see encode_strtoustr().  */
  if (builtin_unitochar(0, enc, &probe) == CONV_NOTBUILTIN)
    {
#ifdef HAVE_ICONV
      // FIXME: The non-strict encoding is still missing
      return iconv_ustrtostr(s2, size, u1, size, enc);
#else
      return 0;
#endif
    }

  for (count = 0; (count < size) && (u1[count] != (unichar)0); count++)
    {
      if (builtin_unitochar(u1[count], enc, &s2[count]) != CONV_OK)
        {
          s2[count] = '*';
        }
    }
  return count;
}

/*
 * Converts exactly size unichars of u1 (embedded NULs included) into
 * bytes of encoding enc stored in s2.
 * Returns the number of bytes stored, or 0 as soon as a character can
 * not be represented, on failure, or if enc is not supported.
 */
int
encode_ustrtostr_strict(char *s2, unichar *u1, int size, NSStringEncoding enc)
{
  char  probe;
  int   count;

  if (builtin_unitochar(0, enc, &probe) == CONV_NOTBUILTIN)
    {
#ifdef HAVE_ICONV
      return iconv_ustrtostr(s2, size, u1, size, enc);
#else
      return 0;
#endif
    }

  for (count = 0; count < size; count++)
    {
      if (builtin_unitochar(u1[count], enc, &s2[count]) != CONV_OK)
        {
          return 0;
        }
    }
  return count;
}

/*
 * Returns the default C string encoding, asking NSString for it the
 * first time it is needed (unless GetDefEncoding() already cached it).
 */
static NSStringEncoding
default_encoding(void)
{
  if (defEnc == GSUndefinedEncoding)
    {
      defEnc = [NSString defaultCStringEncoding];
    }
  return defEnc;
}

/* encode_chartouni() using the default C string encoding.  */
unichar
chartouni(char c)
{
  return encode_chartouni(c, default_encoding());
}

/* encode_unitochar() using the default C string encoding.  */
char
unitochar(unichar u)
{
  return encode_unitochar(u, default_encoding());
}

/* encode_strtoustr() using the default C string encoding.  */
int
strtoustr(unichar *u1, const char *s1, int size)
{
  return encode_strtoustr(u1, s1, size, default_encoding());
}

/* encode_ustrtostr() using the default C string encoding.  */
int
ustrtostr(char *s2, unichar *u1, int size)
{
  return encode_ustrtostr(s2, u1, size, default_encoding());
}

/*
 * Returns the entry paired with ch in the t_tolower table, or ch itself
 * if the table has no entry for it.  The search relies on the table
 * being sorted by its first column.
 */
unichar
uni_tolower(unichar ch)
{
  int   i;

  for (i = 0; i < t_len_tolower; i++)
    {
      if (t_tolower[i][0] == ch)
        {
          return t_tolower[i][1];
        }
      if (t_tolower[i][0] > ch)
        {
          break;
        }
    }
  return ch;
}

/*
 * Returns the entry paired with ch in the t_toupper table, or ch itself
 * if the table has no entry for it.  The search relies on the table
 * being sorted by its first column.
 */
unichar
uni_toupper(unichar ch)
{
  int   i;

  for (i = 0; i < t_len_toupper; i++)
    {
      if (t_toupper[i][0] == ch)
        {
          return t_toupper[i][1];
        }
      if (t_toupper[i][0] > ch)
        {
          break;
        }
    }
  return ch;
}

/*
 * Returns the 'cop' value stored for u in uni_cop_table, or 0 if u has
 * no entry.  Codes up to 0x0080 are never looked up.
 * NOTE(review): the table is binary searched, so it is assumed to be
 * sorted by code - confirm against unicode/cop.h.
 */
unsigned char
uni_cop(unichar u)
{
  int   low = 0;
  int   high = (int)uni_cop_table_size - 1;     // Last valid index.

  if (u <= (unichar)0x0080)     // no nonspacing in ascii
    {
      return 0;
    }

  /* Signed indices: 'high = mid - 1' must be able to go below zero to
     terminate the loop when u sorts before the first entry.  */
  while (low <= high)
    {
      int       mid = low + (high - low) / 2;
      unichar   code = uni_cop_table[mid].code;

      if (code < u)
        {
          low = mid + 1;
        }
      else if (code > u)
        {
          high = mid - 1;
        }
      else
        {
          return uni_cop_table[mid].cop;
        }
    }
  return 0;
}

/* Returns YES if uni_cop() has a non-zero value for u.  */
BOOL
uni_isnonsp(unichar u)
{
  // check is uni_cop good for this
  return uni_cop(u) ? YES : NO;
}

/*
 * Returns the 'decomp' value stored for u in uni_dec_table, or 0 if u
 * has no entry.  Codes up to 0x0080 are never looked up.
 * NOTE(review): the table is binary searched, so it is assumed to be
 * sorted by code - confirm against unicode/decomp.h.
 */
unichar*
uni_is_decomp(unichar u)
{
  int   low = 0;
  int   high = (int)uni_dec_table_size - 1;     // Last valid index.

  if (u <= (unichar)0x0080)     // no composites in ascii
    {
      return 0;
    }

  while (low <= high)
    {
      int       mid = low + (high - low) / 2;
      unichar   code = uni_dec_table[mid].code;

      if (code < u)
        {
          low = mid + 1;
        }
      else if (code > u)
        {
          high = mid - 1;
        }
      else
        {
          return uni_dec_table[mid].decomp;
        }
    }
  return 0;
}