mirror of
https://github.com/gnustep/libs-base.git
synced 2025-04-22 16:33:29 +00:00
Minor unicode range handling improvements
This commit is contained in:
parent
7bf1179f60
commit
7274cbaa55
4 changed files with 43 additions and 29 deletions
10
ChangeLog
10
ChangeLog
|
@ -1,3 +1,13 @@
|
|||
2018-04-09 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Source/Additions/Unicode.m:
|
||||
* Source/NSString.m:
|
||||
Move uni_isnonsp() to NSString.m and make it use nonBaseCharacterSet
|
||||
so that it correctly copes with both surrogate pairs and traditional
|
||||
composed character sequences. NB. David points out that this is not
|
||||
full/correct unicode grapheme cluster handling (it's the main part
|
||||
of the handling for 'legacy' grapheme clusters).
|
||||
|
||||
2018-04-04 Richard Frith-Macdonald <rfm@gnu.org>
|
||||
|
||||
* Headers/Foundation/NSLock.h:
|
||||
|
|
|
@ -652,22 +652,7 @@ uni_cop(unichar u)
|
|||
return GSPrivateUniCop(u);
|
||||
}
|
||||
|
||||
BOOL
|
||||
uni_isnonsp(unichar u)
|
||||
{
|
||||
/*
|
||||
* Treating low (trailing) surrogates, 0xDC00-0xDFFF, as non-spacing is a convenient solution
|
||||
* to a number of issues with UTF-16
|
||||
*/
|
||||
if ((u >= 0xdc00) && (u <= 0xdfff))
|
||||
return YES;
|
||||
|
||||
// FIXME check whether uni_cop is good for this
|
||||
if (GSPrivateUniCop(u))
|
||||
return YES;
|
||||
else
|
||||
return NO;
|
||||
}
|
||||
// uni_isnonsp(unichar u) now implemented in NSString.m
|
||||
|
||||
unichar*
|
||||
uni_is_decomp(unichar u)
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
/* COP table */
|
||||
/* COP table
|
||||
* This records diacriticals and their combining class
|
||||
* FIXME ... needs updating to latest unicode
|
||||
*/
|
||||
/*
|
||||
Copyright (C) 2005 Free Software Foundation
|
||||
|
||||
|
|
|
@ -151,7 +151,9 @@ static GSPlaceholderString *defaultPlaceholderString;
|
|||
static NSMapTable *placeholderMap;
|
||||
static NSLock *placeholderLock;
|
||||
|
||||
static SEL cMemberSel = 0;
|
||||
static SEL cMemberSel = 0;
|
||||
static NSCharacterSet *nonBase = nil;
|
||||
static BOOL (*nonBaseImp)(id, SEL, unichar) = 0;
|
||||
|
||||
/* Macro to return the receiver if it is already immutable, but an
|
||||
* autoreleased copy otherwise. Used where we have to return an
|
||||
|
@ -196,6 +198,24 @@ static void setupWhitespace(void)
|
|||
}
|
||||
}
|
||||
|
||||
/* A non-spacing character is one which is part of a 'user-perceived character'
|
||||
* where the user perceived character consists of a base character followed
|
||||
* by a sequence of non-spacing characters. Non-spacing characters do not
|
||||
* exist in isolation.
|
||||
* eg. an accented 'a' might be represented as the 'a' followed by the accent.
|
||||
*/
|
||||
inline BOOL
|
||||
uni_isnonsp(unichar u)
|
||||
{
|
||||
/* Treating low (trailing) surrogates, 0xDC00-0xDFFF, as non-spacing is a convenient solution
|
||||
* to a number of issues with UTF-16
|
||||
*/
|
||||
if ((u >= 0xdc00) && (u <= 0xdfff))
|
||||
return YES;
|
||||
|
||||
return (*nonBaseImp)(nonBase, cMemberSel, u);
|
||||
}
|
||||
|
||||
/*
|
||||
* Include sequence handling code with instructions to generate search
|
||||
* and compare functions for NSString objects.
|
||||
|
@ -778,6 +798,11 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
|
|||
gcrSel = @selector(getCharacters:range:);
|
||||
ranSel = @selector(rangeOfComposedCharacterSequenceAtIndex:);
|
||||
|
||||
nonBase = [NSCharacterSet nonBaseCharacterSet];
|
||||
nonBase = [NSObject leakAt: &nonBase];
|
||||
nonBaseImp
|
||||
= (BOOL(*)(id,SEL,unichar))[nonBase methodForSelector: cMemberSel];
|
||||
|
||||
_DefaultStringEncoding = GSPrivateDefaultCStringEncoding();
|
||||
_ByteEncodingOk = GSPrivateIsByteEncoding(_DefaultStringEncoding);
|
||||
|
||||
|
@ -2764,9 +2789,6 @@ GSICUCollatorOpen(NSStringCompareOptions mask, NSLocale *locale)
|
|||
*/
|
||||
- (NSRange) rangeOfComposedCharacterSequenceAtIndex: (NSUInteger)anIndex
|
||||
{
|
||||
static NSCharacterSet *nonbase = nil;
|
||||
static SEL nbSel;
|
||||
static BOOL (*nbImp)(id, SEL, unichar) = 0;
|
||||
unsigned start;
|
||||
unsigned end;
|
||||
unsigned length = [self length];
|
||||
|
@ -2778,22 +2800,16 @@ static BOOL (*nbImp)(id, SEL, unichar) = 0;
|
|||
caiImp = (unichar (*)(NSString*,SEL,NSUInteger))
|
||||
[self methodForSelector: caiSel];
|
||||
|
||||
if (nil == nonbase)
|
||||
{
|
||||
nonbase = [[NSCharacterSet nonBaseCharacterSet] retain];
|
||||
nbSel = @selector(characterIsMember:);
|
||||
nbImp = (BOOL(*)(id,SEL,unichar))[nonbase methodForSelector: nbSel];
|
||||
}
|
||||
for (start = anIndex; start > 0; start--)
|
||||
{
|
||||
ch = (*caiImp)(self, caiSel, start);
|
||||
if ((*nbImp)(nonbase, nbSel, ch) == NO)
|
||||
if (uni_isnonsp(ch) == NO)
|
||||
break;
|
||||
}
|
||||
for (end = start+1; end < length; end++)
|
||||
{
|
||||
ch = (*caiImp)(self, caiSel, end);
|
||||
if ((*nbImp)(nonbase, nbSel, ch) == NO)
|
||||
if (uni_isnonsp(ch) == NO)
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue