From 670d202b1a3393da44aeecf3da49e47fb9f94676 Mon Sep 17 00:00:00 2001 From: Richard Frith-MacDonald Date: Thu, 30 Jun 2016 14:21:32 +0000 Subject: [PATCH] ICU string access rewrite git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@39951 72102866-910b-0410-8b05-ffd578937521 --- ChangeLog | 10 +++++ Source/GSICUString.m | 97 +++++++++++++++++++++++++++++--------------- 2 files changed, 75 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4c4403d90..4dbdb3989 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2016-06-30 Richard Frith-Macdonald + + * Source/GSICUString.m: Re-implement the function to let ICU access + the contents of an NSString. The original version looks like it was + broken for cases where an algorithm is moving backwards in the string + (reverse search and regular expression parsing). + I hope the new implementation is correct (or at least clearer); the + ICU documentation is minimal and I didn't find any reference/example + implementations to work from. + 2016-06-28 Richard Frith-Macdonald * Source/NSDebug.m: Yse setjmp/longjmp from NSException.h diff --git a/Source/GSICUString.m b/Source/GSICUString.m index 201557dca..34c809168 100644 --- a/Source/GSICUString.m +++ b/Source/GSICUString.m @@ -53,48 +53,81 @@ UTextNSStringAccess(UText *ut, int64_t nativeIndex, UBool forward) { NSString *str = (NSString*)ut->p; NSUInteger length = [str length]; + NSUInteger nativeStart = ut->chunkNativeStart; + NSUInteger nativeLimit = ut->chunkNativeLimit; NSRange r; - if (nativeIndex >= length) - { - return FALSE; - } - - /* Special case if the chunk already contains this index - */ - if (nativeIndex >= ut->chunkNativeStart - && nativeIndex < (ut->chunkNativeStart + ut->chunkLength)) - { - ut->chunkOffset = nativeIndex - ut->chunkNativeStart; - return TRUE; - } - r = NSMakeRange(nativeIndex, chunkSize); - forward = TRUE; if (forward) { - if (nativeIndex + chunkSize > length) - { - r.length = length - nativeIndex; - } + if (nativeIndex < nativeLimit && nativeIndex >= ut->chunkNativeStart) + { + /* The chunk already contains the index, set the offset + * to match it. + */ + ut->chunkOffset = nativeIndex - ut->chunkNativeStart; + return TRUE; + } + + if (nativeIndex >= length && nativeLimit >= length) + { + /* Asking for a position beyond the end of the string; + * Limit it to point just after the last character. + */ + ut->chunkOffset = ut->chunkLength; + return FALSE; + } + + /* Set up to fill the chunk with characters from the string + * and to start at the beginning of that buffer. + */ + nativeStart = nativeIndex; + nativeLimit = nativeIndex + chunkSize; + if (nativeLimit > length) + { + nativeLimit = length; + } + r.location = nativeIndex; + r.length = nativeLimit - nativeIndex; + ut->chunkOffset = 0; } else { - if (nativeIndex - chunkSize > 0) - { - r.location = nativeIndex - chunkSize; - r.length = chunkSize; - } - else - { - r.location = 0; - r.length = chunkSize - nativeIndex; - } + if (nativeIndex <= nativeLimit && nativeIndex > ut->chunkNativeStart) + { + /* The chunk already contains the index, set the offset + * to match it. + */ + ut->chunkOffset = nativeIndex - ut->chunkNativeStart; + return TRUE; + } + + if (nativeIndex <= 0 && nativeStart <= 0) + { + /* Asking for a position beyond the start of the string; + * Limit it to position of the first character. + */ + ut->chunkOffset = 0; + return FALSE; + } + + nativeLimit = nativeIndex; + if (nativeLimit > length) + { + nativeLimit = length; + } + nativeStart = nativeLimit - chunkSize; + if (nativeStart < 0) + { + nativeStart = 0; + } + r.location = nativeStart; + r.length = nativeLimit - nativeStart; + ut->chunkOffset = r.length; } [str getCharacters: ut->pExtra range: r]; - ut->chunkNativeStart = r.location; - ut->chunkNativeLimit = r.location + r.length; + ut->chunkNativeLimit = nativeLimit; + ut->chunkNativeStart = nativeStart; ut->chunkLength = r.length; - ut->chunkOffset = 0; return TRUE; }