From d1254419852d1816e204d98b081884fe31c6ea07 Mon Sep 17 00:00:00 2001 From: rfm Date: Sat, 6 Jun 2009 15:52:11 +0000 Subject: [PATCH] Add some optimisation for converting from unicode to latin1 or ascii ... move as much as possible outside the loop iterating over the characters. git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@28332 72102866-910b-0410-8b05-ffd578937521 --- ChangeLog | 5 ++ Source/Additions/Unicode.m | 130 ++++++++++++++++++++++++++++--------- 2 files changed, 104 insertions(+), 31 deletions(-) diff --git a/ChangeLog b/ChangeLog index a31b4b2d2..204eb4444 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2009-06-06 Richard Frith-Macdonald + + * Source/Additions/Unicode.m: Optimise case where we are converting + from unicode to latin1 or ascii. + 2009-06-06 Wolfgang Lux * Source/GSFFIInvocation.m diff --git a/Source/Additions/Unicode.m b/Source/Additions/Unicode.m index c628d95c3..c16971391 100644 --- a/Source/Additions/Unicode.m +++ b/Source/Additions/Unicode.m @@ -1922,53 +1922,121 @@ GSFromUnicode(unsigned char **dst, unsigned int *size, const unichar *src, goto bases; bases: - if (strict == NO) + if (dst == 0) { - while (spos < slen) + /* Just counting bytes, and we know there is exactly one + * unicode codepoint needed for each character. + */ + dpos = slen; + } + else + { + /* Because we know that each ascii character is exactly + * one unicode character, we can check the destination + * buffer size and allocate more space in one go, before + * entering the loop where we deal with each character. + */ + if (slen > bsize) { - unichar u = src[spos++]; - - if (swapped == YES) + if (zone == 0) { - u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8)); - } - - if (dpos >= bsize) - { - GROW(); - } - if (u < base) - { - ptr[dpos++] = (unsigned char)u; + result = NO; /* No buffer growth possible ... fail. 
*/ + goto done; } else { - ptr[dpos++] = '?'; + uint8_t *tmp; + +#if GS_WITH_GC + tmp = NSAllocateCollectable(slen, 0); +#else + tmp = NSZoneMalloc(zone, slen); + if (ptr != buf) + { + NSZoneFree(zone, ptr); + } +#endif + ptr = tmp; + if (ptr == 0) + { + result = NO; /* Not enough memory */ + break; + } + bsize = slen; + } + } + } + if (strict == NO) + { + if (swapped == YES) + { + while (spos < slen) + { + unichar u = src[spos++]; + + u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8)); + if (u < base) + { + ptr[dpos++] = (unsigned char)u; + } + else + { + ptr[dpos++] = '?'; + } + } + } + else + { + while (spos < slen) + { + unichar u = src[spos++]; + + if (u < base) + { + ptr[dpos++] = (unsigned char)u; + } + else + { + ptr[dpos++] = '?'; + } } } } else { - while (spos < slen) + if (swapped == YES) { - unichar u = src[spos++]; + while (spos < slen) + { + unichar u = src[spos++]; - if (swapped == YES) - { u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8)); + if (u < base) + { + ptr[dpos++] = (unsigned char)u; + } + else + { + result = NO; + goto done; + } } - if (dpos >= bsize) + } + else + { + while (spos < slen) { - GROW(); - } - if (u < base) - { - ptr[dpos++] = (unsigned char)u; - } - else - { - result = NO; - goto done; + unichar u = src[spos++]; + + if (u < base) + { + ptr[dpos++] = (unsigned char)u; + } + else + { + result = NO; + goto done; + } } } }