diff --git a/Source/NSScanner.m b/Source/NSScanner.m index bc8e47176..c6cb9ac7f 100644 --- a/Source/NSScanner.m +++ b/Source/NSScanner.m @@ -55,6 +55,8 @@ #import "Foundation/NSException.h" #import "Foundation/NSUserDefaults.h" +#import "GSPThread.h" + #import "GSPrivate.h" @@ -79,6 +81,13 @@ static NSCharacterSet *defaultSkipSet; static SEL memSel; static NSStringEncoding internalEncoding = NSISOLatin1StringEncoding; +/* Table of binary powers of 10 represented by bits in a byte. + * Used to convert decimal integer exponents to doubles. + */ +static double powersOf10[] = { + 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e32, 1.0e64, 1.0e128, 1.0e256 +}; + static inline unichar myGetC(unsigned char c) { unsigned int size = 1; @@ -96,9 +105,14 @@ static inline unichar myGetC(unsigned char c) */ typedef GSString *ivars; #define myLength() (((ivars)_string)->_count) -#define myUnicode(I) (((ivars)_string)->_contents.u[I]) +#define myByte(I) (((ivars)_string)->_contents.c[I]) +#define myUnichar(I) (((ivars)_string)->_contents.u[I]) #define myChar(I) myGetC((((ivars)_string)->_contents.c[I])) -#define myCharacter(I) (_isUnicode ? myUnicode(I) : myChar(I)) +#define myCharacter(I) (_isUnicode ? myUnichar(I) : myChar(I)) +/* Macro for getting character values when we do not care about values + * outside the ASCII range (other than to know they are outside the range). + */ +#define mySevenBit(I) (_isUnicode ? myUnichar(I) : myByte(I)) /* * Scan characters to be skipped. @@ -113,7 +127,6 @@ typedef GSString *ivars; (_scanLocation >= myLength()) ? NO : YES;\ }) -BOOL GSScanDouble(unichar *buf, unsigned length, double *result); /** *
@@ -183,6 +196,85 @@ BOOL GSScanDouble(unichar *buf, unsigned length, double *result);
return scanner;
}
+/** Replaces the string being scanned and resets the scan location to
+ * the start of it, so that one scanner instance can be reused.<br />
+ * Instances of GSUnicodeStringClass or GSCStringClass are retained as
+ * they are; mutable strings and any other kind of NSString are passed
+ * to [_holder initWithString:] and the result is used instead.<br />
+ * Returns NO, after releasing the previous string, if aString is not
+ * an NSString at all; returns YES otherwise.
+ */
+- (BOOL) _setString: (NSString*)aString
+{
+  _scanLocation = 0;
+  if (aString != _string)
+    {
+      Class     c = object_getClass(aString);
+
+      DESTROY(_string);
+      /* The scanner may be reused: forget the representation of the old
+       * string so that a byte string following a unicode one is not read
+       * through the wrong accessor.
+       */
+      _isUnicode = NO;
+      if (GSObjCIsKindOf(c, GSMutableStringClass) == YES)
+        {
+          _string = [_holder initWithString: aString];
+        }
+      else if (GSObjCIsKindOf(c, GSUnicodeStringClass) == YES)
+        {
+          _string = RETAIN(aString);
+        }
+      else if (GSObjCIsKindOf(c, GSCStringClass) == YES)
+        {
+          _string = RETAIN(aString);
+        }
+      else if ([aString isKindOfClass: NSStringClass])
+        {
+          _string = [_holder initWithString: aString];
+        }
+      else
+        {
+          NSLog(@"Scanner initialised with something not a string");
+          return NO;
+        }
+      c = object_getClass(_string);
+      if (GSObjCIsKindOf(c, GSUnicodeStringClass) == YES)
+        {
+          _isUnicode = YES;
+        }
+    }
+  return YES;
+}
+
+/** Used by NSString/GSString to avoid creating/destroying a new scanner
+ * every time we want to scan a double.<br />
+ * A single shared scanner is created on first use and all use of it is
+ * serialised by a mutex.  Returns the result of -scanDouble: applied to
+ * str, or NO if str could not be set as the string to scan.
+ */
++ (BOOL) _scanDouble: (double*)value from: (NSString*)str
+{
+  static pthread_mutex_t myLock = PTHREAD_MUTEX_INITIALIZER;
+  static NSScanner       *doubleScanner = nil;
+  BOOL                   ok = NO;
+
+  pthread_mutex_lock(&myLock);
+  if (nil == doubleScanner)
+    {
+      doubleScanner = [[self alloc] initWithString: @""];
+    }
+  if ([doubleScanner _setString: str])
+    {
+      ok = [doubleScanner scanDouble: value];
+      /* Do not keep the caller's string alive until the next call.
+       */
+      [doubleScanner _setString: @""];
+    }
+  pthread_mutex_unlock(&myLock);
+  return ok;
+}
+
/**
* Initialises the scanner to scan aString. The GNUstep
* implementation may make an internal copy of the original
@@ -194,49 +267,24 @@ BOOL GSScanDouble(unichar *buf, unsigned length, double *result);
*/
- (id) initWithString: (NSString *)aString
{
- Class c;
-
- if ((self = [super init]) == nil)
- return nil;
- /*
- * Ensure that we have a known string so we can access its internals directly.
- */
- if (aString == nil)
+ if ((self = [super init]) != nil)
{
- NSLog(@"Scanner initialised with nil string");
- aString = @"";
+ /* A nil string is replaced by an empty one; _setString: then stores
+ * a string whose internals the scanner can access directly.
+ */
+ if (aString == nil)
+ {
+ NSLog(@"Scanner initialised with nil string");
+ aString = @"";
+ }
+ if (NO == [self _setString: aString])
+ {
+ DESTROY(self);
+ return nil;
+ }
+ [self setCharactersToBeSkipped: defaultSkipSet];
+ _decimal = '.';
}
-
- c = object_getClass(aString);
- if (GSObjCIsKindOf(c, GSMutableStringClass) == YES)
- {
- _string = [_holder initWithString: aString];
- }
- else if (GSObjCIsKindOf(c, GSUnicodeStringClass) == YES)
- {
- _string = RETAIN(aString);
- }
- else if (GSObjCIsKindOf(c, GSCStringClass) == YES)
- {
- _string = RETAIN(aString);
- }
- else if ([aString isKindOfClass: NSStringClass])
- {
- _string = [_holder initWithString: aString];
- }
- else
- {
- DESTROY(self);
- NSLog(@"Scanner initialised with something not a string");
- return nil;
- }
- c = object_getClass(_string);
- if (GSObjCIsKindOf(c, GSUnicodeStringClass) == YES)
- {
- _isUnicode = YES;
- }
- [self setCharactersToBeSkipped: defaultSkipSet];
- _decimal = '.';
return self;
}
@@ -789,11 +837,22 @@ BOOL GSScanDouble(unichar *buf, unsigned length, double *result);
*/
- (BOOL) scanDouble: (double *)value
{
- unichar buf[2000];
- unsigned pos = 0;
unichar c = 0;
- BOOL got_dot = NO;
- BOOL digits = 0;
+  char          mantissa[20];
+  char          *ptr;
+  double        *d;
+  double        result;
+  double        e;
+  int           exponent = 0;
+  BOOL          negativeMantissa = NO;
+  BOOL          negativeExponent = NO;
+  unsigned      shift = 0;
+  int           dropped = 0;    // Integer digits ignored as excess
+  int           mantissaLength;
+  int           dotPos = -1;
+  int           hi = 0;
+  int           lo = 0;
+  BOOL          mantissaDigit = NO;
unsigned int saveScanLocation = _scanLocation;
/* Skip whitespace */
@@ -806,90 +865,208 @@ BOOL GSScanDouble(unichar *buf, unsigned length, double *result);
/* Check for sign */
if (_scanLocation < myLength())
{
- switch (myCharacter(_scanLocation))
+      switch (mySevenBit(_scanLocation))
{
case '+':
_scanLocation++;
break;
case '-':
_scanLocation++;
- buf[pos++] = '-';
+          negativeMantissa = YES;
break;
}
}
-
- while (_scanLocation < myLength() && pos < 1050)
- {
- c = myCharacter(_scanLocation);
- if ((c >= '0') && (c <= '9'))
- {
- digits++;
- buf[pos++] = c;
- }
- else if (!got_dot && (c == _decimal))
- {
- got_dot = YES;
- buf[pos++] = '.';
- }
- else
- {
- /* Any other character terminates the number. */
- break;
- }
- _scanLocation++;
- }
- if (0 == digits)
+  if (_scanLocation >= myLength())
{
_scanLocation = saveScanLocation;
return NO;
}
- /* Check for trailing exponent */
- if ((_scanLocation < myLength()) && ((c == 'e') || (c == 'E')))
+  /* Now we build up the mantissa digits.  Leading zeros are ignored, but
+   * those after the decimal point are counted in order to adjust the
+   * exponent later.
+   * Excess digits are also ignored ... a double can only handle up to 18
+   * digits of precision ... but those before the decimal point are counted
+   * so that the exponent can be scaled up to compensate.
+   */
+  for (mantissaLength = 0; _scanLocation < myLength(); _scanLocation++)
+    {
+      c = mySevenBit(_scanLocation);
+      if (c < '0' || c > '9')
+        {
+          if (_decimal != c || dotPos >= 0)
+            {
+              break;    // End of mantissa
+            }
+          dotPos = mantissaLength;
+        }
+      else
+        {
+          mantissaDigit = YES;
+          if (0 == mantissaLength && '0' == c)
+            {
+              if (dotPos >= 0)
+                {
+                  shift++;      // Leading zero after decimal place
+                }
+            }
+          else if (mantissaLength < 19)
+            {
+              mantissa[mantissaLength++] = c;
+            }
+          else if (dotPos < 0)
+            {
+              dropped++;        // Ignored digit before decimal point
+            }
+        }
+    }
+  if (NO == mantissaDigit)
+    {
+      _scanLocation = saveScanLocation;
+      return NO;
+    }
+  if (dotPos < 0)
+    {
+      /* No decimal point: every stored digit belongs to the integer part.
+       * Decide this before the length is capped below, otherwise a
+       * nineteenth integer digit is dropped without scaling the result.
+       */
+      dotPos = mantissaLength;
+    }
+  if (mantissaLength > 18)
+    {
+      /* Mantissa too long ... ignore excess.
+       */
+      mantissaLength = 18;
+    }
+  dotPos -= mantissaLength;     // Exponent offset for decimal point
+
+  /* Convert mantissa characters to a double value
+   */
+  for (ptr = mantissa; mantissaLength > 9; mantissaLength -= 1)
+    {
+      c = *ptr;
+      ptr += 1;
+      hi = hi * 10 + (c - '0');
+    }
+  for (; mantissaLength > 0; mantissaLength -= 1)
+    {
+      c = *ptr;
+      ptr += 1;
+      lo = lo * 10 + (c - '0');
+    }
+  result = (1.0e9 * hi) + lo;
+
+  /* Scan the exponent (if any)
+   */
+  if (_scanLocation < myLength()
+    && ((c = mySevenBit(_scanLocation)) == 'e' || c == 'E'))
{
unsigned saveExpLoc = _scanLocation;
- unsigned saveExpPos = pos;
- BOOL got_exp = NO;
- buf[pos++] = c;
- _scanLocation++;
- if (_scanLocation < myLength())
+      _scanLocation++;     // Step past E/e
+      if (_scanLocation >= myLength())
{
- switch (myCharacter(_scanLocation))
+          _scanLocation = saveExpLoc;      // No exponent
+        }
+      else
+        {
+          switch (mySevenBit(_scanLocation))
{
case '+':
_scanLocation++;
break;
case '-':
_scanLocation++;
- buf[pos++] = '-';
+                negativeExponent = YES;
break;
}
- }
- while (_scanLocation < myLength() && pos < 1060)
- {
- c = myCharacter(_scanLocation);
- if ((c < '0') || (c > '9'))
+          if (_scanLocation >= myLength()
+            || (c = mySevenBit(_scanLocation)) < '0' || c > '9')
{
- break;
+              _scanLocation = saveExpLoc;  // No exponent
+            }
+          else
+            {
+              exponent = c - '0';
+              _scanLocation++;
+              while (_scanLocation < myLength()
+                && (c = mySevenBit(_scanLocation)) >= '0' && c <= '9')
+                {
+                  /* Saturate rather than overflow the int on an absurdly
+                   * long run of digits; such values exceed the range below.
+                   */
+                  if (exponent < 100000000)
+                    {
+                      exponent = exponent * 10 + (c - '0');
+                    }
+                  _scanLocation++;
+                }
}
- got_exp = YES;
- _scanLocation++;
- buf[pos++] = c;
- }
- if (!got_exp)
- {
- /* No exponent found: the e/E terminated the number
- */
- _scanLocation = saveExpLoc;
- pos = saveExpPos;
}
}
- if (NO == GSScanDouble(buf, pos, value))
+  /* Add in the amount to shift the exponent depending on the position
+   * of the decimal point in the mantissa and check the adjusted sign
+   * of the exponent.
+   */
+  if (YES == negativeExponent)
+    {
+      exponent = dotPos - exponent;
+    }
+  else
+    {
+      exponent = dotPos + exponent;
+    }
+  exponent -= shift;
+  exponent += dropped;          // Scale up for ignored integer digits
+  if (exponent < 0)
+    {
+      negativeExponent = YES;
+      exponent = -exponent;
+    }
+  else
+    {
+      negativeExponent = NO;
+    }
+  if (exponent > 511)
{
_scanLocation = saveScanLocation;
- return NO;
+      return NO;        // Maximum exponent exceeded
+    }
+
+  /* Convert the exponent to a double then apply it to the value from
+   * the mantissa.
+   */
+  e = 1.0;
+  for (d = powersOf10; exponent != 0; exponent >>= 1, d += 1)
+    {
+      if (exponent & 1)
+        {
+          e *= *d;
+        }
+    }
+  if (YES == negativeExponent)
+    {
+      result /= e;
+    }
+  else
+    {
+      result *= e;
+    }
+
+  /* Store the result unless the caller passed a null pointer.
+   */
+  if (0 != value)
+    {
+      if (YES == negativeMantissa)
+        {
+          *value = -result;
+        }
+      else
+        {
+          *value = result;
+        }
}
return YES;
}
@@ -948,7 +1106,7 @@ BOOL GSScanDouble(unichar *buf, unsigned length, double *result);
{
while (_scanLocation < myLength())
{
- if ((*memImp)(aSet, memSel, myUnicode(_scanLocation)) == NO)
+ if ((*memImp)(aSet, memSel, myUnichar(_scanLocation)) == NO)
break;
_scanLocation++;
}
@@ -1007,7 +1165,7 @@ BOOL GSScanDouble(unichar *buf, unsigned length, double *result);
{
while (_scanLocation < myLength())
{
- if ((*memImp)(aSet, memSel, myUnicode(_scanLocation)) == YES)
+ if ((*memImp)(aSet, memSel, myUnichar(_scanLocation)) == YES)
break;
_scanLocation++;
}
@@ -1285,287 +1443,3 @@ BOOL GSScanDouble(unichar *buf, unsigned length, double *result);
}
@end
-/*
- * Some utilities
- */
-BOOL
-GSScanInt(unichar *buf, unsigned length, int *result)
-{
- unsigned int num = 0;
- const unsigned int limit = UINT_MAX / 10;
- BOOL negative = NO;
- BOOL overflow = NO;
- BOOL got_digits = NO;
- unsigned int pos = 0;
-
- /* Check for sign */
- if (pos < length)
- {
- switch (buf[pos])
- {
- case '+':
- pos++;
- break;
- case '-':
- negative = YES;
- pos++;
- break;
- }
- }
-
- /* Process digits */
- while (pos < length)
- {
- unichar digit = buf[pos];
-
- if ((digit < '0') || (digit > '9'))
- break;
- if (!overflow)
- {
- if (num >= limit)
- overflow = YES;
- else
- num = num * 10 + (digit - '0');
- }
- pos++;
- got_digits = YES;
- }
-
- /* Save result */
- if (!got_digits)
- {
- return NO;
- }
- if (result)
- {
- if (overflow
- || (num > (negative ? (NSUInteger)INT_MIN : (NSUInteger)INT_MAX)))
- *result = negative ? INT_MIN: INT_MAX;
- else if (negative)
- *result = -num;
- else
- *result = num;
- }
- return YES;
-}
-
-/* Table of binary powers of 10 represented by bits in a byte.
- * Used to convert decimal integer exponents to doubles.
- */
-static double powersOf10[] = {
- 1.0e1, 1.0e2, 1.0e4, 1.0e8, 1.0e16, 1.0e32, 1.0e64, 1.0e128, 1.0e256
-};
-
-/**
- * Scan in a double value in the standard locale ('.' as decimal point).
- * Return YES on success, NO on failure.
- * The value pointed to by result is unmodified on failure.
- * No value is returned in result if it is a null pointer.
- */
-BOOL
-GSScanDouble(unichar *buf, unsigned length, double *result)
-{
- unichar c = 0;
- char mantissa[20];
- const char *ptr;
- double *d;
- double value;
- double e;
- int exponent = 0;
- BOOL negativeMantissa = NO;
- BOOL negativeExponent = NO;
- unsigned shift = 0;
- unsigned pos = 0;
- int mantissaLength;
- int dotPos = -1;
- int hi = 0;
- int lo = 0;
-
- /* Skip whitespace */
- while (pos < length && isspace((int)buf[pos]))
- {
- pos++;
- }
- if (pos >= length)
- {
- return NO;
- }
-
- /* Check for sign */
- switch (buf[pos])
- {
- case '+':
- pos++;
- break;
- case '-':
- negativeMantissa = YES;
- pos++;
- break;
- }
- if (pos >= length)
- {
- return NO;
- }
-
- /* Scan the mantissa ... at most 18 digits and a decimal point.
- */
- for (mantissaLength = 0; pos < length && mantissaLength < 19; pos++)
- {
- mantissa[mantissaLength] = c = buf[pos];
- if (!isdigit(c))
- {
- if ('.' != c || dotPos >= 0)
- {
- break; // End of mantissa
- }
- dotPos = mantissaLength;
- }
- else
- {
- mantissaLength++;
- }
- if (19 == mantissaLength)
- {
- if (dotPos > 0 && '0' == mantissa[0])
- {
- dotPos--;
- mantissaLength--;
- memmove(mantissa, mantissa + 1, 18);
- }
- else if (0 == dotPos && '0' == mantissa[1])
- {
- mantissaLength--;
- shift++;
- memmove(mantissa + 1, mantissa + 2, 17);
- }
- }
- }
- if (0 == mantissaLength)
- {
- return NO; // No mantissa ... not a double
- }
- if (mantissaLength > 18)
- {
- /* Mantissa too long ... ignore excess.
- */
- mantissaLength = 18;
- }
- if (dotPos < 0)
- {
- dotPos = mantissaLength;
- }
- dotPos -= mantissaLength; // Exponent offset for decimal point
-
- /* Convert mantissa characters to a double value
- */
- for (ptr = mantissa; mantissaLength > 9; mantissaLength -= 1)
- {
- c = *ptr;
- ptr += 1;
- hi = hi * 10 + (c - '0');
- }
- for (; mantissaLength > 0; mantissaLength -= 1)
- {
- c = *ptr;
- ptr += 1;
- lo = lo * 10 + (c - '0');
- }
- value = (1.0e9 * hi) + lo;
-
- /* Scan the exponent (if any)
- */
- if (pos < length && ('E' == (c = buf[pos]) || 'e' == c))
- {
- if (++pos >= length)
- {
- return NO; // Missing exponent
- }
- c = buf[pos];
- if ('-' == c)
- {
- negativeExponent = YES;
- if (++pos >= length)
- {
- return NO; // Missing exponent
- }
- c = buf[pos];
- }
- else if ('+' == c)
- {
- if (++pos >= length)
- {
- return NO; // Missing exponent
- }
- c = buf[pos];
- }
- while (isdigit(c))
- {
- exponent = exponent * 10 + (c - '0');
- if (++pos >= length)
- {
- break;
- }
- c = buf[pos];
- }
- }
-
- /* Add in the amount to shift the exponent depending on the position
- * of the decimal point in the mantissa and check the adjusted sign
- * of the exponent.
- */
- if (YES == negativeExponent)
- {
- exponent = dotPos - exponent;
- }
- else
- {
- exponent = dotPos + exponent;
- }
- exponent -= shift;
- if (exponent < 0)
- {
- negativeExponent = YES;
- exponent = -exponent;
- }
- else
- {
- negativeExponent = NO;
- }
- if (exponent > 511)
- {
- return NO; // Maximum exponent exceeded
- }
-
- /* Convert the exponent to a double then apply it to the value from
- * the mantissa.
- */
- e = 1.0;
- for (d = powersOf10; exponent != 0; exponent >>= 1, d += 1)
- {
- if (exponent & 1)
- {
- e *= *d;
- }
- }
- if (YES == negativeExponent)
- {
- value /= e;
- }
- else
- {
- value *= e;
- }
-
- if (0 != result)
- {
- if (YES == negativeMantissa)
- {
- *result = -value;
- }
- else
- {
- *result = value;
- }
- }
- return YES;
-}