libs-base/Source/NSJSONSerialization.m

980 lines
24 KiB
Mathematica
Raw Normal View History

/**
* NSJSONSerialization.m. This file provides an implementation of the JSON
* reading and writing APIs introduced with OS X 10.7.
*
* The parser is implemented as a simple recursive parser. The JSON is
* unambiguous, so this requires no read-ahead or backtracking. The source of
* data for the parse can be either a static JSON string or some JSON data.
*/
#import <Foundation/Foundation.h>
#import <GNUstepBase/NSObject+GNUstepBase.h>
#import "GSFastEnumeration.h"
/**
* The number of (unicode) characters to fetch from the source at once.
* This is also the capacity of the character buffer held in the parser state.
*/
#define BUFFER_SIZE 64
/**
* Structure for storing the internal state of the parser. An instance of this
* is allocated on the stack by the public entry points, and a pointer to it is
* passed down to each parse function.
*/
typedef struct ParserStateStruct
{
/**
* The data source. This is either an NSString or an NSInputStream, depending
* on which entry point started the parse.
*/
id source;
/**
* The length, in bytes, of the byte order mark in the source. 0 if there is
* no BOM.
*/
int BOMLength;
/**
* The string encoding used in the source, as detected by getEncoding().
*/
NSStringEncoding enc;
/**
* Function used to refill the buffer with (up to) the next BUFFER_SIZE
* characters from the source.
*/
void (*updateBuffer)(struct ParserStateStruct*);
/**
* Buffer used to store the next characters from the input.
*/
unichar buffer[BUFFER_SIZE];
/**
* The index of the parser within the buffer.
*/
NSUInteger bufferIndex;
/**
* The number of characters (not bytes) currently stored within the buffer.
*/
NSUInteger bufferLength;
/**
* The index of the parser within the source. For a string source this is a
* character index; the stream reader uses it as the amount of data still to
* be discarded from the stream, and temporarily stores negative values here.
*/
NSInteger sourceIndex;
/**
* Should the parser construct mutable string objects?
*/
BOOL mutableStrings;
/**
* Should the parser construct mutable containers?
*/
BOOL mutableContainers;
/**
* Error value, if this parser is currently in an error state, nil otherwise.
* The structure does not retain the error.
*/
NSError *error;
} ParserState;
/**
 * Pulls the next group of characters from a string source.
 *
 * Copies up to BUFFER_SIZE characters, starting at state->sourceIndex, from
 * the NSString in state->source into state->buffer and resets the buffer
 * index.  If the source index is negative, or at/past the end of the string,
 * the buffer is left empty and its first element is set to 0, which the parse
 * functions treat as the end-of-input marker.
 */
static inline void
updateStringBuffer(ParserState* state)
{
  NSUInteger	end = [state->source length];
  NSRange	r = NSMakeRange(0, 0);

  /* sourceIndex is signed and the parse functions may advance it beyond the
   * end of the string (eg. by consuming a character after the end of input
   * has already been reached).  Unsigned arithmetic on such a value wraps
   * round and yields a range for which -getCharacters:range: raises, so we
   * only build a non-empty range when the index really is inside the string.
   */
  if (state->sourceIndex >= 0 && (NSUInteger)state->sourceIndex < end)
    {
      r.location = (NSUInteger)state->sourceIndex;
      r.length = end - r.location;
      if (r.length > BUFFER_SIZE)
        {
          r.length = BUFFER_SIZE;
        }
      [state->source getCharacters: state->buffer range: r];
    }
  state->bufferIndex = 0;
  state->bufferLength = r.length;
  if (r.length == 0)
    {
      // Nothing left: publish the end-of-input marker.
      state->buffer[0] = 0;
    }
}
/**
 * Reads exactly count bytes from stream into bytes, repeating the read if the
 * stream delivers fewer bytes than requested.  Returns NO if the stream ends
 * or reports an error before count bytes have been obtained.
 */
static inline BOOL
readStreamBytes(NSInputStream *stream, uint8_t *bytes, NSUInteger count)
{
  NSUInteger	done = 0;

  while (done < count)
    {
      NSInteger	got = [stream read: bytes + done maxLength: count - done];

      if (got <= 0)
        {
          return NO;
        }
      done += (NSUInteger)got;
    }
  return YES;
}

/**
 * Pulls the next group of characters from a stream source.
 *
 * First reads and discards state->sourceIndex bytes from the stream (the data
 * consumed from the previous buffer).  It then refills state->buffer, either
 * by decoding the stream's own buffer (when -getBuffer:length: provides one)
 * or by reading a single character directly from the stream.
 *
 * If the stream ends or fails, the buffer is emptied, buffer[0] is set to 0
 * (the end-of-input marker) and, unless an error is already recorded, the
 * stream's error is stored in state->error.
 */
static inline void
updateStreamBuffer(ParserState* state)
{
  NSInputStream	*stream = state->source;
  uint8_t	*buffer = NULL;
  NSUInteger	length = 0;
  NSString	*str;

  /* Discard anything that we've already consumed.
   * NOTE(review): sourceIndex is advanced once per character by the parse
   * functions but is used here as a byte count; the two only agree for
   * single-byte characters - confirm the intended behaviour for multi-byte
   * input.
   */
  while (state->sourceIndex > 0)
    {
      uint8_t		discard[128];
      NSUInteger	toRead = 128;
      NSInteger		amountRead;

      if (state->sourceIndex < 128)
        {
          toRead = state->sourceIndex;
        }
      amountRead = [stream read: discard maxLength: toRead];
      /* If something goes wrong with the stream (error or premature end),
       * report the stream error and stop; carrying on would loop forever
       * since sourceIndex could never reach zero.
       */
      if (amountRead <= 0)
        {
          if (nil == state->error)
            {
              state->error = [stream streamError];
            }
          state->sourceIndex = 0;
          state->bufferIndex = 0;
          state->bufferLength = 0;
          state->buffer[0] = 0;
          return;
        }
      state->sourceIndex -= amountRead;
    }

  /* Get the temporary buffer.  We need to read from here so that we can read
   * characters from the stream without advancing the stream position.
   * If the stream doesn't do buffering, then we need to get data one character
   * at a time.
   */
  if (![stream getBuffer: &buffer length: &length])
    {
      uint8_t		bytes[7] = { 0 };
      NSUInteger	count = 0;	// Characters stored in state->buffer

      switch (state->enc)
        {
          case NSUTF8StringEncoding:
            {
              // Read one UTF8 character; the lead byte gives its length.
              if (readStreamBytes(stream, bytes, 1))
                {
                  NSUInteger	extra = 0;
                  BOOL		valid = YES;

                  if (bytes[0] < 0x80)
                    {
                      extra = 0;
                    }
                  else if ((bytes[0] & 0xE0) == 0xC0)
                    {
                      extra = 1;
                    }
                  else if ((bytes[0] & 0xF0) == 0xE0)
                    {
                      extra = 2;
                    }
                  else if ((bytes[0] & 0xF8) == 0xF0)
                    {
                      extra = 3;
                    }
                  else
                    {
                      valid = NO;	// Stray continuation or illegal byte
                    }
                  if (valid && extra > 0)
                    {
                      valid = readStreamBytes(stream, &bytes[1], extra);
                    }
                  if (valid)
                    {
                      if (0 == extra)
                        {
                          state->buffer[0] = bytes[0];
                          count = 1;
                        }
                      else
                        {
                          /* bytes is zero filled, so it is a nul terminated
                           * C string holding exactly one character, which
                           * decodes to one or two UTF-16 units.
                           */
                          str = [[NSString alloc]
                            initWithUTF8String: (char*)bytes];
                          count = [str length];
                          if (count > 2)
                            {
                              count = 2;
                            }
                          if (count > 0)
                            {
                              [str getCharacters: state->buffer
                                           range: NSMakeRange(0, count)];
                            }
                          [str release];
                        }
                    }
                }
              break;
            }
          case NSUTF32LittleEndianStringEncoding:
            {
              if (readStreamBytes(stream, bytes, 4))
                {
                  // NB. the cast truncates code points above U+FFFF
                  state->buffer[0] = (unichar)NSSwapLittleIntToHost
                    (*(unsigned int*)(void*)bytes);
                  count = 1;
                }
              break;
            }
          case NSUTF32BigEndianStringEncoding:
            {
              if (readStreamBytes(stream, bytes, 4))
                {
                  // NB. the cast truncates code points above U+FFFF
                  state->buffer[0] = (unichar)NSSwapBigIntToHost
                    (*(unsigned int*)(void*)bytes);
                  count = 1;
                }
              break;
            }
          case NSUTF16LittleEndianStringEncoding:
            {
              if (readStreamBytes(stream, bytes, 2))
                {
                  state->buffer[0] = (unichar)NSSwapLittleShortToHost
                    (*(unsigned short*)(void*)bytes);
                  count = 1;
                }
              break;
            }
          case NSUTF16BigEndianStringEncoding:
            {
              // A UTF-16 unit is two bytes, whatever the byte order.
              if (readStreamBytes(stream, bytes, 2))
                {
                  state->buffer[0] = (unichar)NSSwapBigShortToHost
                    (*(unsigned short*)(void*)bytes);
                  count = 1;
                }
              break;
            }
          default:
            GS_UNREACHABLE();
        }
      if (0 == count)
        {
          // End of stream, read failure or undecodable data.
          if (nil == state->error)
            {
              state->error = [stream streamError];
            }
          state->buffer[0] = 0;
        }
      /* The bytes have already been read from the stream, so make the source
       * index negative: it will be back at 0 (nothing to discard) once the
       * characters now in the buffer have been consumed.
       */
      state->sourceIndex = -(NSInteger)count;
      state->bufferIndex = 0;
      state->bufferLength = count;
      // The stream has no buffer to decode, so we are done.
      return;
    }

  // Use an NSString to do the character set conversion.  We could do this more
  // efficiently.  We could also reuse the string.
  str = [[NSString alloc] initWithBytesNoCopy: buffer
                                       length: length
                                     encoding: state->enc
                                 freeWhenDone: NO];
  // Just use the string buffer fetch function to actually get the data
  state->source = str;
  updateStringBuffer(state);
  state->source = stream;
  // The characters have been copied into state->buffer.
  [str release];
}
/**
 * Peeks at the character under the parser's cursor without consuming it.
 * If the buffer has been used up, it is refilled from the source first.
 */
static inline unichar
currentChar(ParserState *state)
{
  BOOL	exhausted = !(state->bufferIndex < state->bufferLength);

  if (exhausted)
    {
      state->updateBuffer(state);
    }
  return state->buffer[state->bufferIndex];
}
/**
 * Advances the parser by one character and returns the character which is
 * then current, refilling the buffer from the source when required.
 */
static inline unichar
consumeChar(ParserState *state)
{
  state->sourceIndex += 1;
  state->bufferIndex += 1;

  // Ran off the end of the fixed size buffer: fetch the next chunk.
  if (!(state->bufferIndex < BUFFER_SIZE))
    {
      state->updateBuffer(state);
    }
  // Fewer characters buffered than the cursor position: fetch again.
  if (!(state->bufferIndex < state->bufferLength))
    {
      state->updateBuffer(state);
    }
  return state->buffer[state->bufferIndex];
}
/**
 * Consumes all whitespace characters and returns the first non-space
 * character.  Returns 0 if we're past the end of the input.
 *
 * The characters are compared explicitly rather than with isspace(): the
 * ctype functions are only defined for values representable as an unsigned
 * char (or EOF), while a unichar may hold any 16-bit value.  RFC 4627 only
 * permits space, tab, line feed and carriage return; vertical tab and form
 * feed are also skipped to match what the isspace() based code accepted.
 */
static inline unichar
consumeSpace(ParserState *state)
{
  unichar	c = currentChar(state);

  while (c == ' ' || c == '\t' || c == '\n' || c == '\r'
    || c == '\v' || c == '\f')
    {
      c = consumeChar(state);
    }
  return c;
}
/**
 * Sets an error state.
 *
 * Stores a new (autoreleased) NSError in state->error describing the
 * character at the parser's current position and the current source index.
 */
static void
parseError(ParserState *state)
{
  /* TODO: Work out what stuff should go in this and probably add them to
   * parameters for this function.
   */
  NSString	*reason;
  NSDictionary	*userInfo;

  /* sourceIndex is an NSInteger, which is wider than int on 64-bit
   * platforms, so it is passed as a long with a matching format.
   */
  reason = [NSString stringWithFormat:
    @"Unexpected character %c at index %ld",
    (char)currentChar(state), (long)state->sourceIndex];
  userInfo = [[NSDictionary alloc] initWithObjectsAndKeys:
    _(@"JSON Parse error"), NSLocalizedDescriptionKey,
    _(reason), NSLocalizedFailureReasonErrorKey,
    nil];
  state->error = [NSError errorWithDomain: NSCocoaErrorDomain
                                     code: 0
                                 userInfo: userInfo];
  [userInfo release];
}
/* Forward declaration: parseArray() and parseObject() call parseValue(),
 * which is defined after them.
 */
NS_RETURNS_RETAINED static id parseValue(ParserState *state);
/**
 * Parse a string, as defined by RFC4627, section 2.5
 *
 * On entry the current character must be the opening double quote.  On
 * success the closing quote has been consumed and a retained string is
 * returned; it is left mutable only if state->mutableStrings is set.
 * On failure nil is returned and state->error is set.
 */
NS_RETURNS_RETAINED static NSString*
parseString(ParserState *state)
{
  NSMutableString	*val = nil;
  unichar		buffer[64];
  int			bufferIndex = 0;
  unichar		next;

  if (state->error)
    {
      return nil;
    }

  if (currentChar(state) != '"')
    {
      parseError(state);
      return nil;
    }

  next = consumeChar(state);
  while ((next != 0) && (next != '"'))
    {
      if (next == '\\')
        {
          next = consumeChar(state);
          switch (next)
            {
              // Input ended in the middle of an escape sequence
              case 0:
                [val release];
                parseError(state);
                return nil;
              // Simple escapes, just ignore the leading backslash
              case '"':
              case '\\':
              case '/':
                break;
              // Map to the unicode values specified in RFC4627
              case 'b': next = 0x0008; break;
              case 'f': next = 0x000c; break;
              case 'n': next = 0x000a; break;
              case 'r': next = 0x000d; break;
              case 't': next = 0x0009; break;
              // decode a unicode value from 4 hex digits
              case 'u':
                {
                  char		hex[5] = {0};
                  unsigned	i;

                  for (i = 0 ; i < 4 ; i++)
                    {
                      next = consumeChar(state);
                      /* Explicit range checks: isxdigit() is not defined
                       * for values outside the unsigned char range.
                       */
                      if (!((next >= '0' && next <= '9')
                        || (next >= 'a' && next <= 'f')
                        || (next >= 'A' && next <= 'F')))
                        {
                          [val release];
                          parseError(state);
                          return nil;
                        }
                      hex[i] = (char)next;
                    }
                  // Parse 4 hex digits and a NULL terminator into a 16-bit
                  // unicode character ID.
                  next = (unichar)strtol(hex, 0, 16);
                }
            }
        }
      buffer[bufferIndex++] = next;
      if (bufferIndex >= 64)
        {
          NSMutableString	*str;

          str = [[NSMutableString alloc] initWithCharacters: buffer
                                                     length: 64];
          if (nil == val)
            {
              val = str;
            }
          else
            {
              [val appendString: str];
              [str release];
            }
          // Start refilling the local buffer from the beginning.
          bufferIndex = 0;
        }
      next = consumeChar(state);
    }

  if (next != '"')
    {
      // The input ended before the closing quote was found.
      [val release];
      parseError(state);
      return nil;
    }

  if (bufferIndex > 0)
    {
      NSMutableString	*str;

      str = [[NSMutableString alloc] initWithCharacters: buffer
                                                 length: bufferIndex];
      if (nil == val)
        {
          val = str;
        }
      else
        {
          [val appendString: str];
          [str release];
        }
    }
  if (nil == val)
    {
      /* An empty string ("") is a valid value; callers treat a nil result
       * as a failure, so return a real (empty) string object.
       */
      val = [NSMutableString new];
    }
  if (!state->mutableStrings)
    {
      val = [val makeImmutableCopyOnFail: YES];
    }
  // Consume the trailing "
  consumeChar(state);
  return val;
}
/**
 * Parses a number, as defined by section 2.4 of the JSON specification.
 *
 * The characters making up the number are collected into a C string (held
 * on the stack, moving to the heap if it outgrows that) which is then
 * converted with strtod().  Returns a retained NSNumber holding a double,
 * or nil, with state->error set, if the input is not a number.
 */
NS_RETURNS_RETAINED static NSNumber*
parseNumber(ParserState *state)
{
  unichar	c = currentChar(state);
  char		numberBuffer[128];
  char		*number = numberBuffer;
  int		bufferSize = 128;
  int		parsedSize = 0;
  double	num;

  /* Digits are tested with an explicit range check because isdigit() is not
   * defined for values outside the unsigned char range.  The argument must
   * not have side effects since it is evaluated twice.
   */
#define IS_DIGIT(x) ((x) >= '0' && (x) <= '9')
  // Define a macro to add a character to the buffer, because we'll need to do
  // it a lot.  This resizes the buffer if required; when the data first moves
  // from the stack to the heap what has been collected so far is copied over.
#define BUFFER(x) do {\
  if (parsedSize == bufferSize)\
    {\
      bufferSize *= 2;\
      if (number == numberBuffer)\
        {\
          int	copied;\
          number = malloc(bufferSize);\
          for (copied = 0; copied < parsedSize; copied++)\
            {\
              number[copied] = numberBuffer[copied];\
            }\
        }\
      else\
        {\
          number = realloc(number, bufferSize);\
        }\
    }\
  number[parsedSize++] = (char)(x); } while (0)

  // JSON numbers must start with a - or a digit
  if (!(c == '-' || IS_DIGIT(c)))
    {
      parseError(state);
      return nil;
    }
  // digit or -
  BUFFER(c);
  // Read as many digits as we see
  c = consumeChar(state);
  while (IS_DIGIT(c))
    {
      BUFFER(c);
      c = consumeChar(state);
    }
  // Parse the fractional component, if there is one
  if ('.' == c)
    {
      BUFFER(c);
      c = consumeChar(state);
      while (IS_DIGIT(c))
        {
          BUFFER(c);
          c = consumeChar(state);
        }
    }
  // parse the exponent if there is one
  if ('e' == c || 'E' == c)
    {
      BUFFER(c);
      c = consumeChar(state);
      // The exponent must be a valid number
      if (!(c == '-' || c == '+' || IS_DIGIT(c)))
        {
          if (number != numberBuffer)
            {
              free(number);
            }
          // Stop here: the buffer must not be used after it is freed.
          parseError(state);
          return nil;
        }
      BUFFER(c);
      c = consumeChar(state);
      while (IS_DIGIT(c))
        {
          BUFFER(c);
          c = consumeChar(state);
        }
    }
  // Add a null terminator on the buffer.
  BUFFER(0);
  num = strtod(number, 0);
  if (number != numberBuffer)
    {
      free(number);
    }
  return [[NSNumber alloc] initWithDouble: num];
#undef BUFFER
#undef IS_DIGIT
}
/**
 * Parse an array, as described by section 2.3 of RFC 4627.
 *
 * Leading whitespace is skipped and the next character must be '['.  On
 * success the closing ']' has been consumed and a retained array is returned
 * (mutable only if state->mutableContainers is set).  On failure nil is
 * returned.
 */
NS_RETURNS_RETAINED static NSArray*
parseArray(ParserState *state)
{
  unichar		c = consumeSpace(state);
  NSMutableArray	*array;

  if (c != '[')
    {
      parseError(state);
      return nil;
    }
  // Eat the [
  consumeChar(state);
  array = [NSMutableArray new];
  c = consumeSpace(state);
  while (c != ']')
    {
      // If this fails, it will already set the error, so we don't have to.
      id	obj = parseValue(state);

      if (nil == obj)
        {
          [array release];
          return nil;
        }
      [array addObject: obj];
      [obj release];
      c = consumeSpace(state);
      if (c == ',')
        {
          consumeChar(state);
          c = consumeSpace(state);
        }
      else if (c != ']')
        {
          /* A value must be followed by a separator or by the end of the
           * array; anything else (eg. two values with only a space between
           * them) is malformed.
           */
          [array release];
          parseError(state);
          return nil;
        }
    }
  // Eat the trailing ]
  consumeChar(state);
  if (!state->mutableContainers)
    {
      array = [array makeImmutableCopyOnFail: YES];
    }
  return array;
}
/**
 * Parse an object, as described by section 2.2 of RFC 4627.
 *
 * Leading whitespace is skipped and the next character must be '{'.  Each
 * member is a string key, a ':' and a value.  On success the closing '}' has
 * been consumed and a retained dictionary is returned (mutable only if
 * state->mutableContainers is set).  On failure nil is returned.
 */
NS_RETURNS_RETAINED static NSDictionary*
parseObject(ParserState *state)
{
  unichar		c = consumeSpace(state);
  NSMutableDictionary	*dict;

  if (c != '{')
    {
      parseError(state);
      return nil;
    }
  // Eat the {
  consumeChar(state);
  dict = [NSMutableDictionary new];
  c = consumeSpace(state);
  while (c != '}')
    {
      id	key = parseString(state);
      id	obj;

      if (nil == key)
        {
          [dict release];
          return nil;
        }
      c = consumeSpace(state);
      if (':' != c)
        {
          [key release];
          [dict release];
          parseError(state);
          return nil;
        }
      // Eat the :
      consumeChar(state);
      obj = parseValue(state);
      if (nil == obj)
        {
          [key release];
          [dict release];
          return nil;
        }
      [dict setObject: obj forKey: key];
      [key release];
      [obj release];
      c = consumeSpace(state);
      if (c == ',')
        {
          consumeChar(state);
          c = consumeSpace(state);
        }
      else if (c != '}')
        {
          /* A member must be followed by a separator or by the end of the
           * object; anything else is malformed.
           */
          [dict release];
          parseError(state);
          return nil;
        }
    }
  // Eat the trailing }
  consumeChar(state);
  if (!state->mutableContainers)
    {
      dict = [dict makeImmutableCopyOnFail: YES];
    }
  return dict;
}
/**
 * Parses a JSON value, as defined by RFC4627, section 2.1.
 *
 * Skips leading whitespace and dispatches on the first character of the
 * value.  Returns a retained object with the whole value consumed, or nil
 * if the parser is already in an error state or the input is not a value.
 */
NS_RETURNS_RETAINED static id
parseValue(ParserState *state)
{
  unichar	c;

  if (state->error)
    {
      return nil;
    }
  c = consumeSpace(state);
  //   2.1: A JSON value MUST be an object, array, number, or string, or one of
  //   the following three literal names:
  //            false null true
  switch (c)
    {
      case (unichar)'"':
        return parseString(state);
      case (unichar)'[':
        return parseArray(state);
      case (unichar)'{':
        return parseObject(state);
      case (unichar)'-':
      case (unichar)'0' ... (unichar)'9':
        return parseNumber(state);
      /* For the literals, consumeChar() returns the character which becomes
       * current after advancing, so once the last letter has been matched it
       * is still the current character and must be consumed too; otherwise
       * the caller would see it as the start of the next token.
       */
      // Literal null
      case 'n':
        {
          if ((consumeChar(state) == 'u')
            && (consumeChar(state) == 'l')
            && (consumeChar(state) == 'l'))
            {
              consumeChar(state);
              return [[NSNull null] retain];
            }
          break;
        }
      // Literal true
      case 't':
        {
          if ((consumeChar(state) == 'r')
            && (consumeChar(state) == 'u')
            && (consumeChar(state) == 'e'))
            {
              consumeChar(state);
              return [[NSNumber alloc] initWithBool: YES];
            }
          break;
        }
      // Literal false
      case 'f':
        {
          if ((consumeChar(state) == 'a')
            && (consumeChar(state) == 'l')
            && (consumeChar(state) == 's')
            && (consumeChar(state) == 'e'))
            {
              consumeChar(state);
              return [[NSNumber alloc] initWithBool: NO];
            }
          break;
        }
    }
  parseError(state);
  return nil;
}
/**
 * Works out which unicode encoding the source uses from its first four
 * bytes, and stores the result in state->enc and state->BOMLength.
 *
 * A byte order mark, if present, identifies the encoding directly and its
 * length is recorded so that it can be skipped.  Without one, the encoding
 * is inferred from where the zero bytes fall: the first two characters of a
 * JSON text are ASCII, so the position of the NULLs distinguishes the 16 and
 * 32 bit encodings and their byte order.  Anything else is taken to be UTF-8.
 */
static void
getEncoding(const uint8_t BOM[4], ParserState *state)
{
  const uint8_t	b0 = BOM[0];
  const uint8_t	b1 = BOM[1];
  const uint8_t	b2 = BOM[2];
  const uint8_t	b3 = BOM[3];

  // Default: UTF-8 with no byte order mark.
  state->enc = NSUTF8StringEncoding;
  state->BOMLength = 0;

  // UTF-8 BOM
  if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF)
    {
      state->BOMLength = 3;
      return;
    }
  // UTF-16 big endian BOM
  if (b0 == 0xFE && b1 == 0xFF)
    {
      state->BOMLength = 2;
      state->enc = NSUTF16BigEndianStringEncoding;
      return;
    }
  // Little endian BOM: 32 bit if followed by two zero bytes, else 16 bit
  if (b0 == 0xFF && b1 == 0xFE)
    {
      BOOL	wide = (b2 == 0 && b3 == 0);

      state->BOMLength = wide ? 4 : 2;
      state->enc = wide ? NSUTF32LittleEndianStringEncoding
        : NSUTF16LittleEndianStringEncoding;
      return;
    }
  // UTF-32 big endian BOM
  if (b0 == 0 && b1 == 0 && b2 == 0xFE && b3 == 0xFF)
    {
      state->BOMLength = 4;
      state->enc = NSUTF32BigEndianStringEncoding;
      return;
    }
  // No BOM; a leading zero byte means a big endian wide encoding.
  // TODO: Throw an error if this doesn't match one of the patterns
  // described in section 3 of RFC4627
  if (b0 == 0)
    {
      state->enc = (b1 == 0) ? NSUTF32BigEndianStringEncoding
        : NSUTF16BigEndianStringEncoding;
      return;
    }
  // No BOM; a zero second byte means a little endian wide encoding.
  if (b1 == 0)
    {
      state->enc = (b2 == 0) ? NSUTF32LittleEndianStringEncoding
        : NSUTF16LittleEndianStringEncoding;
    }
}
/**
* Classes that are permitted to be written. They are looked up once in
* +initialize and tested with -isKindOfClass: by writeObject().
*/
static Class NSNullClass, NSArrayClass, NSStringClass, NSDictionaryClass,
NSNumberClass;
/**
* The characters (double quote and backslash) whose presence in a string
* makes writeObject() take its escaping path. Created in +initialize.
*/
static NSCharacterSet *escapeSet;
/**
 * Appends tabs tab characters to output.  Does nothing when tabs is zero
 * or negative.
 */
static inline void
writeTabs(NSMutableString *output, NSInteger tabs)
{
  NSInteger	remaining = tabs;

  while (remaining > 0)
    {
      [output appendString: @"\t"];
      remaining--;
    }
}
/**
 * Appends a newline to output, unless tabs is negative (a negative
 * indentation level suppresses line breaks).
 */
static inline void
writeNewline(NSMutableString *output, NSInteger tabs)
{
  if (tabs < 0)
    {
      return;
    }
  [output appendString: @"\n"];
}
/**
 * Appends the JSON representation of obj to output.
 *
 * obj must be an array, a dictionary with string keys, a string, a number
 * or NSNull, and the same must hold for everything it contains.  Returns NO
 * if it (or anything nested within it) cannot be represented; in that case
 * output may already hold a partial representation.
 * output may be nil, in which case the object is only validated.
 * tabs is the current indentation level; a negative value produces compact
 * output with no line breaks.
 */
static BOOL
writeObject(id obj, NSMutableString *output, NSInteger tabs)
{
  if ([obj isKindOfClass: NSArrayClass])
    {
      BOOL	writeComma = NO;

      [output appendString: @"["];
      FOR_IN(id, o, obj)
        if (writeComma)
          {
            [output appendString: @","];
          }
        writeComma = YES;
        writeNewline(output, tabs);
        writeTabs(output, tabs);
        // An invalid element makes the whole array invalid.
        if (!writeObject(o, output, tabs + 1))
          {
            return NO;
          }
      END_FOR_IN(obj)
      writeNewline(output, tabs);
      writeTabs(output, tabs);
      [output appendString: @"]"];
    }
  else if ([obj isKindOfClass: NSDictionaryClass])
    {
      BOOL	writeComma = NO;

      [output appendString: @"{"];
      FOR_IN(id, o, obj)
        // Keys in dictionaries must be strings
        if (![o isKindOfClass: NSStringClass]) { return NO; }
        if (writeComma)
          {
            [output appendString: @","];
          }
        writeComma = YES;
        writeNewline(output, tabs);
        writeTabs(output, tabs);
        if (!writeObject(o, output, tabs + 1))
          {
            return NO;
          }
        [output appendString: @": "];
        // An invalid value makes the whole dictionary invalid.
        if (!writeObject([obj objectForKey: o], output, tabs + 1))
          {
            return NO;
          }
      END_FOR_IN(obj)
      writeNewline(output, tabs);
      writeTabs(output, tabs);
      [output appendString: @"}"];
    }
  else if ([obj isKindOfClass: NSStringClass])
    {
      NSCharacterSet	*controlSet = [NSCharacterSet controlCharacterSet];
      NSRange		r = [obj rangeOfCharacterFromSet: escapeSet];

      /* A string needs escaping if it contains a quote or backslash, but
       * also if it contains control characters and nothing else special.
       */
      if (r.location == NSNotFound)
        {
          r = [obj rangeOfCharacterFromSet: controlSet];
        }
      if (r.location != NSNotFound)
        {
          NSMutableString	*str = [obj mutableCopy];

          [str replaceOccurrencesOfString: @"\\"
                               withString: @"\\\\"
                                  options: 0
                                    range: NSMakeRange(0, [str length])];
          [str replaceOccurrencesOfString: @"\""
                               withString: @"\\\""
                                  options: 0
                                    range: NSMakeRange(0, [str length])];
          r = [str rangeOfCharacterFromSet: controlSet];
          while (r.location != NSNotFound)
            {
              NSMutableString	*escaped = [NSMutableString new];
              NSUInteger	i;

              /* JSON escapes are \u followed by four *hexadecimal* digits.
               * Every UTF-16 unit in the matched range is escaped, so a
               * match covering a surrogate pair is not cut in half.
               */
              for (i = 0; i < r.length; i++)
                {
                  unichar	control;

                  control = [str characterAtIndex: r.location + i];
                  [escaped appendFormat: @"\\u%.4x", (unsigned)control];
                }
              [str replaceCharactersInRange: r
                                 withString: escaped];
              [escaped release];
              r = [str rangeOfCharacterFromSet: controlSet];
            }
          [output appendFormat: @"\"%@\"", str];
          [str release];
        }
      else
        {
          [output appendFormat: @"\"%@\"", obj];
        }
    }
  else if ([obj isKindOfClass: NSNumberClass])
    {
      const char	type = [obj objCType][0];

      if (type == @encode(BOOL)[0])
        {
          if ([obj boolValue])
            {
              [output appendString: @"true"];
            }
          else
            {
              [output appendString: @"false"];
            }
        }
      else if (type == @encode(float)[0] || type == @encode(double)[0])
        {
          double	num = [obj doubleValue];

          /* JSON cannot represent infinities or NaN.  Subtracting a value
           * from itself gives zero only if the value is finite.
           */
          if ((num - num) != 0.0)
            {
              return NO;
            }
          /* Use enough significant digits for the value to survive a round
           * trip; unlike %f this neither drops small magnitudes nor pads
           * integral values with zeros.
           */
          if (type == @encode(float)[0])
            {
              [output appendFormat: @"%.9g", num];
            }
          else
            {
              [output appendFormat: @"%.17g", num];
            }
        }
      else if (type == @encode(unsigned char)[0]
        || type == @encode(unsigned short)[0]
        || type == @encode(unsigned int)[0]
        || type == @encode(unsigned long)[0]
        || type == @encode(unsigned long long)[0])
        {
          // Integers are written exactly rather than via a double.
          [output appendFormat: @"%llu", [obj unsignedLongLongValue]];
        }
      else
        {
          [output appendFormat: @"%lld", [obj longLongValue]];
        }
    }
  else if ([obj isKindOfClass: NSNullClass])
    {
      [output appendString: @"null"];
    }
  else
    {
      return NO;
    }
  return YES;
}
@implementation NSJSONSerialization

/**
 * Caches the classes and the character set used when writing JSON.
 */
+ (void) initialize
{
  NSNullClass = [NSNull class];
  NSArrayClass = [NSArray class];
  NSStringClass = [NSString class];
  NSDictionaryClass = [NSDictionary class];
  NSNumberClass = [NSNumber class];
  /* +initialize may be sent more than once (eg. on behalf of a subclass
   * which does not override it), so only create the set the first time.
   */
  if (nil == escapeSet)
    {
      escapeSet
        = [[NSCharacterSet characterSetWithCharactersInString: @"\"\\"] retain];
    }
}

/**
 * Returns the UTF-8 encoded JSON representation of obj, or nil (with an
 * error stored in *error, if error is not NULL) if obj cannot be written as
 * JSON.  Pass NSJSONWritingPrettyPrinted in opt to get line breaks and
 * indentation in the output.
 */
+ (NSData*) dataWithJSONObject: (id)obj
                       options: (NSJSONWritingOptions)opt
                         error: (NSError **)error
{
  /* Temporary string: allocate more space than we are likely to use so we just
   * quickly claim a page and then give it back later
   */
  NSMutableString	*str = [[NSMutableString alloc] initWithCapacity: 4096];
  NSData		*data = nil;
  NSInteger		tabs;

  // A negative indentation level asks writeObject() for compact output.
  tabs = ((opt & NSJSONWritingPrettyPrinted) == NSJSONWritingPrettyPrinted)
    ? 0 : NSIntegerMin;
  if (writeObject(obj, str, tabs))
    {
      data = [str dataUsingEncoding: NSUTF8StringEncoding];
      if (NULL != error)
        {
          *error = nil;
        }
    }
  else
    {
      if (NULL != error)
        {
          NSDictionary	*userInfo;

          userInfo = [[NSDictionary alloc] initWithObjectsAndKeys:
            _(@"JSON writing error"), NSLocalizedDescriptionKey,
            nil];
          *error = [NSError errorWithDomain: NSCocoaErrorDomain
                                       code: 0
                                   userInfo: userInfo];
          // The error retains what it needs of the dictionary.
          [userInfo release];
        }
    }
  [str release];
  return data;
}

/**
 * Returns YES if obj can be written as JSON.  This runs the writer with no
 * output string, so nothing is produced.
 */
+ (BOOL) isValidJSONObject: (id)obj
{
  return writeObject(obj, nil, NSIntegerMin);
}

/**
 * Parses the JSON text in data and returns the resulting (autoreleased)
 * object, or nil on failure.  The encoding is detected from the first bytes
 * of the data and a byte order mark, if present, is skipped.  If error is
 * not NULL, *error is set to the parser's error (nil if there was none).
 */
+ (id) JSONObjectWithData: (NSData *)data
                  options: (NSJSONReadingOptions)opt
                    error: (NSError **)error
{
  /* Padding for data shorter than four bytes.  Spaces are used rather than
   * zeros because getEncoding() interprets zero bytes as evidence of a
   * 16 or 32 bit encoding.
   */
  uint8_t	BOM[4] = { ' ', ' ', ' ', ' ' };
  ParserState	p = { 0 };
  NSUInteger	length = [data length];
  NSData	*body = data;
  id		obj;

  // Never ask for more bytes than the data actually contains.
  [data getBytes: BOM length: (length < 4) ? length : 4];
  getEncoding(BOM, &p);
  if (p.BOMLength > 0 && (NSUInteger)p.BOMLength <= length)
    {
      // The byte order mark is not part of the JSON text.
      body = [data subdataWithRange:
        NSMakeRange(p.BOMLength, length - p.BOMLength)];
    }
  p.source = [[NSString alloc] initWithData: body encoding: p.enc];
  p.updateBuffer = updateStringBuffer;
  p.mutableContainers
    = (opt & NSJSONReadingMutableContainers) == NSJSONReadingMutableContainers;
  p.mutableStrings
    = (opt & NSJSONReadingMutableLeaves) == NSJSONReadingMutableLeaves;

  obj = parseValue(&p);
  [p.source release];
  if (NULL != error)
    {
      *error = p.error;
    }
  return [obj autorelease];
}

/**
 * Parses JSON text read from stream and returns the resulting (autoreleased)
 * object, or nil on failure.  The first (up to) four bytes are read to
 * detect the encoding; any of them which are not part of a byte order mark
 * are handed to the parser as its initial buffer.  If error is not NULL,
 * *error is set to the parser's error, or to the stream's error if reading
 * the initial bytes fails.
 */
+ (id) JSONObjectWithStream: (NSInputStream *)stream
                    options: (NSJSONReadingOptions)opt
                      error: (NSError **)error
{
  // See +JSONObjectWithData:options:error: for why the padding is spaces.
  uint8_t	BOM[4] = { ' ', ' ', ' ', ' ' };
  ParserState	p = { 0 };
  NSUInteger	have = 0;
  id		obj;

  /* Collect up to four bytes; a stream may legitimately deliver fewer bytes
   * per read than were asked for.
   */
  while (have < 4)
    {
      NSInteger	got = [stream read: BOM + have maxLength: 4 - have];

      if (got < 0)
        {
          if (NULL != error)
            {
              *error = [stream streamError];
            }
          return nil;
        }
      if (0 == got)
        {
          break;	// End of stream
        }
      have += (NSUInteger)got;
    }
  getEncoding(BOM, &p);
  p.mutableContainers
    = (opt & NSJSONReadingMutableContainers) == NSJSONReadingMutableContainers;
  p.mutableStrings
    = (opt & NSJSONReadingMutableLeaves) == NSJSONReadingMutableLeaves;
  if ((NSUInteger)p.BOMLength < have)
    {
      NSString	*prefix;

      prefix = [[NSString alloc] initWithBytesNoCopy: &BOM[p.BOMLength]
                                              length: have - p.BOMLength
                                            encoding: p.enc
                                        freeWhenDone: NO];
      p.source = prefix;
      updateStringBuffer(&p);
      // The characters are now in the parser's buffer.
      [prefix release];
      /* Negative source index because we are before the
       * current point in the buffer
       */
      p.sourceIndex = (NSInteger)p.BOMLength - (NSInteger)have;
    }
  p.source = stream;
  p.updateBuffer = updateStreamBuffer;
  obj = parseValue(&p);
  // Consume any data in the stream that we've failed to read
  updateStreamBuffer(&p);
  if (NULL != error)
    {
      *error = p.error;
    }
  return [obj autorelease];
}

/**
 * Writes the JSON representation of obj to stream.  Returns the number of
 * bytes written, or 0 if obj could not be converted or the stream refused
 * the data (in which case *error, if error is not NULL, describes why).
 */
+ (NSInteger) writeJSONObject: (id)obj
                     toStream: (NSOutputStream *)stream
                      options: (NSJSONWritingOptions)opt
                        error: (NSError **)error
{
  NSData	*data = [self dataWithJSONObject: obj options: opt error: error];

  if (nil != data)
    {
      const uint8_t	*bytes = [data bytes];
      NSUInteger	toWrite = [data length];

      while (toWrite > 0)
        {
          NSInteger	wrote = [stream write: bytes maxLength: toWrite];

          /* -write:maxLength: returns -1 on error and 0 when the stream can
           * accept no more; check before adjusting the pointer and count.
           */
          if (wrote <= 0)
            {
              if (NULL != error)
                {
                  *error = [stream streamError];
                }
              return 0;
            }
          bytes += wrote;
          toWrite -= (NSUInteger)wrote;
        }
    }
  return [data length];
}
@end