Characterset memory usage and code organisation consistency improvements.

This commit is contained in:
Richard Frith-Macdonald 2020-04-12 12:28:07 +01:00
parent 61bd73832f
commit df1b6dd6d5
9 changed files with 188 additions and 173 deletions

View file

@ -1,3 +1,21 @@
2020-04-12 Richard Frith-Macdonald <rfm@gnu.org>
* CharSets/URLFragmentAllowedCharSet.h:
* CharSets/URLHostAllowedCharSet.h:
* CharSets/URLPasswordAllowedCharSet.h:
* CharSets/URLPathAllowedCharSet.h:
* CharSets/URLQueryAllowedCharSet.h:
* CharSets/URLUserAllowedCharSet.h:
Removed character set data headers from non-standard location.
* NSCharacterSet.m:
Fixed to load URL characterset data from standard file.
Changed bitmap representation to use much less space for charsets
residing wholly in the base plane (in particular the URL charsets
which are purely ASCII characters).
* NSCharacterSetData.h:
Regenerated to contain the URL charactersets and to contain the
latest Unicode data (3rd Oct 2019).
2020-04-10 Richard Frith-Macdonald <rfm@gnu.org>
* Headers/GNUstepBase/GNUstep.h:

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -49,12 +49,6 @@
#undef GNUSTEP_INDEX_CHARSET
#import "NSCharacterSetData.h"
#import "CharSets/URLFragmentAllowedCharSet.h"
#import "CharSets/URLHostAllowedCharSet.h"
#import "CharSets/URLPasswordAllowedCharSet.h"
#import "CharSets/URLPathAllowedCharSet.h"
#import "CharSets/URLQueryAllowedCharSet.h"
#import "CharSets/URLUserAllowedCharSet.h"
#define GSUNICODE_MAX 1114112
#define GSBITMAP_SIZE 8192
@ -122,6 +116,20 @@
i--;
}
i *= GSBITMAP_SIZE;
if (GSBITMAP_SIZE == i)
{
/* In the base plane, find the number of used bytes, so we don't
* produce a bitmap that is longer than necessary.
*/
if (_length < i)
{
i = _length;
}
while (i > 0 && 0 == _data[i - 1])
{
i--;
}
}
if (i < _length)
{
return [NSData dataWithBytes: _data length: i];
@ -206,7 +214,7 @@
{
unsigned length = [bitmap length];
if ((length % GSBITMAP_SIZE) != 0 || length > GSBITMAP_MAX)
if (length > GSBITMAP_MAX)
{
NSLog(@"attempt to initialize character set with invalid bitmap");
[self dealloc];
@ -280,13 +288,22 @@
}
/* Make space if needed.
* Use exact size if we have nothing beyond the base plane,
* otherwise round up to a plane boundary.
*/
b = (m - 1) / 8;
if (b >= _length)
{
while (b >= _length)
if (b < GSBITMAP_SIZE)
{
_length += GSBITMAP_SIZE;
_length = b + 1;
}
else
{
while (b >= _length)
{
_length += GSBITMAP_SIZE;
}
}
[_obj setLength: _length];
_data = [_obj mutableBytes];
@ -337,10 +354,14 @@
if (length > 0)
{
NSUInteger i;
unsigned max = _length;
unichar (*get)(id, SEL, NSUInteger);
get = (unichar (*)(id, SEL, NSUInteger))
[aString methodForSelector: @selector(characterAtIndex:)];
/* Determine size of bitmap needed.
*/
for (i = 0; i < length; i++)
{
unichar letter;
@ -358,15 +379,49 @@
+ (second - 0xdc00) + 0x0010000;
}
byte = letter/8;
if (byte >= _length)
if (byte >= max)
{
while (byte >= _length)
max = byte;
}
}
/* Make space if needed.
* Use exact size if we have nothing beyond the base plane,
* otherwise round up to a plane boundary.
*/
if (max >= _length)
{
if (max < GSBITMAP_SIZE)
{
_length = max + 1;
}
else
{
while (max >= _length)
{
_length += GSBITMAP_SIZE;
}
[_obj setLength: _length];
_data = [_obj mutableBytes];
}
[_obj setLength: _length];
_data = [_obj mutableBytes];
}
for (i = 0; i < length; i++)
{
unichar letter;
unichar second;
unsigned byte;
letter = (*get)(aString, @selector(characterAtIndex:), i);
// Convert a surrogate pair if necessary
if (letter >= 0xd800 && letter <= 0xdbff && i < length-1
&& (second = (*get)(aString, @selector(characterAtIndex:), i+1))
>= 0xdc00 && second <= 0xdfff)
{
i++;
letter = ((letter - 0xd800) << 10)
+ (second - 0xdc00) + 0x0010000;
}
byte = letter/8;
GSSETBIT(_data[byte], letter % 8);
}
}
@ -382,6 +437,20 @@
i--;
}
i *= GSBITMAP_SIZE;
if (GSBITMAP_SIZE == i)
{
/* In the base plane, find the number of used bytes, so we don't
* produce a bitmap that is longer than necessary.
*/
if (_length < i)
{
i = _length;
}
while (i > 0 && 0 == _data[i - 1])
{
i--;
}
}
return [NSData dataWithBytes: _data length: i];
}
@ -430,7 +499,7 @@
unsigned length = [bitmap length];
id tmp;
if ((length % GSBITMAP_SIZE) != 0 || length > GSBITMAP_MAX)
if (length > GSBITMAP_MAX)
{
NSLog(@"attempt to initialize character set with invalid bitmap");
[self dealloc];
@ -623,7 +692,8 @@ static Class concreteMutableClass = nil;
- (id) initWithBitmap: (NSData*)bitmap number: (int)number
{
if ((self = (_GSStaticCharSet*)[(NSBitmapCharSet*)self initWithBitmap: bitmap]) != nil)
if ((self = (_GSStaticCharSet*)[(NSBitmapCharSet*)self
initWithBitmap: bitmap]) != nil)
{
_index = number;
}
@ -841,56 +911,66 @@ static Class concreteMutableClass = nil;
+ (id) URLFragmentAllowedCharacterSet
{
return [self _staticSet: urlFragmentAllowedCharSet
length: sizeof(urlFragmentAllowedCharSet)
return [self _staticSet: URLFragmentAllowedCharSet
length: sizeof(URLFragmentAllowedCharSet)
number: 15];
}
+ (id) URLPasswordAllowedCharacterSet
{
return [self _staticSet: urlPasswordAllowedCharSet
length: sizeof(urlPasswordAllowedCharSet)
return [self _staticSet: URLPasswordAllowedCharSet
length: sizeof(URLPasswordAllowedCharSet)
number: 16];
}
+ (id) URLPathAllowedCharacterSet
{
return [self _staticSet: urlPathAllowedCharSet
length: sizeof(urlPathAllowedCharSet)
return [self _staticSet: URLPathAllowedCharSet
length: sizeof(URLPathAllowedCharSet)
number: 17];
}
+ (id) URLQueryAllowedCharacterSet
{
return [self _staticSet: urlQueryAllowedCharSet
length: sizeof(urlQueryAllowedCharSet)
return [self _staticSet: URLQueryAllowedCharSet
length: sizeof(URLQueryAllowedCharSet)
number: 18];
}
+ (id) URLUserAllowedCharacterSet
{
return [self _staticSet: urlUserAllowedCharSet
length: sizeof(urlUserAllowedCharSet)
return [self _staticSet: URLUserAllowedCharSet
length: sizeof(URLUserAllowedCharSet)
number: 19];
}
+ (id) URLHostAllowedCharacterSet
{
return [self _staticSet: urlHostAllowedCharSet
length: sizeof(urlHostAllowedCharSet)
return [self _staticSet: URLHostAllowedCharSet
length: sizeof(URLHostAllowedCharSet)
number: 20];
}
- (NSData*) bitmapRepresentation
{
BOOL (*imp)(id, SEL, unichar);
NSMutableData *m = [NSMutableData dataWithLength: 8192];
unsigned char *p = (unsigned char*)[m mutableBytes];
NSMutableData *m;
unsigned char *p;
unsigned end;
unsigned i;
imp = (BOOL (*)(id,SEL,unichar))
[self methodForSelector: @selector(characterIsMember:)];
for (i = 0; i <= 0xffff; i++)
for (end = 0xffff; end > 0; end--)
{
if (imp(self, @selector(characterIsMember:), end) == YES)
{
break;
}
}
m = [NSMutableData dataWithLength: end / 8 + 1];
p = (unsigned char*)[m mutableBytes];
for (i = 0; i <= end; i++)
{
if (imp(self, @selector(characterIsMember:), i) == YES)
{

File diff suppressed because one or more lines are too long