/** NSString+HTML.m -
GSWeb: NSString / HTML
Copyright (C) 1999-2005 Free Software Foundation, Inc.
Written by: Manuel Guesdon
Date: Jan 1999
$Revision$
$Date$
$Id$
This file is part of the GNUstep Web Library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
**/
#include "config.h"
RCS_ID("$Id$")
#include "GSWeb.h"
#include
/*
*/
/*
 * Character tables, written as old-style property-list arrays and parsed
 * by initNormalHTMLChars().  Each *_NORMAL_CHARS table is paired by index
 * with the *_HTML_CHARS table that follows it: entry N of the normal table
 * is replaced by entry N of the HTML table, so both must keep the same
 * number of elements and the same order.
 *
 * Every HTML entry must be at most htmlCharsMaxLength characters long.
 */

/* Plain characters handled by the general to/from HTML conversion. */
#define NORMAL_CHARS @"(\"&\", \
\">\", \
\"<\", \
\"\\\"\", \
\"\\U00A3\", \
\"|\", \
\"\\U00B0\", \
\"\\U00E9\", \
\"\\U00E7\", \
\"\\U00E0\", \
\"\\U00E2\", \
\"\\U00E3\", \
\"\\U00E8\", \
\"\\U00EA\", \
\"\\U00EC\", \
\"\\U00EE\", \
\"\\U00F1\", \
\"\\U00F4\", \
\"\\U00F5\", \
\"\\U00F9\", \
\"\\U00FB\")"

/* Entity spelling of each NORMAL_CHARS entry.  Note that the ASCII
 * vertical bar (6th entry) is paired with &brvbar;, so the reverse
 * conversion turns &brvbar; back into '|'. */
#define HTML_CHARS @"( \"&amp;\", \
\"&gt;\", \
\"&lt;\", \
\"&quot;\", \
\"&pound;\", \
\"&brvbar;\", \
\"&deg;\", \
\"&eacute;\", \
\"&ccedil;\", \
\"&agrave;\", \
\"&acirc;\", \
\"&atilde;\", \
\"&egrave;\", \
\"&ecirc;\", \
\"&igrave;\", \
\"&icirc;\", \
\"&ntilde;\", \
\"&ocirc;\", \
\"&otilde;\", \
\"&ugrave;\", \
\"&ucirc;\")"

/* Characters escaped inside an HTML attribute value: the four markup
 * characters plus tab, line feed and carriage return. */
#define ESCAPING_HTML_ATTRIBUTE_VALUE_NORMAL_CHARS @"( \
\"&\", \
\"\\\"\", \
\"<\", \
\">\", \
\"\t\", \
\"\n\", \
\"\r\" )"

/* Replacements for ESCAPING_HTML_ATTRIBUTE_VALUE_NORMAL_CHARS; the three
 * control characters use numeric character references. */
#define ESCAPING_HTML_ATTRIBUTE_VALUE_HTML_CHARS @"( \
\"&amp;\", \
\"&quot;\", \
\"&lt;\", \
\"&gt;\", \
\"&#9;\", \
\"&#10;\", \
\"&#13;\" )"

/* Characters escaped inside HTML text content. */
#define ESCAPING_HTML_STRING_NORMAL_CHARS @"( \
\"&\", \
\"\\\"\", \
\"<\", \
\">\" )"

/* Replacements for ESCAPING_HTML_STRING_NORMAL_CHARS. */
#define ESCAPING_HTML_STRING_HTML_CHARS @"( \
\"&amp;\", \
\"&quot;\", \
\"&lt;\", \
\"&gt;\" )"
// Conversion tables, filled once by NSStringHTML_Initialize():
//  - htmlConvertStruct               from NORMAL_CHARS / HTML_CHARS
//  - htmlConvertAttributeValueStruct from the ESCAPING_HTML_ATTRIBUTE_VALUE_* pair
//  - htmlConvertHTMLString           from the ESCAPING_HTML_STRING_* pair
GSWHTMLConvertingStruct htmlConvertStruct;
GSWHTMLConvertingStruct htmlConvertAttributeValueStruct;
GSWHTMLConvertingStruct htmlConvertHTMLString;
// Character sequence that baseStringByConvertingFromHTML() replaces with
// '\n' when its includeBR argument is set; filled by
// NSStringHTML_Initialize(). Only the first unicodeBRLen characters are
// compared.
static unichar unicodeBR[5];
static int unicodeBRLen=4;
// Longest replacement string accepted by initNormalHTMLChars(). Each entry
// of a table's htmlChars buffer occupies a fixed slot of
// htmlCharsMaxLength+1 unichars.
#define htmlCharsMaxLength 9
// Pointer to the first unichar of replacement number i in a table.
#define htmlCharsAtIndex(convStructPtr,i) (((convStructPtr)->htmlChars)+((i)*(htmlCharsMaxLength+1)))
// Classes, selectors and method implementations cached by
// NSStringHTML_Initialize() so the conversion functions can build result
// strings by calling the IMPs directly.
static Class mutableStringClass = Nil;
static Class stringClass=Nil;
static SEL stringWithCharactersSEL=NULL;
static SEL stringWithStringSEL=NULL;
static IMP stringClass_stringWithCharactersIMP=NULL;
static IMP stringClass_stringWithStringIMP=NULL;
/**
 * Fills one conversion table from a pair of property-list strings.
 *
 * htmlConvertStruct             table to fill; its normalChars, htmlChars
 *                               and htmlCharsLen buffers are allocated here
 *                               from the default malloc zone and are never
 *                               released by this function.
 * normalCharsPropertyListString property-list array of strings; only the
 *                               first character of each string is used.
 * htmlCharsPropertyListString   property-list array of replacement strings,
 *                               same element count, each at most
 *                               htmlCharsMaxLength characters long.
 *
 * Violations of these constraints are reported through NSCAssert.
 */
static void initNormalHTMLChars(GSWHTMLConvertingStruct* htmlConvertStruct,
                                NSString* normalCharsPropertyListString,
                                NSString* htmlCharsPropertyListString)
{
  NSArray* normalCharsStringArray=[normalCharsPropertyListString propertyList];
  NSArray* htmlCharsStringArray=[htmlCharsPropertyListString propertyList];
  int i=0;

  htmlConvertStruct->charsCount=[normalCharsStringArray count];
  NSCAssert([htmlCharsStringArray count]==htmlConvertStruct->charsCount,
            @"html and normal characters array have not the same count of elements");

  htmlConvertStruct->normalChars=NSZoneMalloc(NSDefaultMallocZone(),
                                              sizeof(unichar)*(htmlConvertStruct->charsCount));
  // One fixed-size slot of htmlCharsMaxLength+1 unichars per replacement,
  // addressed through htmlCharsAtIndex().
  htmlConvertStruct->htmlChars=NSZoneMalloc(NSDefaultMallocZone(),
                                            sizeof(unichar)*(htmlCharsMaxLength+1)*(htmlConvertStruct->charsCount));
  htmlConvertStruct->htmlCharsLen=NSZoneMalloc(NSDefaultMallocZone(),
                                               sizeof(int)*(htmlConvertStruct->charsCount));

  for(i=0;i<(htmlConvertStruct->charsCount);i++)
    {
      NSString* normalString=[normalCharsStringArray objectAtIndex:i];
      NSString* htmlString=[htmlCharsStringArray objectAtIndex:i];

      htmlConvertStruct->htmlCharsLen[i]=[htmlString length];
      // Checked before copying: a longer string would overflow its slot.
      NSCAssert1(htmlConvertStruct->htmlCharsLen[i]<=htmlCharsMaxLength,
                 @"html character at index %d is too long",i);
      NSCAssert1([normalString length]>0,
                 @"normal character at index %d is empty",i);

      htmlConvertStruct->normalChars[i]=[normalString characterAtIndex:0];
      [htmlString getCharacters:htmlCharsAtIndex(htmlConvertStruct,i)];
    }
}
static void testStringByConvertingHTML();

/**
 * One-time set-up for the HTML conversion helpers in this file.
 *
 * On the first call it builds the three conversion tables, loads the
 * line-break tag into unicodeBR and caches the NSString / NSMutableString
 * classes together with the selectors and IMPs used to create result
 * strings.  Later calls return immediately.
 */
void NSStringHTML_Initialize()
{
  static BOOL initialized=NO;
  if (!initialized)
    {
      initialized=YES;

      initNormalHTMLChars(&htmlConvertStruct,
                          NORMAL_CHARS,
                          HTML_CHARS);
      initNormalHTMLChars(&htmlConvertAttributeValueStruct,
                          ESCAPING_HTML_ATTRIBUTE_VALUE_NORMAL_CHARS,
                          ESCAPING_HTML_ATTRIBUTE_VALUE_HTML_CHARS);
      initNormalHTMLChars(&htmlConvertHTMLString,
                          ESCAPING_HTML_STRING_NORMAL_CHARS,
                          ESCAPING_HTML_STRING_HTML_CHARS);

      // The tag is exactly unicodeBRLen (4) characters long; the buffer
      // has room for 5.
      [@"<BR>" getCharacters:unicodeBR];

      ASSIGN(mutableStringClass,[NSMutableString class]);
      ASSIGN(stringClass,[NSString class]);

      stringWithCharactersSEL=@selector(stringWithCharacters:length:);
      stringWithStringSEL=@selector(stringWithString:);
      stringClass_stringWithCharactersIMP=[stringClass methodForSelector:stringWithCharactersSEL];
      stringClass_stringWithStringIMP=[stringClass methodForSelector:stringWithStringSEL];

      //testStringByConvertingHTML();
    }
}
//====================================================================
/*
 * GSWMemMove(dst,src,size): copies `size` bytes from src to dst and is
 * safe when the two ranges overlap.  When dst lies above src the bytes
 * are copied from the last one down to the first, otherwise from the
 * first one up.  Each argument is evaluated exactly once; a size of zero
 * or less copies nothing.
 *
 * The body is wrapped in do/while(0), so an invocation followed by ';'
 * is a single statement and can be used in an unbraced if/else.
 */
#define GSWMemMove(dst,src,size) \
  do \
    { \
      int gswMemMoveSize_=(size); \
      const unsigned char* gswMemMoveSrc_=(const unsigned char*)(src); \
      unsigned char* gswMemMoveDst_=(unsigned char*)(dst); \
      if (gswMemMoveSize_>0) \
        { \
          if (gswMemMoveDst_>gswMemMoveSrc_) \
            { \
              const unsigned char* gswMemMoveFrom_=gswMemMoveSrc_+gswMemMoveSize_; \
              unsigned char* gswMemMoveTo_=gswMemMoveDst_+gswMemMoveSize_; \
              while(gswMemMoveTo_>gswMemMoveDst_) \
                { *--gswMemMoveTo_=*--gswMemMoveFrom_; } \
            } \
          else \
            { \
              unsigned char* gswMemMoveEnd_=gswMemMoveDst_+gswMemMoveSize_; \
              while(gswMemMoveDst_<gswMemMoveEnd_) \
                { *gswMemMoveDst_++=*gswMemMoveSrc_++; } \
            } \
        } \
    } \
  while(0)
/* Sample inputs for testStringByConvertingHTML(), as a property-list array. */
#define HTML_TEST_STRINGS @"(\"\", \
\"ABCDEF\", \
\"&12\\U00E9\", \
\"&\n1\", \
\"&\r\n2\\U00E8\", \
\"\")"

/**
 * Debug helper: converts every sample string to HTML and back again and
 * writes both results to the debug log.  Nothing is asserted.
 */
void testStringByConvertingHTML()
{
  NSArray* samples=[HTML_TEST_STRINGS propertyList];
  int idx=0;

  while(idx<[samples count])
    {
      NSString* original=[samples objectAtIndex:idx];
      NSString* converted=[original stringByConvertingToHTML];
      NSString* roundTrip=[converted stringByConvertingFromHTML];

      NSDebugFLog(@"RESULT: %d: '%@' => '%@'",idx,original,converted);
      NSDebugFLog(@"Reverse RESULT: %d: '%@' => '%@'",idx,converted,roundTrip);
      idx++;
    }
}
/**
 * Grows a unichar working buffer.
 *
 * ptrPtr       in/out: current buffer; replaced when the buffer grows.
 * capacityPtr  in/out: current capacity in unichars.
 * length       number of unichars already in use, copied to the new buffer.
 * newCapacity  requested capacity in unichars.
 *
 * Nothing happens unless newCapacity is larger than the current capacity.
 * The new buffer comes from GSAutoreleasedBuffer(); the previous buffer is
 * not freed here.
 */
void allocOrReallocUnicharString(unichar** ptrPtr,int* capacityPtr,int length,int newCapacity)
{
  int byteCount=0;
  unichar* grown=NULL;

  if (newCapacity<=*capacityPtr)
    return;

  byteCount=newCapacity*sizeof(unichar);
  grown=GSAutoreleasedBuffer(byteCount);
  NSCAssert1(grown,@"Can't alloc %d allocSize bytes",
             byteCount);

  if (length>0)
    {
      // Carry over the part of the old buffer that is already in use.
      GSWMemMove(grown,*ptrPtr,length*sizeof(unichar));
    }

  *ptrPtr=grown;
  *capacityPtr=newCapacity;
}
//--------------------------------------------------------------------
// Returns `string` with each character listed in convStructPtr->normalChars
// replaced by the matching replacement from the table's htmlChars buffer.
//
// The conversion works in place on a unichar copy of the string: for every
// match the tail of the buffer is shifted to make room, the replacement is
// written, and the scan resumes after it.
//
// Result when nothing was replaced: an immutable copy if `string` is a
// mutable string, otherwise `string` itself. For an empty input: @"" if it
// is a mutable string, otherwise the input retained and autoreleased.
//
// NOTE(review): the `while` line below appears truncated in this text.
// The loop condition, the declaration of `c`, the per-iteration reset of
// srcLen and the start of the `for` over the table are missing, and
// includeCRLF is not referenced anywhere in what remains. Presumably the
// lost part handled line breaks when includeCRLF is set -- confirm against
// the repository copy before changing this function.
NSString* baseStringByConvertingToHTML(NSString* string,GSWHTMLConvertingStruct* convStructPtr,BOOL includeCRLF)
{
NSString* str=nil;
int length=[string length];
NSCAssert(convStructPtr->charsCount>0,@"normalChars not initialized");
if (length>0)
{
BOOL changed=NO;
int srcLen=0;
int dstLen=0;
unichar* dstChars=NULL;
int capacity=0;
unichar* pString=NULL;
int i=0;
int j=0;
// Head-room added on every (re)allocation: at least 128 unichars.
int allocMargin=max(128,length/2);
allocOrReallocUnicharString(&pString,&capacity,0,length+1+allocMargin);
[string getCharacters:pString];
//NSDebugFLog(@"string=%@",string);
while(icharsCount;j++)
{
if (c==convStructPtr->normalChars[j])
{
srcLen=1;
dstLen=convStructPtr->htmlCharsLen[j];
dstChars=htmlCharsAtIndex(convStructPtr,j);
break;
};
};
};
// srcLen>0 means the characters at position i must be replaced by the
// dstLen unichars at dstChars.
if (srcLen>0)
{
/*NSDebugFLog(@"i=%d j=%d: srcLen=%d dstLen=%d by '%@'",i,j,srcLen,dstLen,[NSString stringWithCharacters:dstChars
length:dstLen]);*/
changed=YES;
/*NSDebugFLog(@"-1==> %@",[NSString stringWithCharacters:pString
length:length]);*/
// Grow the buffer when the replacement would not fit.
if (length+1+dstLen-srcLen>capacity)
allocOrReallocUnicharString(&pString,&capacity,length,capacity+allocMargin);
/*NSDebugFLog(@"0==> %@",[NSString stringWithCharacters:pString
length:length]);
NSDebugFLog(@"Copy %d characters from pos %d to pos %d",(length-i-srcLen),i+srcLen,i+dstLen);*/
// Shift the remainder of the string so the replacement fits ...
GSWMemMove(pString+i+dstLen,pString+i+srcLen,sizeof(unichar)*(length-i-srcLen));
/*NSDebugFLog(@"1==> %@",[NSString stringWithCharacters:pString
length:length+dstLen-srcLen]);
NSDebugFLog(@"Copy %d characters to pos %d",dstLen,i);*/
// ... then write the replacement and continue after it.
GSWMemMove(pString+i,dstChars,sizeof(unichar)*dstLen);
i+=dstLen;
length+=dstLen-srcLen;
/*NSDebugFLog(@"2==> i=%d length=%d %@",i,length,[NSString stringWithCharacters:pString
length:length]);*/
}
else
i++;
};
if (changed)
str=(*stringClass_stringWithCharactersIMP)(stringClass,stringWithCharactersSEL,pString,length);
else if ([string isKindOfClass:mutableStringClass])
str=(*stringClass_stringWithStringIMP)(stringClass,stringWithStringSEL,string);
else
str=string;
}
else if ([string isKindOfClass:mutableStringClass])
str=@"";
else
str=AUTORELEASE(RETAIN(string));
return str;
};
/**
 * Compares the first `len` unichars of p1 and p2.
 *
 * Returns YES when all of them are equal.  Only lengths 1 to 9 are
 * supported: a length of 0 and any other length trigger an assertion and
 * return NO.  Comparison stops at the first difference.
 */
inline BOOL areUnicharEquals(unichar* p1,unichar* p2,int len)
{
  int pos=0;

  if (len==0)
    {
      NSCAssert(NO,@"Too short comparaison");
      return NO;
    }
  if (len<0 || len>9)
    {
      NSCAssert(NO,@"Compraison too long");
      return NO;
    }

  for(pos=0;pos<len;pos++)
    {
      if (p1[pos]!=p2[pos])
        return NO;
    }
  return YES;
}
//--------------------------------------------------------------------
/**
 * Reverse of baseStringByConvertingToHTML(): replaces every replacement
 * string of the table by the single character it stands for.
 *
 * string         text to decode.
 * convStructPtr  initialised conversion table (asserted non-empty).
 * includeBR      when YES, the line-break tag stored in unicodeBR is
 *                replaced by '\n'; this is tested before the table.
 *
 * The conversion works in place on a unichar copy.  A decoded character
 * is never re-examined, so the output of one replacement cannot start
 * another one.
 *
 * Result when nothing was replaced: an immutable copy if `string` is a
 * mutable string, otherwise `string` itself.  For an empty input: @"" if
 * it is a mutable string, otherwise the input retained and autoreleased.
 */
NSString* baseStringByConvertingFromHTML(NSString* string,GSWHTMLConvertingStruct* convStructPtr,BOOL includeBR)
{
  NSString* str=nil;
  int length=[string length];

  NSCAssert(convStructPtr->charsCount>0,@"normalChars not initialized");

  if (length>0)
    {
      BOOL changed=NO;
      int srcLen=0;
      int dstLen=0;
      unichar dstUnichar=0;
      unichar* pString=GSAutoreleasedBuffer((length+1)*sizeof(unichar));
      int i=0;
      int j=0;

      [string getCharacters:pString];

      // Positions with fewer than 3 characters left are not examined.
      while(i<(length-2))
        {
          srcLen=0;
          if (includeBR
              && length-i>=unicodeBRLen
              && areUnicharEquals(pString+i,unicodeBR,unicodeBRLen))
            {
              srcLen=unicodeBRLen;
              dstLen=1;
              dstUnichar='\n';
            }
          else
            {
              for(j=0;j<convStructPtr->charsCount;j++)
                {
                  if (length-i>=convStructPtr->htmlCharsLen[j]
                      && areUnicharEquals(pString+i,
                                          htmlCharsAtIndex(convStructPtr,j),
                                          convStructPtr->htmlCharsLen[j]))
                    {
                      srcLen=convStructPtr->htmlCharsLen[j];
                      dstLen=1;
                      dstUnichar=convStructPtr->normalChars[j];
                      break;
                    }
                }
            }

          if (srcLen>0)
            {
              changed=YES;
              // Close the gap left by the shorter replacement ...
              GSWMemMove(pString+i+dstLen,pString+i+srcLen,sizeof(unichar)*(length-i-srcLen));
              // ... then store the decoded character and step over it.
              GSWMemMove(pString+i,&dstUnichar,sizeof(unichar)*dstLen);
              i+=dstLen;
              length+=dstLen-srcLen;
            }
          else
            {
              i++;
            }
        }

      if (changed)
        str=(*stringClass_stringWithCharactersIMP)(stringClass,stringWithCharactersSEL,pString,length);
      else if ([string isKindOfClass:mutableStringClass])
        str=(*stringClass_stringWithStringIMP)(stringClass,stringWithStringSEL,string);
      else
        str=string;
    }
  else if ([string isKindOfClass:mutableStringClass])
    str=@"";
  else
    str=AUTORELEASE(RETAIN(string));

  return str;
}
//====================================================================
@implementation NSString (HTMLString)
//--------------------------------------------------------------------
/** Returns the receiver with every "+" replaced by a single space. */
-(NSString*)htmlPlus2Space
{
  NSString* plusSign=@"+";
  NSString* blank=@" ";
  return [self stringByReplacingString:plusSign
                            withString:blank];
}
//--------------------------------------------------------------------
// void decodeURL(String &str)
// Convert the given URL string to a normal string. This means that
// all escaped characters are converted to their normal values. The
// escape character is '%' and is followed by 2 hex digits
// representing the octet.
//
// NOTE(review): this span is damaged in the text under review. The `for`
// line below runs straight into the tail of what looks like a different
// method: `p`, `digits` and `temp` are not declared anywhere in the visible
// text, and `encoding`, `write`, `l` and `new` are never used after being
// set up. Restore both methods from the repository copy before editing.
-(NSString*) decodeURLEncoding:(NSStringEncoding) encoding
{
// Output buffer sized to the receiver's length in characters.
unsigned orglen = [self length];
NSMutableData *new = [NSMutableData dataWithLength: orglen];
const unsigned char *read;
unsigned char *write;
unsigned i,n,l;
// The receiver is read through its UTF-8 representation.
read = [self UTF8String];
write = [new mutableBytes];
// From the middle of this line on, the visible code formats the high and
// low 4 bits of *p through the `digits` table and appends the result.
for (l=0,i=0,n=orglen;i> 4) & 0x0f],digits[*p & 0x0f]];
};
// Returns a new string with the contents of `temp`.
return [NSString stringWithString:temp];
}
//--------------------------------------------------------------------
/**
 * Parses the receiver as a query string: items separated by "&", key and
 * value separated by "=", both URL-decoded with `encoding`.  Every value
 * is wrapped in an array, even when its key occurs only once.
 */
-(NSDictionary*) dictionaryQueryStringWithEncoding: (NSStringEncoding) encoding
{
  NSDictionary* query=[self dictionaryWithSep1:@"&"
                                      withSep2:@"="
                            withOptionUnescape:YES
                                    forceArray:YES
                                      encoding: encoding];
  return query;
}
//--------------------------------------------------------------------
/**
 * Convenience form of
 * -dictionaryWithSep1:withSep2:withOptionUnescape:forceArray:encoding:
 * that does not force values into arrays and uses
 * [GSWMessage defaultEncoding] as the encoding.
 */
-(NSDictionary*)dictionaryWithSep1:(NSString*)sep1
                          withSep2:(NSString*)sep2
                withOptionUnescape:(BOOL)unescape
{
  NSStringEncoding defaultEncoding=[GSWMessage defaultEncoding];
  NSDictionary* parsed=[self dictionaryWithSep1:sep1
                                       withSep2:sep2
                             withOptionUnescape:unescape
                                     forceArray:NO
                                       encoding:defaultEncoding];
  return parsed;
}
//--------------------------------------------------------------------
/**
 * Splits the receiver into key/value pairs and returns them as an
 * immutable dictionary, or nil when the receiver is empty.
 *
 * sep1        separator between items (e.g. "&").
 * sep2        separator between a key and its value (e.g. "=").
 * unescape    when YES, keys and values go through -decodeURLEncoding:.
 * forceArray  when YES, every value is stored in an array even if its key
 *             occurs only once.
 * encoding    passed to -decodeURLEncoding:.
 *
 * An item without sep2 gets an empty string as its value.  A key that
 * occurs several times always ends up with an array of all its values, in
 * order of appearance.  Empty items are skipped, and so are items that
 * contain sep2 more than once (their key cannot be determined).
 */
-(NSDictionary*)dictionaryWithSep1:(NSString*)sep1
                          withSep2:(NSString*)sep2
                withOptionUnescape:(BOOL)unescape
                        forceArray:(BOOL)forceArray// Put value in array even if there's only one value
                          encoding:(NSStringEncoding) encoding
{
  NSDictionary* result=nil;

  if ([self length]>0)
    {
      NSArray* listItems=[self componentsSeparatedByString:sep1];
      NSMutableDictionary* pDico=[NSMutableDictionary dictionary];
      int itemsCount=[listItems count];
      int iCount=0;

      for(iCount=0;iCount<itemsCount;iCount++)
        {
          NSString* item=[listItems objectAtIndex:iCount];
          if ([item length]>0)
            {
              NSArray* listParam=[item componentsSeparatedByString:sep2];
              id key=nil;
              id value=nil;

              if ([listParam count]==1)
                {
                  key=[listParam objectAtIndex:0];
                  if (unescape)
                    key=[key decodeURLEncoding: encoding];
                }
              else if ([listParam count]==2)
                {
                  key=[listParam objectAtIndex:0];
                  value=[listParam objectAtIndex:1];
                  if (unescape)
                    {
                      key=[key decodeURLEncoding: encoding];
                      value=[value decodeURLEncoding: encoding];
                    }
                }

              if (key)
                {
                  id newValue=nil;
                  id prevValue=[pDico objectForKey:key];

                  if (!value)
                    value=[NSString string];

                  if (prevValue)
                    {
                      // Without forceArray the first value of a key is
                      // stored bare, so the previous value is only an
                      // array from the third occurrence on.
                      if ([prevValue isKindOfClass:[NSArray class]])
                        newValue=[prevValue arrayByAddingObject:value];
                      else
                        newValue=[NSArray arrayWithObjects:prevValue,value,nil];
                    }
                  else if (forceArray)
                    newValue=[NSArray arrayWithObject:value];
                  else
                    newValue=value;

                  [pDico setObject:newValue
                            forKey:key];
                }
            }
        }

      result=[NSDictionary dictionaryWithDictionary:pDico];
    }

  return result;
}
//--------------------------------------------------------------------
/**
 * Parses the receiver as an image-map coordinate pair of the form "x,y".
 *
 * Returns YES when an integer, a comma and a second integer were scanned
 * and nothing is left afterwards; the two integers are stored through x
 * and y.  On failure returns NO and sets each non-NULL output to INT_MAX.
 */
-(BOOL)ismapCoordx:(int*)x
                 y:(int*)y
{
  BOOL parsed=NO;
  NSScanner* coordScanner=[NSScanner scannerWithString:self];

  // Single-pass block: `break` leaves it at the first step that fails.
  do
    {
      if (![coordScanner scanInt:x])
        break;
      if (x)
        {
          NSDebugMLLog(@"low",@"x=%d",*x);
        }

      if (![coordScanner scanString:@","
                         intoString:NULL])
        break;

      if (![coordScanner scanInt:y])
        break;
      if (y)
        {
          NSDebugMLLog(@"low",@"y=%d",*y);
        }

      NSDebugMLLog(@"low",@"[scanner isAtEnd]=%d",(int)[coordScanner isAtEnd]);
      if ([coordScanner isAtEnd])
        {
          parsed=YES;
        }
    }
  while(0);

  if (!parsed)
    {
      if (x)
        *x=INT_MAX;
      if (y)
        *y=INT_MAX;
    }
  return parsed;
}
//--------------------------------------------------------------------
/** Method form of stringByEscapingHTMLString(), applied to the receiver. */
-(NSString*)stringByEscapingHTMLString
{
  NSString* escaped=stringByEscapingHTMLString(self);
  return escaped;
}
//--------------------------------------------------------------------
/** Method form of stringByEscapingHTMLAttributeValue(), applied to the receiver. */
-(NSString*)stringByEscapingHTMLAttributeValue
{
  NSString* escaped=stringByEscapingHTMLAttributeValue(self);
  return escaped;
}
//--------------------------------------------------------------------
/** Method form of stringByConvertingToHTMLEntities(), applied to the receiver. */
-(NSString*)stringByConvertingToHTMLEntities
{
  NSString* converted=stringByConvertingToHTMLEntities(self);
  return converted;
}
//--------------------------------------------------------------------
/** Method form of stringByConvertingFromHTMLEntities(), applied to the receiver. */
-(NSString*)stringByConvertingFromHTMLEntities
{
  NSString* converted=stringByConvertingFromHTMLEntities(self);
  return converted;
}
//--------------------------------------------------------------------
/** Method form of stringByConvertingToHTML(), applied to the receiver. */
-(NSString*)stringByConvertingToHTML
{
  NSString* converted=stringByConvertingToHTML(self);
  return converted;
}
//--------------------------------------------------------------------
/** Method form of stringByConvertingFromHTML(), applied to the receiver. */
-(NSString*)stringByConvertingFromHTML
{
  NSString* converted=stringByConvertingFromHTML(self);
  return converted;
}
@end