/* The GNUstep HTML Linker
   Copyright (C) 2002 Free Software Foundation, Inc.

   Written by: Nicola Pero
   Date: January 2002

   This file is part of the GNUstep Project

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   You should have received a copy of the GNU General Public
   License along with this program; see the file COPYING.LIB.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   */

/*
 * This tool implements a HTML linker.
 *
 * A HTML linker is able to fix up <a href="..."> links from one HTML
 * document to other HTML ones.
 *
 * It's a pretty generic tool.  Think of it this way - say that you
 * have a collection of HTML files, all in the same directory, with
 * working links from one file to the other one.
 *
 * Now you move the files around, scattering them in many directories
 * - of course the links no longer work!
 *
 * But if you run the HTML linker on the files, the HTML linker will
 * modify all links inside the files, resolving each of them to point
 * to the actual full path of the required file.  The links will work
 * again.
 *
 * In the real world, it's more complicated than this because you
 * normally put the HTML files across different directories from the
 * very beginning.  The HTML linker becomes helpful because you can
 * create links between these files as if they were in the same
 * directory ... and then - at the end - run the HTML linker to
 * actually fix up the links and make them work.  If you move around
 * the files or mess in any way with their paths, you can always fix up
 * the links afterwards by rerunning the linker - you don't need to
 * regenerate the HTML files.
 *
 * This is exactly what (auto)gsdoc does when generating the HTML - it
 * creates links from one class to another one as if they were in the
 * same directory, ignoring the issue of the real full paths on disk
 * (and whether the documentation for the other classes actually
 * exists :-).
 *
 * When the documentation is installed, the HTML linker is run, and it
 * will actually fix up the links to point to the real full paths on
 * disk (and warn about any unresolved reference).  Note that when you
 * install the documentation, files end up in different dirs of
 * GNUSTEP_LOCAL_ROOT or GNUSTEP_SYSTEM_ROOT or GNUSTEP_USER_ROOT
 * ... without the linker it would be a pain to keep cross-references
 * right.  It would probably be impossible.
 *
 * The HTML linker will only fix up links which have the attribute
 * 'rel' set to 'dynamical' (the comparison is exact and
 * case-sensitive), as in the following example -
 *
 * <a href="NSObject_Class.html#isa" rel="dynamical">
 *
 * All other links will be ignored and not fixed up.  This is so that you
 * can clearly mark the links you want to be dynamically fixed up by the
 * linker; other links will not be touched.
 *
 * The linker performs 'link checking' if run with the
 * '-CheckLinks YES' option.  Link checking means that when a link is
 * fixed up, the linker checks that the destination file actually
 * contains the appropriate <a name="..."> tag.  For example, when
 * fixing up <a href="NSObject_Protocol.html#release" rel="dynamical">,
 * the linker will check that the NSObject_Protocol.html file actually
 * contains a <a name="release"> tag somewhere, and issue a warning
 * otherwise.  Anchor names are compared case-insensitively.
 *
 * If you run the linker without 'link checking' it will not even need
 * to read the destination file, which (of course) gives better
 * performance.
 *
 * Last, please notice that when using the HTML linker in practice,
 * the tool works with two kinds of files -
 *
 * 'input files' - files whose links need to be fixed up.  These files
 * are *modified* by the linker.  The old version of the file is
 * (atomically) replaced with the fixed up one.
 *
 * 'destination files' - files which can be the destination of links
 * in the input files.  These files are untouched during processing;
 * but they might be read when the linker is run with 'link checking'
 * enabled, to check that the links in the input files are actually
 * correct.
 */

#include <Foundation/Foundation.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * An object representing a file which can be a destination of links.
 */
@interface DestinationFile : NSObject
{
  /* Full name to be used when fixing up links to this file.  */
  NSString *fullName;

  /* Path on disk needed to read the file from disk - needed only when
     performing link checking.  pathOnDisk might be different from
     fullName, for example for a file on a web server.  In that case,
     fullName is the URI to the file on the web server, while
     pathOnDisk is the path to the file on disk.  */
  NSString *pathOnDisk;

  /* If the file has already been read to perform link checking, names
     is the array of all (lowercased) anchor names in the file: for
     any <a name="xxx"> in the file, xxx is put in the array.  If the
     file hasn't been read yet, it's nil.  We read the file and parse
     it lazily, only if needed.  */
  NSArray *names;
}

/* Initialize with the name used in fixed up links (f) and the path
   used to read the file from disk when checking links (p).  */
- (id)initWithFullName: (NSString *)f
	    pathOnDisk: (NSString *)p;

/* Return the full name.  */
- (NSString *)fullName;

/* Checks that the file on disk contains <a name="xxx"> where xxx is
   name (compared case-insensitively), lazily loading and parsing the
   file if needed.  Return YES if the file contains name, or if name
   is nil or empty; NO otherwise.  */
- (BOOL)checkAnchorName: (NSString *)name;

@end

/* The HTMLLinker class is very simple and is the core of the linker.
   It just keeps a table of the available destination files, and is
   able to fix up a link to point to one of those files.  */
@interface HTMLLinker : NSObject
{
  BOOL verbose;
  BOOL checkLinks;
  /* Maps the last path component of each destination file's full name
     to its DestinationFile.  */
  NSMutableDictionary *files;
}

- (id)initWithVerboseFlag: (BOOL)v
	   checkLinksFlag: (BOOL)f;

/* Register the file as available for resolving references.  */
- (void)registerFile: (DestinationFile *)file;

/* Resolve the link 'link' by fixing it up using the registered
   destination files.  Return the resolved link.  'logFile' is only
   used to print error messages.  It is the file in which the link is
   originally found; if there is a problem resolving the link, the
   warning message printed out states that the problem is in file
   'logFile'.  */
- (NSString *)resolveLink: (NSString *)link
		  logFile: (NSString *)logFile;

@end

/* All the parsing code is in the following class.  It's not a real
   parser in the sense that it is just performing its minimal duty in
   the quickest possible way, so calling this a parser is a bit of an
   exaggeration ... this code can run very quickly through an HTML
   string, extracting the <a name="yyy"> tags or fixing up the
   <a href="xxx" rel="dynamical"> tags.  No more HTML parsing than this
   is done.  Remarkably, this does not need XML support in the base
   library, so you can use the HTML linker on any system.  This class
   was written in order to perform its trivial, mechanical duty /very
   fast/.  You want to be able to run the linker often and on a lot of
   files and still be happy.  */
@interface HTMLParser : NSObject
{
  /* The HTML code that we work on.  */
  unichar *chars;
  unsigned length;
}
/* Init with some HTML code to parse.  */
- (id)initWithCode: (NSString *)HTML;

/* Extract all the <a name="yyy"> tags from the HTML code, and return
   the list of their (lowercased) names.  */
- (NSArray *)names;

/* Fix up all the links in the HTML code by feeding each of them to
   the provided HTMLLinker; return the fixed up HTML code.  logFile is
   the file we are fixing up; it's only used when a warning is issued
   because there is a problem in the linking - the warning message is
   displayed as being about links in the file logFile.  */
- (NSString *)resolveLinksUsingHTMLLinker: (HTMLLinker *)linker
				  logFile: (NSString *)logFile;
@end


/*
 * Low level scanning helpers shared by the two HTMLParser passes.
 */

/* Description of one attribute found inside a tag.  All positions are
   indexes into the scanned character buffer.  */
typedef struct
{
  unsigned nameStart;   /* First character of the attribute name.  */
  unsigned nameLength;  /* Might be 0, eg for <a ="nicola">.  */
  unsigned rawStart;    /* Start of the value, opening quote included.  */
  unsigned valueStart;  /* Start of the value, quotes excluded.  */
  unsigned valueLength; /* Might be 0, eg for <a name="">.  */
  unsigned rawEnd;      /* One past the value, closing quote included.  */
  BOOL terminated;      /* NO if the input ended inside the value.  */
} HTMLAttribute;

typedef enum
{
  HTMLScanEndOfTag,            /* Found '>' or ran out of input.  */
  HTMLScanBareAttribute,       /* Attribute without an '=value' part.  */
  HTMLScanAttributeWithValue   /* The HTMLAttribute is fully filled in.  */
} HTMLScanResult;

/* One element of the output list built while fixing up links; chars
   either points inside the original buffer or is a malloc'd copy
   (needsFreeing == YES).  */
typedef struct _HTMLFragment
{
  unichar *chars;
  unsigned length;
  BOOL needsFreeing;
  struct _HTMLFragment *next;
} HTMLFragment;

/* The characters the linker treats as separators inside a tag.  */
static inline BOOL
isHTMLSpace (unichar c)
{
  return (c == ' ' || c == '\n' || c == '\r' || c == '\t');
}

/* Return the index of the first non-space character at or after i
   (or length if there is none).  */
static unsigned
skipHTMLSpaces (const unichar *chars, unsigned length, unsigned i)
{
  while (i < length && isHTMLSpace (chars[i]))
    {
      i++;
    }
  return i;
}

/* Return YES if an anchor tag ('<a' or '<A' followed by a separator)
   starts at index i.  The caller guarantees that i + 2 is a valid
   index.  */
static inline BOOL
startsAnchorTag (const unichar *chars, unsigned i)
{
  return (chars[i] == '<'
	  && (chars[i + 1] == 'a' || chars[i + 1] == 'A')
	  && isHTMLSpace (chars[i + 2]));
}

/* Scan the next attribute of a tag starting at *index, and leave
   *index just after what has been consumed.  Return HTMLScanEndOfTag
   when the closing '>' (left unconsumed) or the end of the input is
   reached before an attribute is complete; HTMLScanBareAttribute for
   an attribute which is not followed by '='; and
   HTMLScanAttributeWithValue when both name and value were read, in
   which case *attr describes them.  */
static HTMLScanResult
scanHTMLAttribute (const unichar *chars, unsigned length,
		   unsigned *index, HTMLAttribute *attr)
{
  unsigned i = skipHTMLSpaces (chars, length, *index);

  /* Read the attribute name - everything up to a space, '=' or '>'.  */
  attr->nameStart = i;
  while (i < length
	 && !isHTMLSpace (chars[i])
	 && chars[i] != '='
	 && chars[i] != '>')
    {
      i++;
    }
  attr->nameLength = i - attr->nameStart;

  if (i == length || chars[i] == '>')
    {
      *index = i;
      return HTMLScanEndOfTag;
    }

  i = skipHTMLSpaces (chars, length, i);
  if (i == length)
    {
      *index = i;
      return HTMLScanEndOfTag;
    }

  if (chars[i] != '=')
    {
      /* No '=' -- the caller goes on with the next attribute.  */
      *index = i;
      return HTMLScanBareAttribute;
    }

  /* Skip the '=' and any space following it.  */
  i = skipHTMLSpaces (chars, length, i + 1);
  if (i == length)
    {
      *index = i;
      return HTMLScanEndOfTag;
    }

  /* Read the value.  */
  attr->rawStart = i;
  if (chars[i] == '"' || chars[i] == '\'')
    {
      unichar quote = chars[i];

      /* Skip the opening quote, then read up to the matching one.  */
      i++;
      if (i == length)
	{
	  *index = i;
	  return HTMLScanEndOfTag;
	}

      attr->valueStart = i;
      while (i < length && chars[i] != quote)
	{
	  i++;
	}
      attr->valueLength = i - attr->valueStart;
      attr->terminated = (i < length);

      if (attr->terminated)
	{
	  /* Step over the closing quote, so that it is not mistaken
	     for the beginning of the next attribute name.  */
	  i++;
	}
    }
  else
    {
      /* Unquoted value - read up to a space or '>'.  */
      attr->valueStart = i;
      while (i < length && !isHTMLSpace (chars[i]) && chars[i] != '>')
	{
	  i++;
	}
      attr->valueLength = i - attr->valueStart;
      attr->terminated = (i < length);
    }

  attr->rawEnd = i;
  *index = i;
  return HTMLScanAttributeWithValue;
}

/* Return YES if the name of attr is 'expected', ignoring case so
   that eg, HREF and href are the same.  expected must be lowercase.  */
static BOOL
attributeIsNamed (const unichar *chars, const HTMLAttribute *attr,
		  NSString *expected)
{
  NSString *name;

  /* An empty name might happen if the file contains <a ="nicola">.  */
  if (attr->nameLength == 0)
    {
      return NO;
    }

  name = [NSString stringWithCharacters: &chars[attr->nameStart]
		   length: attr->nameLength];

  return [[name lowercaseString] isEqualToString: expected];
}

/* Return the value of attr (quotes excluded) as a string.  */
static NSString *
attributeValue (const unichar *chars, const HTMLAttribute *attr)
{
  /* An empty value might happen if the file contains <a name="">.  */
  if (attr->valueLength == 0)
    {
      return @"";
    }

  return [NSString stringWithCharacters: &chars[attr->valueStart]
		   length: attr->valueLength];
}

/* Allocate a new fragment and link it after tail (which might be
   NULL when creating the head of the list).  Return the new
   fragment.  */
static HTMLFragment *
appendHTMLFragment (HTMLFragment *tail, unichar *chars, unsigned length,
		    BOOL needsFreeing)
{
  HTMLFragment *fragment = malloc (sizeof (HTMLFragment));

  fragment->chars = chars;
  fragment->length = length;
  fragment->needsFreeing = needsFreeing;
  fragment->next = NULL;

  if (tail != NULL)
    {
      tail->next = fragment;
    }

  return fragment;
}


@implementation HTMLParser

- (id)initWithCode: (NSString *)HTML
{
  self = [super init];

  if (self != nil)
    {
      /* If HTML is nil, length is 0 and the parser simply works on
	 empty code.  */
      length = [HTML length];
      chars = malloc (sizeof (unichar) * length);

      if (chars == NULL && length > 0)
	{
	  RELEASE (self);
	  return nil;
	}

      [HTML getCharacters: chars];
    }

  return self;
}

- (void)dealloc
{
  free (chars);
  [super dealloc];
}

- (NSArray *)names
{
  NSMutableArray *names = AUTORELEASE ([NSMutableArray new]);
  unsigned i = 0;

  while (i + 3 < length)
    {
      /* We ignore anything except stuff which begins with "<a ".  */
      if (startsAnchorTag (chars, i))
	{
	  /* Only the first name attribute of the tag is used.  */
	  NSString *name = nil;
	  HTMLAttribute attr;
	  HTMLScanResult result;

	  i += 3;

	  while ((result = scanHTMLAttribute (chars, length, &i, &attr))
		 != HTMLScanEndOfTag)
	    {
	      if (result == HTMLScanAttributeWithValue
		  && name == nil
		  && attributeIsNamed (chars, &attr, @"name"))
		{
		  /* Per HTML specs we lowercase name.  */
		  name = [attributeValue (chars, &attr) lowercaseString];
		}
	    }

	  if (name != nil)
	    {
	      [names addObject: name];
	    }
	}
      i++;
    }

  return names;
}


- (NSString *)resolveLinksUsingHTMLLinker: (HTMLLinker *)linker
				  logFile: (NSString *)logFile
{
  /* We represent the output as a linked list.  Each element in the
     linked list represents a string; concatenating all the strings in
     the linked list, you obtain the output.  The trick is that these
     strings in the linked list might actually be pointers inside the
     chars array ... we are never copying stuff from the chars array -
     just keeping pointers to substrings inside it - till we generate
     the final string at the end ... for speed and efficiency reasons
     of course.  */
  HTMLFragment *head;
  HTMLFragment *tail;

  /* The index of the beginning of the last string fragment (the tail).  */
  unsigned tailIndex = 0;

  /* The temporary index.  */
  unsigned i = 0;

  /* The total number of chars in the output string.  We don't know
     this beforehand because each time we fix up a link, we might add
     or remove characters from the output.  We update
     totalNumberOfChars each time we close a fragment.  */
  unsigned totalNumberOfChars = 0;

  unichar *outputChars;
  unsigned j = 0;
  NSString *output;

  /* Initialize the linked list.  The last string fragment is the
     first one at the beginning.  */
  head = appendHTMLFragment (NULL, chars, 0, NO);
  tail = head;

  while (i + 3 < length)
    {
      /* We ignore anything except stuff which begins with "<a ".  */
      if (startsAnchorTag (chars, i))
	{
	  /* Only the first href and the first rel of the tag are used.  */
	  NSString *href = nil;
	  NSString *rel = nil;
	  /* The range of the original href value, quotes included.  */
	  unsigned hrefStart = 0;
	  unsigned hrefEnd = 0;
	  HTMLAttribute attr;
	  HTMLScanResult result;

	  i += 3;

	  while ((result = scanHTMLAttribute (chars, length, &i, &attr))
		 != HTMLScanEndOfTag)
	    {
	      if (result != HTMLScanAttributeWithValue)
		{
		  continue;
		}

	      /* A value cut short by the end of the input is ignored -
		 we don't fix up truncated tags.  */
	      if (!attr.terminated)
		{
		  break;
		}

	      if (href == nil && attributeIsNamed (chars, &attr, @"href"))
		{
		  href = attributeValue (chars, &attr);
		  /* We replace the quotes as well, because we insert
		     those on our own.  */
		  hrefStart = attr.rawStart;
		  hrefEnd = attr.rawEnd;
		}
	      else if (rel == nil
		       && attributeIsNamed (chars, &attr, @"rel"))
		{
		  rel = attributeValue (chars, &attr);
		}
	    }

	  if (href != nil && [rel isEqualToString: @"dynamical"])
	    {
	      /* Ok - fixup the link.  */
	      NSString *link;
	      unichar *linkChars;
	      unsigned linkLength;

	      link = [linker resolveLink: href  logFile: logFile];

	      /* Add " before and after the link.  */
	      link = [NSString stringWithFormat: @"\"%@\"", link];

	      /* Close the previous string fragment at hrefStart.  */
	      tail->length = hrefStart - tailIndex;
	      totalNumberOfChars += tail->length;

	      /* Insert immediately afterwards a string fragment
		 containing the fixed up link.  */
	      linkLength = [link length];
	      linkChars = malloc (sizeof (unichar) * linkLength);
	      [link getCharacters: linkChars];

	      tail = appendHTMLFragment (tail, linkChars, linkLength, YES);
	      totalNumberOfChars += tail->length;

	      /* Now prepare the new tail to start just after the end
		 of the original href in the original HTML code.
		 hrefEnd might be equal to length, in which case the
		 new tail is simply empty.  */
	      tail = appendHTMLFragment (tail, &chars[hrefEnd], 0, NO);
	      tailIndex = hrefEnd;
	    }
	}
      i++;
    }

  /* Close the last open string fragment.  */
  tail->length = length - tailIndex;
  totalNumberOfChars += tail->length;

  /* Generate the output.  Allocate space for the whole output in a
     single chunk now that we know how big it should be.  */
  outputChars = malloc (sizeof (unichar) * totalNumberOfChars);

  /* Copy into the output all the string fragments, destroying each
     of them as we go on.  */
  while (head != NULL)
    {
      HTMLFragment *next = head->next;

      if (head->length > 0)
	{
	  memcpy (&outputChars[j], head->chars,
		  sizeof (unichar) * head->length);
	}
      j += head->length;

      if (head->needsFreeing)
	{
	  free (head->chars);
	}

      free (head);
      head = next;
    }

  if (totalNumberOfChars == 0)
    {
      output = @"";
    }
  else
    {
      output = [NSString stringWithCharacters: outputChars
			 length: totalNumberOfChars];
    }

  /* The string has its own copy of the characters.  */
  free (outputChars);

  return output;
}

@end


@implementation DestinationFile

- (id)initWithFullName: (NSString *)f
	    pathOnDisk: (NSString *)p
{
  self = [super init];

  if (self != nil)
    {
      ASSIGN (fullName, f);
      ASSIGN (pathOnDisk, p);
    }

  return self;
}

- (void)dealloc
{
  RELEASE (fullName);
  RELEASE (pathOnDisk);
  RELEASE (names);
  [super dealloc];
}

- (NSString *)fullName
{
  return fullName;
}

- (BOOL)checkAnchorName: (NSString *)name
{
  /* No anchor.  */
  if (name == nil || [name isEqualToString: @""])
    {
      return YES;
    }

  if (names == nil)
    {
      /* Load the file and parse it, saving the result in names.  If
	 the file can't be read, the parser gets nil and names ends up
	 empty, so the file is not read again.  */
      NSString *file = [NSString stringWithContentsOfFile: pathOnDisk];
      HTMLParser *parser = [[HTMLParser alloc] initWithCode: file];

      ASSIGN (names, [parser names]);
      RELEASE (parser);
    }

  /* The parser stores anchor names lowercased, so the name we look
     for must be lowercased too.  */
  return [names containsObject: [name lowercaseString]];
}

@end


@implementation HTMLLinker

- (id)initWithVerboseFlag: (BOOL)v
	   checkLinksFlag: (BOOL)f
{
  self = [super init];

  if (self != nil)
    {
      verbose = v;
      checkLinks = f;
      files = [NSMutableDictionary new];
    }

  return self;
}

- (void)dealloc
{
  RELEASE (files);
  [super dealloc];
}

- (void)registerFile: (DestinationFile *)file
{
  [files setObject: file  forKey: [[file fullName] lastPathComponent]];
}

- (NSString *)resolveLink: (NSString *)link
		  logFile: (NSString *)logFile
{
  NSString *fileLink;
  NSString *nameLink;
  NSString *relocatedFileLink;
  DestinationFile *file;
  NSRange hashRange;

  /* Break the link string into fileLink (everything which is before
     the `#'), and nameLink (everything which is after the `#', `#'
     not included).  For example, if link is
     'NSObject_Class.html#isa', then fileLink is
     'NSObject_Class.html' and nameLink is 'isa'.  */
  hashRange = [link rangeOfString: @"#"];

  if (hashRange.location == NSNotFound)
    {
      fileLink = link;
      nameLink = nil;
    }
  else
    {
      fileLink = [link substringToIndex: hashRange.location];

      if (hashRange.location + 1 < [link length])
	{
	  nameLink = [link substringFromIndex: (hashRange.location + 1)];
	}
      else
	{
	  /* Nothing after the `#'.  */
	  nameLink = nil;
	}
    }

  /* Now lookup fileLink.  First, extract the path-less filename,
     because it might have already been fixed up by a previous run of
     the linker.  */
  fileLink = [fileLink lastPathComponent];

  /* Now simply look it up in our list of files.  */
  file = [files objectForKey: fileLink];

  if (file == nil)
    {
      /* Not found - warn, and use the path-less filename.  */
      if (verbose || checkLinks)
	{
	  NSString *m;

	  m = [NSString stringWithFormat:
			  @"%@: Unresolved reference to file '%@'\n",
			logFile, fileLink];
	  /* The message contains file names, so it must never be used
	     as the format string itself.  */
	  fprintf (stderr, "%s", [m lossyCString]);
	}

      relocatedFileLink = fileLink;
    }
  else
    {
      relocatedFileLink = [file fullName];

      if (checkLinks && ![file checkAnchorName: nameLink])
	{
	  NSString *m;

	  m = [NSString stringWithFormat:
			  @"%@: Unresolved reference to '%@' in file '%@'\n",
			logFile, nameLink, fileLink];
	  fprintf (stderr, "%s", [m lossyCString]);
	}
    }

  /* Now build up the final relocated link, and return it.  */
  if (nameLink != nil)
    {
      return [NSString stringWithFormat: @"%@#%@", relocatedFileLink,
		       nameLink];
    }

  return relocatedFileLink;
}

@end

/* Print the usage message on stdout and exit successfully.  */
static void print_help_and_exit (void)
{
  printf ("GNUstep HTMLLinker (gnustep-base version %d.%d.%d)\n",
	  GNUSTEP_BASE_MAJOR_VERSION,
	  GNUSTEP_BASE_MINOR_VERSION,
	  GNUSTEP_BASE_SUBMINOR_VERSION);
  printf ("Usage: HTMLLinker [options] input_files [--Destinations destination_files]\n");
  printf (" `options' include:\n");
  printf ("  --help: print this message;\n");
  printf ("  --version: print version information;\n");
  printf ("  -Verbose YES: print verbose messages;\n");
  printf ("  -CheckLinks YES: check links as they are fixed up;\n");
  exit (0);
}

/* Print the version on stdout and exit successfully.  */
static void print_version_and_exit (void)
{
  printf ("GNUstep HTMLLinker (gnustep-base version %d.%d.%d)\n",
	  GNUSTEP_BASE_MAJOR_VERSION,
	  GNUSTEP_BASE_MINOR_VERSION,
	  GNUSTEP_BASE_SUBMINOR_VERSION);
  exit (0);
}

int main (int argc, char** argv, char** env)
{
  CREATE_AUTORELEASE_POOL(pool);
  NSUserDefaults *userDefs;
  NSArray *args;
  NSMutableArray *inputFiles;
  unsigned i, count;
  BOOL verbose, checkLinks;
  HTMLLinker *linker;
  BOOL destinations;

#ifdef GS_PASS_ARGUMENTS
  [NSProcessInfo initializeWithArguments:argv count:argc environment:env];
#endif

  userDefs = [NSUserDefaults standardUserDefaults];

  verbose = [userDefs boolForKey: @"Verbose"];
  checkLinks = [userDefs boolForKey: @"CheckLinks"];

  linker = [[HTMLLinker alloc] initWithVerboseFlag: verbose
			       checkLinksFlag: checkLinks];

  /* All non-options on the command line are:

     input files if they come before --Destinations

     destination files if they come after --Destinations

  */
  args = [[NSProcessInfo processInfo] arguments];

  count = [args count];

  destinations = NO;
  inputFiles = AUTORELEASE ([NSMutableArray new]);

  for (i = 1; i < count; i++)
    {
      NSString *arg = [args objectAtIndex: i];

      if ([arg hasPrefix: @"--"])
	{
	  if ([arg isEqualToString: @"--help"])
	    {
	      print_help_and_exit ();
	    }
	  else if ([arg isEqualToString: @"--version"])
	    {
	      print_version_and_exit ();
	    }
	  else if ([arg isEqualToString: @"--Destinations"])
	    {
	      /* Next file names to be interpreted as destination
		 files.  */
	      destinations = YES;
	    }
	  else
	    {
	      /* Ignore it for future expansions.  */
	    }
	}
      else if ([arg hasPrefix: @"-"])
	{
	  /* A GNUstep default - skip it and the next argument (its
	     value), if there is one.  */
	  if ((i + 1) < count)
	    {
	      i++;
	    }
	}
      else if (destinations)
	{
	  DestinationFile *d;

	  if (![arg isAbsolutePath])
	    {
	      /* Not sure what to do here ... for now we just warn and
		 register the file anyway.  */
	      NSLog (@"Warning - %@ is not an absolute filename!", arg);
	    }

	  d = [[DestinationFile alloc] initWithFullName: arg
				       pathOnDisk: arg];
	  [linker registerFile: d];
	  RELEASE (d);
	}
      else
	{
	  [inputFiles addObject: arg];
	}
    }

  count = [inputFiles count];

  if (count == 0)
    {
      NSLog (@"No input files specified.");
    }

  for (i = 0; i < count; i++)
    {
      NSString *inputFile;
      NSString *inputFileContents;
      HTMLParser *parser;

      inputFile = [inputFiles objectAtIndex: i];
      inputFileContents = [NSString stringWithContentsOfFile: inputFile];

      if (inputFileContents == nil)
	{
	  /* Never write back a file we could not read - that would
	     replace it with an empty one.  */
	  NSLog (@"Warning - could not read %@ - skipping it.", inputFile);
	  continue;
	}

      parser = [[HTMLParser alloc] initWithCode: inputFileContents];
      inputFileContents = [parser resolveLinksUsingHTMLLinker: linker
				  logFile: inputFile];
      RELEASE (parser);

      if (![inputFileContents writeToFile: inputFile
			      atomically: YES])
	{
	  NSLog (@"Warning - could not write %@.", inputFile);
	}
    }

  RELEASE (linker);
  RELEASE (pool);

  return 0;
}