From 5c9bd4ea8c4eba63176fcc4952aca0eea108f113 Mon Sep 17 00:00:00 2001 From: fedor Date: Fri, 13 Apr 2007 03:42:07 +0000 Subject: [PATCH] Re-add tool git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/base/trunk@25013 72102866-910b-0410-8b05-ffd578937521 --- ChangeLog | 2 + Tools/GNUmakefile | 3 +- Tools/HTMLLinker.m | 1767 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1771 insertions(+), 1 deletion(-) create mode 100644 Tools/HTMLLinker.m diff --git a/ChangeLog b/ChangeLog index 8bccdb305..1eefff1b8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ 2007-04-12 Adam Fedor + * Tool/HTMLLinker.m: Re-add. + * Documentation/coding-standards.texi: Add info section * Documentation/manual/manual.texi: Likewise (patch from Marco Bardelli). diff --git a/Tools/GNUmakefile b/Tools/GNUmakefile index 23ff6b12a..7965f4436 100644 --- a/Tools/GNUmakefile +++ b/Tools/GNUmakefile @@ -62,7 +62,7 @@ ifeq ($(add),yes) TOOL_NAME = autogsdoc cvtenc plmerge sfparse xmlparse else TOOL_NAME = autogsdoc cvtenc gdnc gspath defaults pl plmerge \ - plparse sfparse pldes plget plser pl2link xmlparse + plparse sfparse pldes plget plser pl2link xmlparse HTMLLinker CTOOL_NAME = gdomap SUBPROJECTS = make_strings @@ -90,6 +90,7 @@ sfparse_OBJC_FILES = sfparse.m pl2link_OBJC_FILES = pl2link.m locale_alias_OBJC_FILES = locale_alias.m xmlparse_OBJC_FILES = xmlparse.m +HTMLLinker_OBJC_FILES = HTMLLinker.m DOCUMENT_NAME = autogsdoc diff --git a/Tools/HTMLLinker.m b/Tools/HTMLLinker.m new file mode 100644 index 000000000..d51bfc72d --- /dev/null +++ b/Tools/HTMLLinker.m @@ -0,0 +1,1767 @@ +/** The GNUstep HTML Linker + + HTMLLinker. A tool to fix up href references in html files + Copyright (C) 2002,2007 Free Software Foundation, Inc. 
+ + Written by: Nicola Pero + Date: January 2002 + + This file is part of the GNUstep Project + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + You should have received a copy of the GNU General Public + License along with this program; see the file COPYING.LIB. + If not, write to the Free Software Foundation, + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + + The HTMLLinker tool +
+ Introduction +

+ The GNUstep HTML linker is able to fixup links from one HTML + document to other HTML ones. By link we mean the standard + <a href="NSString.html#DescriptionOfNSString"> + tag. By fixing up a link we mean to modify the path in the + href so that it points to the actual file on disk. + For example, if the DescriptionOfNSString + location is in the file NSStringOverview.html in the + directory /home/nicola/Doc, when the linker fixes up + the <a + href="NSString.html#DescriptionOfNSString"> link, it + will replace it with <a + href="/home/nicola/Doc/NSStringOverview.html#DescriptionOfNSString">. + Please note that when fixing up the link, the linker modifies both + the path and the file name that the link points to, but not the + location inside the file (the DescriptionOfNSString + in the example). + +

Practical Usage of the linker

+ + The typical usage of the linker is with maintaining + cross-references in software documentation. You need to establish + some sort of convention used by all your software documentation + for the link names. For example, suppose that your documentation + is about C libraries. For each C function, you might decide to + tag its documentation in the files with the name + function$function_name. For example, the place in + the doc where it documents the start_library() + function would have the HTML tag <a + name="function$start_library">. Having established this + convention, in any HTML file in your documentation in which you + want to create a link to the documentation for the + start_library() function, you use the code + <a rel="dynamic" + href="#function$start_library"> (please note that you + ignore the problem of locating the actual file which contains the + documentation for the start_library() function, that + is precisely what the linker will do for you). Whenever you + install the documentation for a new project, you first create a + relocation file for the project documentation, by running +
+      HTMLLinker -BuildRelocationFileForDir Documentation
+    
+ if for example the project documentation is in the + Documentation subdirectory. This will create a + Documentation/table.htmlink file, which contains a + list of all names found in the project documentation, and for each + of them, the file in which it's found. Then, you install the + project documentation (say for example that it's installed into + /opt/gnustep/Local/Documentation/MyProject), and once + it's installed, you can run the linker to update all links so that + they point to the actual files +
+      HTMLLinker /opt/gnustep/Local/Documentation/MyProject \
+              -l /opt/gnustep/Local/Documentation/MyProject \
+              -l /opt/gnustep/Local/Documentation/MyOtherProject
+    
+ This will fixup all links in MyProject's HTML files + by using the relocation files of both MyProject and + MyOtherProject, so all links to anything which is + documented inside those files will be generated correctly. + +

Usage of the tool with autogsdoc

+ + You can use the tool with documentation generated by autogsdoc to + perform the linking (or to relink it). Make sure to use the option + -LinksMarker gsdoc because autogsdoc marks the links + to be fixed up by the linker by using rel="gsdoc". + +

Specification

+ +

Modes of operation

+ The HTML linker works in two phases: + +
    + +
  • The first (called generation of the relocation + table) preprocesses a given set of HTML files so that it can + be the destination of links. It builds a relocation table for + the given set of HTML files. This relocation table simply maps + all names (as in <a name="xxx">) in the files + to the file in which the name is found. The HTML files are not + touched. The linker is able to merge this dynamically generated + relocation table with pregenerated relocation tables loaded from + files (called relocation files). +
  • + +
  • The second (called linking) links a given file to + the available HTML files on disk, by using the relocation table + to modify the HTML links in the file so that they point to + existing files. +
  • + +
+ + The HTML linker can also be run in a special mode, to generate a + relocation file for later reuse. In this mode, the HTML linker + will build the relocation table for all files in a directory, then + save the relocation table into a table.htmlink file + in that directory for later reuse. + + There are three kinds of files: + +
    + +
  • + input files: these are HTML files which are modified + as a consequence of linking; they have their links fixed up. +
  • + +
  • + destination files: these are HTML files which are + read to produce relocation tables. +
  • + +
  • + relocation files: these files are not HTML files - + they are only created and read by the linker (unless you have + a tool which can manage them), and are in a specific - very + simple - format. They are used to save relocation information + for later reuse, so that the linker can run faster. Normally, + they have a .htmlink extension. +
  • + +
+ +

Linker behaviour

+ + The linker keeps a main relocation table, which is empty at the + beginning. When run, the linker performs the following steps: + +
    +
  1. + the linker reads and parses all relocation files specified on + the command line, and merges the relocation tables found there + into the main relocation table. +
  2. +
  3. + the linker reads and parses all destination files specified on + the command line, and builds a relocation table for them, + merging it into the main relocation table. +
  4. +
  5. + if any input files are specified on the command line, the + linker links the files using the relocation table. +
  6. +
+ +

Specifying input, destination and relocation files

+ + All command line arguments which do not begin with a hyphen + (-), and which are not the values of defaults (for + example, not the YES in -Warn YES, + because that is the value of the default -Warn), are + interpreted as input files. Each destination file is specified by + using a -d option, and each relocation file by using + a -l option. If a directory is specified as an input + (or destination) file, the linker will recurse into the directory + and add to the list of input (or destination) files all files in + the directory (and in the directory's subdirectories, no matter + how deeply nested) which have one of the following extensions: + .html, .HTML, .htm or + .HTM. If a directory is specified as a relocation + file, the linker will add to the list of relocation files all + files in the directory which have the extension + .htmlink. A typical invocation of the linker is as + follows: +
+      HTMLLinker -BuildRelocationFileForDir Doc
+    
+ Builds a relocation file for the documentation in the + directory Doc. After this has been done, the + directory Doc can be used as a -l + argument. +
+      HTMLLinker test.html -l Doc
+    
+ Links the file test.html using the relocation file + just generated in the Doc directory. + +

What is a link

+ + A link is an anchor tag with an href, such as + <a href="dest.html#location">. The destination + file of the link is the file specified in the href; + dest.html in the example. The destination file is + ignored by the linker; the name of the link (which is everything which + follows the #) is used to perform the linking. + +

Which links are fixed up

+ + Normally, the linker will only fixup links which have the + rel attribute set to dynamic, as in the + following example: <a href="nicola.html" + rel="dynamic">. In this way, you can specify in your + HTML document which links you want to be fixed up, and which you + don't want to be. You can change the type of links to be fixed up + by using the -LinksMarker options, as in + -LinksMarker gsdoc, which causes the linker to fixup + all links with the rel attribute set to + gsdoc rather than dynamic. In certain + situations you might want to force the linker to attempt to fixup + all links; you can run the linker with the -FixupAllLinks + YES option to cause this behaviour. As a special + exception, links which obviously are not to be fixed up, such as + links beginning with mailto: or news:, or links + without a name, are never fixed up. + +

How links are fixed up

+ + When the HTML linker encounters a link which needs to be fixed up + (say <a href="dest.html#location">), it + searches the relocation table for a destination file which + contains the location name. If no such file is + found, the HTML linker emits a warning, and replaces the link in + the file with a link to the destination without the filename. In + the example, it would simply emit <a + href="#location">. If the destination file is found in + the list, instead, the HTML linker replaces the link with the full + path to the destination file on disk. For example, if - according + to the relocation table, the file + /home/nicola/Doc/dest.html contains the name + location, the HTML linker will fixup the link to be + <a href="/home/nicola/Doc/dest.html#location"> + (as a special exception, if there is a path mapping which matches + the path to the destination file, it's applied to the path in the + link. See below for a detailed explanation of path mappings). + It's important to notice that you must have unique link names for + the linker to work properly. For example, if you have two + different destination files containing the same name, say + NSObject.html and NSString.html both + containing the name init, then the linker can't + resolve <a href="#init">, because it has no way + to know if you meant the link to point to the first or the second + destination file! You should choose names so that they + uniquely specify what they represent, for example + NSObject_i_init and NSString_i_init if + the first link is in the place documenting the -init + method of the NSObject class and the second one the one of the + NSString class. Then all links will clearly refer to one place or + the other one, and no confusion will arise. If there are multiple + destination files for a link, the linker will guess which one is + the right one, and that might not give the desired result. + +

How links are checked

+ + When a link is fixed up, the linker implicitly checks that the link + is correct, because if the link name can't be found in the relocation + tables, a warning is issued. + +

Path mappings

+ + Path mappings are an additional feature of the HTML linker which + can be used when exporting documentation to be served by a web + server. If you are not putting your documentation on a web server + but simply reading it from the filesystem, then you don't need the + path mappings. The issue with exporting documentation to a web + server is that you refer to files using paths which are not + necessarily the same paths where the files are on disk. For + example, suppose that you have some HTML documentation in + /opt/doc/base and some other HTML documentation in + /opt/doc/gui. The HTML files in the two + documentation directories refer to each other. You can run the + HTML linker and fixup all links, and we are happy. But now + suppose that you set up a web server; the web server, for example, + will serve URLs beginning with /Base (meaning as in + requests from a browser of the form + http://www.server.org/Base) by taking files from + /opt/doc/base, and URLs beginning with + /Gui by taking files from /opt/doc/gui. + To fixup the links in this case, you need path mappings. A path + mapping specifies that a certain directory on disk is to be + referred to in some different way in links. In the example, you + would pass +
+     -PathMappings '{ "/opt/doc/base"="/Base"; "/opt/doc/gui"="/Gui"; }'
+    
+ to the linker. + + Each path mapping maps a path on disk to a virtual + path. For example, it maps the path on disk + /opt/doc/base to the virtual path /Base. + Each time the linker fixes up a link, after finding the + destination file, it checks the list of path mappings. If the + path to the destination file begins with the path on disk + of one of the path mappings, then that path on disk is + replaced with the corresponding virtual path in the path + to the destination file before the path to the destination file is + written out in the link. + + For example, if you have the path mapping explained above, and if + the linker is fixing up the link <a + href="hi.html#nicola">, where the destination file is + /opt/doc/base/nicola/hi.html, then the destination + path matches the path mapping for /opt/doc/base, so + the path mapping is applied and the link is fixed up to be + <a href="/Base/nicola/hi.html#nicola"> rather than + <a href="/opt/doc/base/nicola/hi.html#nicola"> as it + would normally have been without the path mapping. + +

Specifying path mappings

+ +
On the command line
+ Each path mapping specifies a mapping of a path on disk to a web + server alias. The first way to specify the mappings is on the + command line, in the form of a dictionary argument to the + -PathMappings option, as in +
+     -PathMappings '{ "/opt/doc/base"="/Base"; "/opt/doc/gui"="/Gui"; }'
+    
+ where /opt/doc/base and /opt/doc/gui are + the paths on disk and /Base and /Gui are + the corresponding web server URL paths. + +
In a path mappings file
+ The other way to specify mappings is to write them into a file, + in the format of a dictionary, as, for example, in a file containing + the following lines +
+      { 
+        "/opt/doc/base"="/Base"; 
+        "/opt/doc/gui"="/Gui"; 
+      }
+    
+ and then tell the linker to read the path mappings from that file, + by giving the filename as the argument to the + -PathMappingsFile option. For example, if the file + containing the mappings is called mappings, then you need + to pass +
+      -PathMappingsFile mappings
+    
+ to the linker to have it read mappings from the file. + +
Command line path mappings override file path mappings
+ Both command line path mappings and path mappings from a file can + be used at the same time; in case of conflict, command line path + mappings override path mappings from the file. + +

Summary of all the options

+ + Each of the options beginning with a single hyphen (-) + requires an argument, as in +
+      HTMLLinker Documentation -LinksMarker gsdoc -d Documentation
+    
+ which sets LinksMarker to gsdoc. The + options might be anywhere on the command line. Options which do + not begin with a single hyphen (such as --help) do not + require an argument, as in +
+      HTMLLinker --help
+    
+ +

-d

+ + Followed by a destination HTML file, or a directory containing + destination HTML files. + +

-l

+ + Followed by a relocation file, or a directory containing relocation files. + +

-FixupAllLinks

+ + If set to NO (the default) only links containing the + rel attribute set to dynamic (or + whatever is specified as LinksMarker) are fixed up in + the input files. If set to YES, all links are fixed + up. + +

-LinksMarker

+ + If set (and if FixupAllLinks is NO), + only links with the rel attribute set to its value + are processed. By default it is set to dynamic. + +

-PathMappings

+ + If set to a dictionary, read the dictionary as path mappings. See + above for more details of path mappings. + +

-PathMappingsFile

+ + If set to a string, consider it to be the name of a file; read + path mappings from that file. The file must contain the path + mappings in the form of a dictionary. See above for more details + on path mappings. + +

-Verbose

+ + If set to YES prints some more messages than if set + to NO (the default). + +

--help

+ + Prints a quick explanation of the command line syntax and exits. + +

--version

+ + Prints the version and exits. + +
+
Nicola Pero
+
+
+Last modified: Sun Jan 6 22:54:58 GMT 2002
+
+
+
+
+
+ */
+
+/*
+ * See the HTMLLinker.html file for documentation on how to use the tool.
+ */
+
+/* NOTE(review): the header name after #include is missing in this copy of
+   the patch - presumably <Foundation/Foundation.h>; confirm against the
+   repository before applying.  */
+#include
+
+/* For convenience, cached for the whole tool.  */
+
+/* [NSFileManager defaultManager] */
+static NSFileManager *fileManager = nil;
+
+/* [[NSFileManager defaultManager] currentDirectoryPath] */
+static NSString *currentPath = nil;
+
+/* Non-zero makes -[HTMLDirectoryEnumerator nextObject] print each
+   directory it traverses.  */
+static int verbose = 0;
+
+/* Enumerate all .html (or .htmlink) files in a directory and
+   subdirectories.  */
+@interface HTMLDirectoryEnumerator : NSEnumerator
+{
+  NSDirectoryEnumerator *e;
+  NSString *basePath;
+  BOOL looksForHTMLLinkFiles;
+  BOOL returnsAbsolutePaths;
+}
+
+/* Start enumerating the directory 'path' using the cached fileManager.  */
+- (id)initWithBasePath: (NSString *)path;
+
+/* If flag is YES, -nextObject returns 'path'/relative-name rather than
+   the name relative to the base path.  */
+- (void)setReturnsAbsolutePaths: (BOOL)flag;
+
+/* If flag is YES, -nextObject returns only files with the extension
+   htmlink; if NO, only files with the extension html, HTML, htm or HTM.  */
+- (void)setLooksForHTMLLinkFiles: (BOOL)flag;
+
+@end
+
+@implementation HTMLDirectoryEnumerator : NSEnumerator
+
+- (id)initWithBasePath: (NSString *)path
+{
+  /* Initialise the superclass first and only touch the instance
+     variables of the object it actually returned.  */
+  self = [super init];
+  if (self != nil)
+    {
+      ASSIGN (e, [fileManager enumeratorAtPath: path]);
+      ASSIGN (basePath, path);
+    }
+  return self;
+}
+
+- (void)dealloc
+{
+  RELEASE (e);
+  RELEASE (basePath);
+  [super dealloc];
+}
+
+- (void)setReturnsAbsolutePaths: (BOOL)flag
+{
+  returnsAbsolutePaths = flag;
+}
+
+- (void)setLooksForHTMLLinkFiles: (BOOL)flag
+{
+  /* Honour the argument; this used to store YES unconditionally, so the
+     mode could never be switched back to HTML files.  */
+  looksForHTMLLinkFiles = flag;
+}
+
+/* Return the next file whose extension matches the current mode, or nil
+   when the underlying directory enumerator is exhausted.  */
+- (id)nextObject
+{
+  NSString *s;
+
+  while ((s = [e nextObject]) != nil)
+    {
+      BOOL found = NO;
+      NSString *extension = [s pathExtension];
+
+      if (looksForHTMLLinkFiles)
+        {
+          if ([extension isEqualToString: @"htmlink"])
+            {
+              found = YES;
+            }
+        }
+      else if ([extension isEqualToString: @"html"]
+               || [extension isEqualToString: @"HTML"]
+               || [extension isEqualToString: @"htm"]
+               || [extension isEqualToString: @"HTM"])
+        {
+          found = YES;
+        }
+
+      /* Test the cheap flag first so that file attributes are only
+         fetched when the message is actually going to be printed.  */
+      if (verbose
+          && [[[e fileAttributes] fileType] isEqual: NSFileTypeDirectory])
+        {
+          GSPrintf(stdout, @" traversing %@\n", s);
+        }
+
+      if (found)
+        {
+          if (returnsAbsolutePaths)
+            {
+              /* NSDirectoryEnumerator returns the relative path, we
+                 return the absolute.  */
+              return [basePath stringByAppendingPathComponent: s];
+            }
+          else
+            {
+              return s;
+            }
+        }
+    }
+
+  return nil;
+}
+
+@end
+
+/* The HTMLLinker class is very simple and is the core of the linker.
+   It just keeps a relocation table, and is able to fixup a link by using
+   the relocation table.  */
+@interface HTMLLinker : NSObject
+{
+  BOOL warn;
+  BOOL hasPathMappings;
+  NSMutableDictionary *pathMappings;
+  NSMutableDictionary *relocationTable;
+}
+
+/* Create a linker with an empty relocation table and no path mappings;
+   'v' is stored as the warn flag.  */
+- (id)initWithWarnFlag: (BOOL)v;
+
+/* Merge the relocation file 'pathOnDisk' (or, if it is a directory, the
+   .htmlink files found inside it) into the relocation table.  */
+- (void)registerRelocationFile: (NSString *)pathOnDisk;
+
+/* Parse the HTML file 'pathOnDisk' (or, if it is a directory, the HTML
+   files found inside it) and add the names it contains to the
+   relocation table.  */
+- (void)registerDestinationFile: (NSString *)pathOnDisk;
+
+/* Register new path mappings; 'dict' maps paths on disk to the paths
+   to be written in links.  */
+- (void)registerPathMappings: (NSDictionary *)dict;
+
+/* Resolve the link 'link' by fixing it up using the relocation table.
+   Return the resolved link.  'logFile' is only used to print error
+   messages.  It is the file in which the link is originally found; if
+   there is problem resolving the link, the warning message printed
+   out states that the problem is in file 'logFile'.  */
+- (NSString *)resolveLink: (NSString *)link
+                  logFile: (NSString *)logFile;
+
+@end
+
+/* All the parsing code is in the following class.  It's not a real
+   parser in the sense that it is just performing its minimal duty in
+   the quickest possible way, so calling this a parser is a bit of an
+   exaggeration ... this code can run very quickly through an HTML
+   string, extracting the tags or fixing up the tags.  No more HTML parsing than this
+   is done.  Remarkably, this does not need XML support in the base
+   library, so you can use the HTML linker on any system.  This class
+   was written in order to perform its trivial, mechanical duty /very
+   fast/.  You want to be able to run the linker often and on a lot of
+   files and still be happy.  FIXME - Need to implement support for
+   newer HTML where you can use id="name" in any tag.  */
+@interface HTMLParser : NSObject
+{
+  /* The HTML code that we work on.
*/ + unichar *chars; + unsigned length; +} +/* Init with some HTML code to parse. */ +- (id)initWithCode: (NSString *)HTML; + +/* Extract all the tags from the HTML code, and return + a list of them. */ +- (NSArray *)names; + +/* Fix up all the links in the HTML code by feeding each of them to + the provided HTMLLinker; return the fixed up HTML code. If + linksMarker is nil, attempts to fix up all links in the HTML code; + if it is not-nil, only attempt to fixup links with rel=marker. + logFile is the file we are fixing up; it's only used when a warning + is issued because there is problem in the linking - the warning + message is displayed as being about links in the file logFile. */ +- (NSString *)resolveLinksUsingHTMLLinker: (HTMLLinker *)linker + logFile: (NSString *)logFile + linksMarker: (NSString *)marker; +@end + + +@implementation HTMLParser + +- (id)initWithCode: (NSString *)HTML +{ + length = [HTML length]; + chars = malloc (sizeof(unichar) * length); + [HTML getCharacters: chars]; + + return [super init]; +} + +- (void)dealloc +{ + free (chars); + [super dealloc]; +} + +- (NSArray *)names +{ + NSMutableArray *names = AUTORELEASE ([NSMutableArray new]); + unsigned i = 0; + + while (i + 3 < length) + { + /* We ignore anything except stuff which begins with "') { break; } + + + /* I suppose i == s might happen if the file contains */ + if (i != s) + { + /* If name != nil we already found it so don't bother. */ + if (name == nil) + { + NSString *attribute; + + attribute = [NSString stringWithCharacters: &chars[s] + length: (i - s)]; + /* Lowercase name so that eg, HREF and href are the + same. */ + attribute = [attribute lowercaseString]; + + if ([attribute isEqualToString: @"name"]) + { + isNameAttribute = YES; + } + } + } + + /* Skip spaces. 
*/ + while (i < length && (chars[i] == ' ' + || chars[i] == '\n' + || chars[i] == '\r' + || chars[i] == '\t')) + { i++; } + + if (i == length) { break; } + + /* Read the '=' */ + if (chars[i] == '=') + { + i++; + } + else + { + /* No '=' -- go on with the next attribute. */ + continue; + } + + if (i == length) { break; } + + /* Skip spaces. */ + while (i < length && (chars[i] == ' ' + || chars[i] == '\n' + || chars[i] == '\r' + || chars[i] == '\t')) + { i++; } + + if (i == length) { break; } + + /* Read the value. */ + if (chars[i] == '"') + { + /* Skip the '"', then read up to a '"'. */ + i++; + if (i == length) { break; } + + s = i; + + while (i < length && (chars[i] != '"')) + { i++; } + } + else if (chars[i] == '\'') + { + /* Skip the '\'', then read up to a '\''. */ + i++; + if (i == length) { break; } + + s = i; + + while (i < length && (chars[i] != '\'')) + { i++; } + } + else + { + /* Read up to a space or '>'. */ + s = i; + + while (i < length + && (chars[i] != ' ' + && chars[i] != '\n' + && chars[i] != '\r' + && chars[i] != '\t' + && chars[i] != '>')) + { i++; } + } + + if (name == nil && isNameAttribute) + { + if (i == s) + { + /* I suppose this might happen if the file + contains */ + name = @""; + } + else + { + name = [NSString stringWithCharacters: &chars[s] + length: (i - s)]; + } + } + } + + if (name != nil) + { + [names addObject: name]; + } + } + i++; + } + + return names; +} + + +- (NSString *)resolveLinksUsingHTMLLinker: (HTMLLinker *)linker + logFile: (NSString *)logFile + linksMarker: (NSString *)marker +{ + /* We represent the output as a linked list. Each element in the + linked list represents a string; concatenating all the strings in + the linked list, you obtain the output. The trick is that these + strings in the linked list might actually be pointers inside the + chars array ... we are never copying stuff from the chars array - + just keeping pointers to substrings inside it - till we generate + the final string at the end ... 
for speed and efficiency reasons + of course. */ + struct stringFragment + { + unichar *chars; + unsigned length; + BOOL needsFreeing; + struct stringFragment *next; + } *head, *tail; + + /* The index of the beginning of the last string fragment (the tail). */ + unsigned tailIndex = 0; + + /* The temporary index. */ + unsigned i = 0; + + /* The total number of chars in the output string. We don't know + this beforehand because each time we fix up a link, we might add + or remove characters from the output. We update + totalNumberOfChars each time we close a stringFragment. */ + unsigned totalNumberOfChars = 0; + + + /* Initialize the linked list. */ + head = malloc (sizeof (struct stringFragment)); + head->chars = chars; + head->length = 0; + head->needsFreeing = NO; + head->next = NULL; + + /* The last string fragment is the first one at the beginning. */ + tail = head; + + while (i + 3 < length) + { + /* We ignore anything except stuff which begins with "') { break; } + + + /* I suppose i == s might happen if the file contains */ + if (i != s) + { + /* If href != nil && rel != nil we already found it + so don't bother. */ + if (href == nil || rel == nil) + { + NSString *attribute; + + attribute = [NSString stringWithCharacters: &chars[s] + length: (i - s)]; + /* Lowercase name so that eg, HREF and href are the + same. */ + attribute = [attribute lowercaseString]; + + if (href == nil + && [attribute isEqualToString: @"href"]) + { + isHrefAttribute = YES; + } + else if (rel == nil + && [attribute isEqualToString: @"rel"]) + { + isRelAttribute = YES; + } + } + } + + /* Skip spaces. */ + while (i < length && (chars[i] == ' ' + || chars[i] == '\n' + || chars[i] == '\r' + || chars[i] == '\t')) + { i++; } + + if (i == length) { break; } + + /* Read the '=' */ + if (chars[i] == '=') + { + i++; + } + else + { + /* No '=' -- go on with the next attribute. */ + continue; + } + + if (i == length) { break; } + + /* Skip spaces. 
*/ + while (i < length && (chars[i] == ' ' + || chars[i] == '\n' + || chars[i] == '\r' + || chars[i] == '\t')) + { i++; } + + if (i == length) { break; } + + /* Read the value. */ + if (isHrefAttribute) + { + /* Remeber that href starts here. */ + hrefStart = i; + } + + if (chars[i] == '"') + { + /* Skip the '"', then read up to a '"'. */ + i++; + if (i == length) { break; } + + s = i; + + while (i < length && (chars[i] != '"')) + { i++; } + + if (isHrefAttribute) + { + /* Remeber that href ends here. We don't want + the ending " because we already insert those + by our own. */ + hrefEnd = i + 1; + } + } + else if (chars[i] == '\'') + { + /* Skip the '\'', then read up to a '\''. */ + i++; + if (i == length) { break; } + + s = i; + + while (i < length && (chars[i] != '\'')) + { i++; } + + if (isHrefAttribute) + { + hrefEnd = i + 1; + } + } + else + { + /* Read up to a space or '>'. */ + s = i; + + while (i < length + && (chars[i] != ' ' + && chars[i] != '\n' + && chars[i] != '\r' + && chars[i] != '\t' + && chars[i] != '>')) + { i++; } + if (isHrefAttribute) + { + /* We do want the ending space. */ + hrefEnd = i; + } + } + + if (i == length) + { + break; + } + + if (hrefEnd >= length) + { + hrefEnd = length - 1; + } + + if (isRelAttribute) + { + if (i == s) + { + /* I suppose this might happen if the file + contains */ + rel = @""; + } + else + { + rel = [NSString stringWithCharacters: &chars[s] + length: (i - s)]; + } + } + + if (isHrefAttribute) + { + if (i == s) + { + /* I suppose this might happen if the file + contains */ + href = @""; + } + else + { + href = [NSString stringWithCharacters: &chars[s] + length: (i - s)]; + } + } + } + if (href != nil && ((marker == nil) + || [rel isEqualToString: marker])) + { + /* Ok - fixup the link. */ + NSString *link; + struct stringFragment *s; + + link = [linker resolveLink: href logFile: logFile]; + + /* Add " before and after the link. 
*/ + link = [NSString stringWithFormat: @"\"%@\"", link]; + + /* Close the previous string fragment at hrefStart. */ + tail->length = hrefStart - tailIndex; + + totalNumberOfChars += tail->length; + + /* Insert immediately afterwards a string fragment containing + the fixed up link. */ + s = malloc (sizeof (struct stringFragment)); + s->length = [link length]; + + s->chars = malloc (sizeof(unichar) * s->length); + [link getCharacters: s->chars]; + + s->needsFreeing = YES; + s->next = NULL; + + tail->next = s; + tail = s; + + totalNumberOfChars += tail->length; + + /* Now prepare the new tail to start just after the end + of the original href in the original HTML code. */ + s = malloc (sizeof (struct stringFragment)); + s->length = 0; + s->chars = &chars[hrefEnd]; + s->needsFreeing = NO; + s->next = NULL; + tail->next = s; + tail = s; + + tailIndex = hrefEnd; + } + } + i++; + } + + /* Close the last open string fragment. */ + tail->length = length - tailIndex; + totalNumberOfChars += tail->length; + + /* Generate the output. */ + { + /* Allocate space for the whole output in a single chunk now that + we know how big it should be. */ + unichar *outputChars = malloc (sizeof(unichar) * totalNumberOfChars); + unsigned j = 0; + + /* Copy into the output all the string fragments, destroying each + of them as we go on. 
*/
+    while (head != NULL)
+      {
+        struct stringFragment *s;
+
+        memcpy (&outputChars[j], head->chars,
+                sizeof(unichar) * head->length);
+
+        j += head->length;
+
+        if (head->needsFreeing)
+          {
+            free (head->chars);
+          }
+
+        s = head->next;
+        free (head);
+        head = s;
+      }
+
+    {
+      /* +stringWithCharacters:length: copies the characters, so the
+         temporary buffer must be freed here or it is leaked on every
+         call.  */
+      NSString *output = [NSString stringWithCharacters: outputChars
+                                                 length: totalNumberOfChars];
+
+      free (outputChars);
+      return output;
+    }
+  }
+}
+
+@end
+
+
+@implementation HTMLLinker
+
+/* Create a linker with empty relocation and path-mapping tables; 'v' is
+   stored as the warn flag.  */
+- (id)initWithWarnFlag: (BOOL)v
+{
+  /* Initialise the superclass first and only touch the instance
+     variables of the object it actually returned.  */
+  self = [super init];
+  if (self != nil)
+    {
+      warn = v;
+      relocationTable = [NSMutableDictionary new];
+      pathMappings = [NSMutableDictionary new];
+    }
+  return self;
+}
+
+- (void)dealloc
+{
+  RELEASE (relocationTable);
+  RELEASE (pathMappings);
+  [super dealloc];
+}
+
+/* Merge a relocation file into the relocation table.  A relative path is
+   taken relative to currentPath.  If the path is a directory, every
+   .htmlink file the enumerator finds below it is registered in turn.
+   Otherwise the file is read as a property list dictionary mapping each
+   name to a file path relative to the relocation file's directory.  A
+   missing file only produces a warning.  */
+- (void)registerRelocationFile: (NSString *)pathOnDisk
+{
+  /* We only accept absolute paths.  */
+  if (![pathOnDisk isAbsolutePath])
+    {
+      pathOnDisk = [currentPath stringByAppendingPathComponent: pathOnDisk];
+    }
+
+  /* Check if it's a directory; if it is, enumerate all .htmlink files
+     inside it, and add all of them.  */
+  {
+    BOOL isDir;
+
+    if (![fileManager fileExistsAtPath: pathOnDisk isDirectory: &isDir])
+      {
+        NSLog (@"Warning - relocation file '%@' not found - ignored",
+               pathOnDisk);
+        return;
+      }
+    else
+      {
+        if (isDir)
+          {
+            HTMLDirectoryEnumerator *e;
+            NSString *filename;
+
+            e = [HTMLDirectoryEnumerator alloc];
+            e = [e initWithBasePath: pathOnDisk];
+            [e setLooksForHTMLLinkFiles: YES];
+            [e setReturnsAbsolutePaths: YES];
+
+            while ((filename = [e nextObject]) != nil)
+              {
+                [self registerRelocationFile: filename];
+              }
+            /* We own the enumerator (alloc/init), so release it; it
+               was previously leaked.  */
+            RELEASE (e);
+            return;
+          }
+      }
+  }
+
+  /* Now, read the mappings in the file.  */
+  {
+    NSString *file = [NSString stringWithContentsOfFile: pathOnDisk];
+    NSString *path = [pathOnDisk stringByDeletingLastPathComponent];
+    NSDictionary *d = [file propertyList];
+    NSEnumerator *e = [d keyEnumerator];
+    NSString *name;
+
+    while ((name = [e nextObject]) != nil)
+      {
+        NSString *v = [d objectForKey: name];
+        NSString *filePath;
+
+        filePath = [path stringByAppendingPathComponent: v];
+
+        if (hasPathMappings)
+          {
+            /* Manage pathMappings: try to match any of the
+               pathMappings against filePath, and perform the path
+               mapping if we can match.  Only the first mapping whose
+               key is a prefix of filePath is applied.  */
+            NSEnumerator *en = [pathMappings keyEnumerator];
+            NSString *key;
+            while ((key = [en nextObject]))
+              {
+                if ([filePath hasPrefix: key])
+                  {
+                    NSString *value = [pathMappings objectForKey: key];
+
+                    filePath = [filePath substringFromIndex: [key length]];
+                    filePath = [value stringByAppendingPathComponent:
+                                        filePath];
+                    break;
+                  }
+              }
+          }
+
+        [relocationTable setObject: filePath forKey: name];
+      }
+  }
+}
+
+
+/* Add the names found in a destination HTML file to the relocation
+   table.  A relative path is taken relative to currentPath.  If the path
+   is a directory, every HTML file the enumerator finds below it is
+   registered in turn.  A missing file only produces a warning.  */
+- (void)registerDestinationFile: (NSString *)pathOnDisk
+{
+  /* NOTE(review): fullPath is captured before pathOnDisk is made
+     absolute, so when no path mapping matches, the path is recorded in
+     the relocation table exactly as it was passed in - confirm that
+     this is intended.  */
+  NSString *fullPath = pathOnDisk;
+
+  /* We only accept absolute paths.  */
+  if (![pathOnDisk isAbsolutePath])
+    {
+      pathOnDisk = [currentPath stringByAppendingPathComponent: pathOnDisk];
+    }
+
+  /* Check if it's a directory; if it is, enumerate all HTML files
+     inside it, and add all of them.  */
+  {
+    BOOL isDir;
+
+    if (![fileManager fileExistsAtPath: pathOnDisk isDirectory: &isDir])
+      {
+        NSLog (@"Warning - destination file '%@' not found - ignored",
+               pathOnDisk);
+        return;
+      }
+    else
+      {
+        if (isDir)
+          {
+            HTMLDirectoryEnumerator *e;
+            NSString *filename;
+
+            e = [HTMLDirectoryEnumerator alloc];
+            e = [e initWithBasePath: pathOnDisk];
+            [e setReturnsAbsolutePaths: YES];
+
+            while ((filename = [e nextObject]) != nil)
+              {
+                [self registerDestinationFile: filename];
+              }
+            /* We own the enumerator (alloc/init), so release it; it
+               was previously leaked.  */
+            RELEASE (e);
+            return;
+          }
+      }
+  }
+
+  if (hasPathMappings)
+    {
+      /* Manage pathMappings: try to match any of the pathMappings
+         against pathOnDisk, and perform the path mapping if we can
+         match.  Only the first matching mapping is applied.  */
+      NSEnumerator *e = [pathMappings keyEnumerator];
+      NSString *key;
+
+      while ((key = [e nextObject]))
+        {
+          if ([pathOnDisk hasPrefix: key])
+            {
+              NSString *value = [pathMappings objectForKey: key];
+
+              fullPath = [pathOnDisk substringFromIndex: [key length]];
+              fullPath = [value stringByAppendingPathComponent: fullPath];
+              break;
+            }
+        }
+    }
+
+  /* Now, read all the names from the file.  */
+  {
+    NSString *file = [NSString stringWithContentsOfFile: pathOnDisk];
+    HTMLParser *p = [[HTMLParser alloc] initWithCode: file];
+    NSArray *names = [p names];
+    unsigned i, count;
+
+    RELEASE (p);
+
+    count = [names count];
+
+    for (i = 0; i < count; i++)
+      {
+        NSString *name = [names objectAtIndex: i];
+        [relocationTable setObject: fullPath forKey: name];
+      }
+  }
+}
+
+/* Copy every key/value pair of 'dict' into pathMappings (a later
+   registration overrides an earlier one for the same key) and turn
+   path mapping on.  */
+- (void)registerPathMappings: (NSDictionary *)dict
+{
+  NSEnumerator *e = [dict keyEnumerator];
+  NSString *key;
+
+  while ((key = [e nextObject]))
+    {
+      NSString *value = [dict objectForKey: key];
+      [pathMappings setObject: value forKey: key];
+    }
+  hasPathMappings = YES;
+}
+
+- (NSString *)resolveLink: (NSString *)link
+                  logFile: (NSString *)logFile
+{
+  NSString *fileLink;
+  NSString *nameLink;
+  NSString *relocatedFileLink;
+  NSString *file;
+
+  /* Do nothing if this is evidently *not* a dynamical link to fixup.
*/ + if ([link hasPrefix: @"mailto:"] || [link hasPrefix: @"news:"]) + { + return link; + } + + { + /* Break the link string into fileLink (everything which is before + the `#'), and nameLink (everything which is after the `#', `#' + not included). For example, if link is + 'NSObject_Class.html#isa', then fileLink is + 'NSObject_Class.html' and nameLink is 'isa'. */ + + /* Look for the #. */ + NSRange hashRange = [link rangeOfString: @"#"]; + + if (hashRange.location == NSNotFound) + { + fileLink = link; + nameLink = nil; + } + else + { + fileLink = [link substringToIndex: hashRange.location]; + + if (hashRange.location + 1 < [link length]) + { + nameLink = [link substringFromIndex: (hashRange.location + 1)]; + } + else + { + nameLink = nil; + } + } + } + + /* Now lookup nameLink. */ + + /* If it's "", it is not something we can fixup. */ + if (nameLink == nil || [nameLink isEqualToString: @""]) + { + relocatedFileLink = fileLink; + } + else + { + /* Now simply look it up in our relocation table. */ + file = [relocationTable objectForKey: nameLink]; + + /* Not found - leave it unfixed. */ + if (file == nil) + { + if (warn && [fileLink length] > 0) + { + GSPrintf(stderr, @"%@: Unresolved reference to '%@'\n", + logFile, nameLink); + } + + relocatedFileLink = fileLink; + } + else + { + relocatedFileLink = file; + } + } + + /* Now build up the final relocated link, and return it. */ + if (nameLink != nil) + { + return [NSString stringWithFormat: @"%@#%@", relocatedFileLink, + nameLink]; + } + else + { + return relocatedFileLink; + } +} + +@end + +static NSDictionary * +build_relocation_table_for_directory (NSString *dir) +{ + BOOL isDir; + + if (verbose) + GSPrintf(stdout, @" Building relcation table for %@\n", dir); + + /* Check if it's a directory; if it is, enumerate all HTML files + inside it, and add all of them. 
*/ + if (![fileManager fileExistsAtPath: dir isDirectory: &isDir]) + { + NSLog (@"%@ does not exist - exiting", dir); + exit (1); + } + else if (!isDir) + { + NSLog (@"%@ is not a directory - exiting", dir); + exit (1); + } + else + { + HTMLDirectoryEnumerator *e; + NSString *filename; + NSMutableDictionary *relocationTable; + + relocationTable = [NSMutableDictionary new]; + AUTORELEASE (relocationTable); + + e = [HTMLDirectoryEnumerator alloc]; + e = [e initWithBasePath: dir]; + /* The relocation table for a directory is relative to the + directory top, so that the whole directory can be moved + around without having to regenerate the .htmlink file. */ + [e setReturnsAbsolutePaths: NO]; + + while ((filename = [e nextObject]) != nil) + { + NSString *fullPath; + NSString *file; + HTMLParser *p; + NSArray *names; + unsigned i, count; + + fullPath = [dir stringByAppendingPathComponent: filename]; + + file = [NSString stringWithContentsOfFile: fullPath]; + + p = [[HTMLParser alloc] initWithCode: file]; + names = [p names]; + RELEASE (p); + count = [names count]; + + for (i = 0; i < count; i++) + { + NSString *name = [names objectAtIndex: i]; + [relocationTable setObject: filename forKey: name]; + } + } + return relocationTable; + } +} + + +static void print_help_and_exit () +{ + printf ("GNUstep HTMLLinker\n"); + printf ("Usage: HTMLLinker [options] input_files [-l relocation_file] [-d destination_file]\n"); + printf ("Multiple input files, and multiple -l and -d options are allowed.\n"); + printf (" `options' include:\n"); + printf (" --help: print this message;\n"); + printf (" --version: print version information;\n"); + printf (" --verbose: print information while processing;\n"); + printf (" -Warn NO: do not print warnings about unresolved links;\n"); + printf (" -LinksMarker xxx: only fixup links with attribute rel=xxx;\n"); + printf (" -FixupAllLinks YES: attempt to fixup all links (not only ones with the marker);\n"); + printf (" -PathMappingsFile file: read 
path mappings from file (a dictionary);\n"); + printf (" -PathMappings '{\"/usr/doc\"=\"/Doc\";}': use the supplied path mappings;\n"); + printf (" -BuildRelocationFileForDir yyy: build a relocation file for the dir yyy\n"); + printf (" and save it into yyy/table.htmlink. This option is special\n"); + printf (" and prevents any other processing by the linker.\n"); + exit (0); +} + +static void print_version_and_exit () +{ + printf ("GNUstep HTMLLinker (gnustep-base version %d.%d.%d)\n", + GNUSTEP_BASE_MAJOR_VERSION, + GNUSTEP_BASE_MINOR_VERSION, + GNUSTEP_BASE_SUBMINOR_VERSION); + exit (0); +} + +int main (int argc, char** argv, char** env) +{ + NSUserDefaults *userDefs; + NSArray *args; + NSMutableArray *inputFiles; + unsigned i, count; + BOOL warn, fixupAllLinks; + NSString *linksMarker; + HTMLLinker *linker; + CREATE_AUTORELEASE_POOL(pool); + +#ifdef GS_PASS_ARGUMENTS + [NSProcessInfo initializeWithArguments:argv count:argc environment:env]; +#endif + + /* Set up the cache. */ + fileManager = [NSFileManager defaultManager]; + currentPath = [fileManager currentDirectoryPath]; + + /* Read basic defaults. */ + userDefs = [NSUserDefaults standardUserDefaults]; + + /* defaults are - + -Warn YES + -LinksMarker dynamic + -FixupAllLinks NO + */ + [userDefs registerDefaults: [NSDictionary dictionaryWithObjectsAndKeys: + @"dynamic", @"LinksMarker", + @"YES", @"Warn", + nil]]; + + warn = [userDefs boolForKey: @"Warn"]; + fixupAllLinks = [userDefs boolForKey: @"FixupAllLinks"]; + linksMarker = [userDefs stringForKey: @"LinksMarker"]; + + + /* If -BuildRelocationFileForDir xxx is passed on the command line, + build a relocation file for the directory xxx and save it in + xxx/table.htmlink. 
*/ + { + NSString *relFile; + relFile = [userDefs stringForKey: @"BuildRelocationFileForDir"]; + + if (relFile != nil) + { + NSDictionary *table; + NSString *outputFile; + + outputFile = [relFile stringByAppendingPathComponent: + @"table.htmlink"]; + + table = build_relocation_table_for_directory (relFile); + [table writeToFile: outputFile atomically: YES]; + exit (0); + } + } + + /* Create the linker object. */ + linker = [[HTMLLinker alloc] initWithWarnFlag: warn]; + + /* First, read all path mappings (before reading any destination + file / relocation file, so we can relocate properly. */ + + /* Read path mappings from PathMappingsFile if specified. */ + { + NSString *pathMapFile = [userDefs stringForKey: @"PathMappingsFile"]; + + if (pathMapFile != nil) + { + NSDictionary *mappings; + + mappings = [NSDictionary dictionaryWithContentsOfFile: pathMapFile]; + + if (mappings == nil) + { + NSLog (@"Warning - %@ does not contain a dictionary - ignored", + pathMapFile); + } + else + { + [linker registerPathMappings: mappings]; + } + } + } + + /* Add PathMappings specified on the command line if any. */ + { + NSDictionary *paths = [userDefs dictionaryForKey: @"PathMappings"]; + + if (paths != nil) + { + [linker registerPathMappings: paths]; + } + } + + /* All non-options on the command line are: + + input files + + destination files if they come after a -d + + relocation files if they come after a -l + + Directories as input files or destination files means 'all .html, .htm, + .HTML, .HTM files in the directory and subdirectories'. + + */ + args = [[NSProcessInfo processInfo] arguments]; + + count = [args count]; + + inputFiles = AUTORELEASE ([NSMutableArray new]); + + for (i = 1; i < count; i++) + { + NSString *arg = [args objectAtIndex: i]; + if ([arg characterAtIndex: 0] == '-') + { + NSString *opt; + opt = ([arg characterAtIndex: 1] == '-') ? 
+ [arg substringFromIndex: 2] : [arg substringFromIndex: 1]; + if ([opt isEqualToString: @"help"] + || [opt isEqualToString: @"h"]) + { + print_help_and_exit (); + } + else if ([opt isEqualToString: @"version"] + || [opt isEqualToString: @"V"]) + { + print_version_and_exit (); + } + else if ([opt isEqualToString: @"verbose"] + || [opt isEqualToString: @"v"]) + { + verbose++; + } + else if ([opt isEqualToString: @"d"]) + { + if ((i + 1) < count) + { + i++; + /* Register a destination file. */ + [linker registerDestinationFile: [args objectAtIndex: i]]; + } + else + { + NSLog (@"Missing argument to -d"); + } + } + else if ([opt isEqualToString: @"l"]) + { + if ((i + 1) < count) + { + i++; + /* Register a destination file. */ + [linker registerRelocationFile: [args objectAtIndex: i]]; + } + else + { + NSLog (@"Missing argument to -l"); + } + } + else + { + /* A GNUstep default - skip it and the next argument. */ + if ((i + 1) < count) + { + i++; + continue; + } + } + } + else + { + BOOL isDir; + + if (![fileManager fileExistsAtPath: arg isDirectory: &isDir]) + { + NSLog (@"Warning - input file '%@' not found - ignored", arg); + } + else + { + if (isDir) + { + HTMLDirectoryEnumerator *e; + NSString *filename; + + e = [HTMLDirectoryEnumerator alloc]; + e = [e initWithBasePath: arg]; + [e setReturnsAbsolutePaths: YES]; + + while ((filename = [e nextObject]) != nil) + { + [inputFiles addObject: filename]; + } + } + else + { + [inputFiles addObject: arg]; + } + } + } + } + + count = [inputFiles count]; + + if (count == 0) + { + NSLog (@"No input files specified."); + } + + + for (i = 0; i < count; i++) + { + NSString *inputFile; + NSString *inputFileContents; + HTMLParser *parser; + + inputFile = [inputFiles objectAtIndex: i]; + if (verbose) + GSPrintf(stdout, @" %@\n", inputFile); + inputFileContents = [NSString stringWithContentsOfFile: inputFile]; + + parser = [[HTMLParser alloc] initWithCode: inputFileContents]; + inputFileContents = [parser 
resolveLinksUsingHTMLLinker: linker + logFile: inputFile + linksMarker: linksMarker]; + [inputFileContents writeToFile: inputFile atomically: YES]; + RELEASE (parser); + } + + RELEASE (linker); + RELEASE (pool); + + return 0; +}