mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-15 08:52:02 +00:00
cf11cbdb30
SVN r4 (trunk)
601 lines
17 KiB
Groff
601 lines
17 KiB
Groff
./"
|
|
./" $Id: re2c.1.in,v 1.9 2004/05/26 13:33:25 nuffer Exp $
|
|
./"
|
|
.TH RE2C 1 "14 March 1994" "Version 0.9.3"
|
|
.ds re \fBre2c\fP
|
|
.ds le \fBlex\fP
|
|
.ds rx regular expression
|
|
.ds lx \fIl\fP-expression
|
|
\"$Log: re2c.1.in,v $
|
|
\"Revision 1.9 2004/05/26 13:33:25 nuffer
|
|
\"Added description of -o option.
|
|
\"
|
|
\"Revision 1.8 2004/04/19 22:32:48 helly
|
|
\"Update
|
|
\"
|
|
\"Revision 1.7 2004/04/19 02:13:48 helly
|
|
\"Featurerequest #869298 (Add case insensitive string literals)
|
|
\"
|
|
\"Revision 1.6 2004/04/17 15:49:13 helly
|
|
\"Fix example, cur must be set to make the uncommented printf's working
|
|
\"
|
|
\"Revision 1.5 2004/03/30 01:02:45 helly
|
|
\"Update docu
|
|
\"
|
|
\"Revision 1.4 2004/03/14 14:23:40 helly
|
|
\"Update
|
|
\"
|
|
\"Revision 1.3 2004/03/14 12:54:19 helly
|
|
\"Next step of autogen patch
|
|
\"
|
|
\"Revision 1.2 2004/03/13 20:35:12 helly
|
|
\"Updated configure stuff
|
|
\"
|
|
\"Revision 1.1 2004/01/31 15:44:39 nuffer
|
|
\"Applied patch from Marcus Boerger
|
|
\"
|
|
\"Revision 1.2 1994/04/16 15:50:32 peter
|
|
\"Fix bug in simple example.
|
|
\"
|
|
\"Revision 1.1 1994/04/08 15:39:09 peter
|
|
\"Initial revision
|
|
\"
|
|
.SH NAME
|
|
re2c \- convert regular expressions to C/C++
|
|
|
|
.SH SYNOPSIS
|
|
\*(re [\fB-esbvh\fP] [\fB-o output\fP] file\fP
|
|
|
|
.SH DESCRIPTION
|
|
\*(re is a preprocessor that generates C-based recognizers from regular
|
|
expressions.
|
|
The input to \*(re consists of C/C++ source interleaved with
|
|
comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
|
|
scanner specifications.
|
|
In the output these comments are replaced with code that, when
|
|
executed, will find the next input token and then execute
|
|
some user-supplied token-specific code.
|
|
|
|
For example, given the following code
|
|
|
|
.in +3
|
|
.nf
|
|
#define NULL ((char*) 0)
|
|
char *scan(char *p){
|
|
char *q;
|
|
#define YYCTYPE char
|
|
#define YYCURSOR p
|
|
#define YYLIMIT p
|
|
#define YYMARKER q
|
|
#define YYFILL(n)
|
|
/*!re2c
|
|
[0-9]+ {return YYCURSOR;}
|
|
[\\000-\\377] {return NULL;}
|
|
*/
|
|
}
|
|
.fi
|
|
.in -3
|
|
|
|
\*(re will generate
|
|
|
|
.in +3
|
|
.nf
|
|
/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
|
|
#line 1 "simple.re"
|
|
#define NULL ((char*) 0)
|
|
char *scan(char *p){
|
|
char *q;
|
|
#define YYCTYPE char
|
|
#define YYCURSOR p
|
|
#define YYLIMIT p
|
|
#define YYMARKER q
|
|
#define YYFILL(n)
|
|
{
|
|
YYCTYPE yych;
|
|
unsigned int yyaccept;
|
|
goto yy0;
|
|
yy1: ++YYCURSOR;
|
|
yy0:
|
|
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
|
|
yych = *YYCURSOR;
|
|
if(yych <= '/') goto yy4;
|
|
if(yych >= ':') goto yy4;
|
|
yy2: yych = *++YYCURSOR;
|
|
goto yy7;
|
|
yy3:
|
|
#line 10
|
|
{return YYCURSOR;}
|
|
yy4: yych = *++YYCURSOR;
|
|
yy5:
|
|
#line 11
|
|
{return NULL;}
|
|
yy6: ++YYCURSOR;
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1);
|
|
yych = *YYCURSOR;
|
|
yy7: if(yych <= '/') goto yy3;
|
|
if(yych <= '9') goto yy6;
|
|
goto yy3;
|
|
}
|
|
#line 12
|
|
|
|
}
|
|
.fi
|
|
.in -3
|
|
|
|
.SH OPTIONS
|
|
\*(re provides the following options:
|
|
.TP
|
|
\fB-e\fP
|
|
Cross-compile from an ASCII platform to an EBCDIC one.
|
|
.TP
|
|
\fB-s\fP
|
|
Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
|
|
assist to generate better code.
|
|
.TP
|
|
\fB-b\fP
|
|
Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better
|
|
code out of the compiler. Most useful for specifications with more than a
|
|
few keywords (e.g. for most programming languages).
|
|
.TP
|
|
\fB-h\fP
|
|
\fB-?\fP
|
|
Invoke a short help.
|
|
.TP
|
|
\fB-v\fP
|
|
Show version information.
|
|
.TP
|
|
\fB-o output\fP
|
|
Specify the output file.
|
|
|
|
.SH "INTERFACE CODE"
|
|
Unlike other scanner generators, \*(re does not generate complete scanners:
|
|
the user must supply some interface code.
|
|
In particular, the user must define the following macros:
|
|
.TP
|
|
\fCYYCHAR\fP
|
|
Type used to hold an input symbol.
|
|
Usually \fCchar\fP or \fCunsigned char\fP.
|
|
.TP
|
|
\fCYYCURSOR\fP
|
|
\*(lx of type \fC*YYCHAR\fP that points to the current input symbol.
|
|
The generated code advances \fCYYCURSOR\fP as symbols are matched.
|
|
On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
|
|
current token. On exit, \fCYYCURSOR\fP will point to the first character of
|
|
the following token.
|
|
.TP
|
|
\fCYLIMIT\fP
|
|
Expression of type \fC*YYCHAR\fP that marks the end of the buffer
|
|
(\fCYLIMIT[-1]\fP is the last character in the buffer).
|
|
The generated code repeatedly compares \fCYYCURSOR\fP to \fCYLIMIT\fP
|
|
to determine when the buffer needs (re)filling.
|
|
.TP
|
|
\fCYYMARKER\fP
|
|
\*(lx of type \fC*YYCHAR\fP.
|
|
The generated code saves backtracking information in \fCYYMARKER\fP.
|
|
.TP
|
|
\fCYYFILL(\fP\fIn\fP\fC)\fP
|
|
The generated code "calls" \fCYYFILL\fP when the buffer needs
|
|
(re)filling: at least \fIn\fP additional characters should
|
|
be provided. \fCYYFILL\fP should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP and
|
|
\fCYYMARKER\fP as needed. Note that for typical programming languages
|
|
\fIn\fP will be the length of the longest keyword plus one.
|
|
|
|
.SH "SCANNER SPECIFICATIONS"
|
|
Each scanner specification consists of a set of \fIrules\fP and name
|
|
definitions.
|
|
Rules consist of a regular expression along with a block of C/C++ code that
|
|
is to be executed when the associated regular expression is matched.
|
|
Name definitions are of the form
|
|
``\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP''.
|
|
|
|
.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS"
|
|
.TP
|
|
\fC"foo"\fP
|
|
the literal string \fCfoo\fP.
|
|
ANSI-C escape sequences can be used.
|
|
.TP
|
|
\fC'foo'\fP
|
|
the literal string \fCfoo\fP (characters [a-zA-Z] treated case-insensitive).
|
|
ANSI-C escape sequences can be used.
|
|
.TP
|
|
\fC[xyz]\fP
|
|
a "character class"; in this case,
|
|
the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
|
|
.TP
|
|
\fC[abj-oZ]\fP
|
|
a "character class" with a range in it;
|
|
matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
|
|
or a '\fCZ\fP'.
|
|
.TP
|
|
\fIr\fP\fC\e\fP\fIs\fP
|
|
match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
|
|
which can be expressed as character classes.
|
|
.TP
|
|
\fIr\fP\fC*\fP
|
|
zero or more \fIr\fP's, where \fIr\fP is any regular expression
|
|
.TP
|
|
\fC\fIr\fP\fC+\fP
|
|
one or more \fIr\fP's
|
|
.TP
|
|
\fC\fIr\fP\fC?\fP
|
|
zero or one \fIr\fP's (that is, "an optional \fIr\fP")
|
|
.TP
|
|
name
|
|
the expansion of the "name" definition (see above)
|
|
.TP
|
|
\fC(\fP\fIr\fP\fC)\fP
|
|
an \fIr\fP; parentheses are used to override precedence
|
|
(see below)
|
|
.TP
|
|
\fIrs\fP
|
|
an \fIr\fP followed by an \fIs\fP ("concatenation")
|
|
.TP
|
|
\fIr\fP\fC|\fP\fIs\fP
|
|
either an \fIr\fP or an \fIs\fP
|
|
.TP
|
|
\fIr\fP\fC/\fP\fIs\fP
|
|
an \fIr\fP but only if it is followed by an \fIs\fP. The s is not part of
|
|
the matched text. This type of \*(rx is called "trailing context".
|
|
.TP
|
|
\fIr\fP\fC{\fP\fIn\fP\fC}\fP
|
|
matches \fIr\fP exactly \fIn\fP times.
|
|
.TP
|
|
\fIr\fP\fC{\fP\fIn\fP\fC,}\fP
|
|
matches \fIr\fP at least \fIn\fP times.
|
|
.TP
|
|
\fIr\fP\fC{\fP\fIn\fP\fC,\fP\fIm\fP\fC}\fP
|
|
matches \fIr\fP at least \fIn\fP but not more than \fIm\fP times.
|
|
|
|
.LP
|
|
The regular expressions listed above are grouped according to
|
|
precedence, from highest precedence at the top to lowest at the bottom.
|
|
Those grouped together have equal precedence.
|
|
|
|
.SH "A LARGER EXAMPLE"
|
|
.LP
|
|
.in +3
|
|
.nf
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <fcntl.h>
|
|
#include <string.h>
|
|
|
|
#define ADDEQ 257
|
|
#define ANDAND 258
|
|
#define ANDEQ 259
|
|
#define ARRAY 260
|
|
#define ASM 261
|
|
#define AUTO 262
|
|
#define BREAK 263
|
|
#define CASE 264
|
|
#define CHAR 265
|
|
#define CONST 266
|
|
#define CONTINUE 267
|
|
#define DECR 268
|
|
#define DEFAULT 269
|
|
#define DEREF 270
|
|
#define DIVEQ 271
|
|
#define DO 272
|
|
#define DOUBLE 273
|
|
#define ELLIPSIS 274
|
|
#define ELSE 275
|
|
#define ENUM 276
|
|
#define EQL 277
|
|
#define EXTERN 278
|
|
#define FCON 279
|
|
#define FLOAT 280
|
|
#define FOR 281
|
|
#define FUNCTION 282
|
|
#define GEQ 283
|
|
#define GOTO 284
|
|
#define ICON 285
|
|
#define ID 286
|
|
#define IF 287
|
|
#define INCR 288
|
|
#define INT 289
|
|
#define LEQ 290
|
|
#define LONG 291
|
|
#define LSHIFT 292
|
|
#define LSHIFTEQ 293
|
|
#define MODEQ 294
|
|
#define MULEQ 295
|
|
#define NEQ 296
|
|
#define OREQ 297
|
|
#define OROR 298
|
|
#define POINTER 299
|
|
#define REGISTER 300
|
|
#define RETURN 301
|
|
#define RSHIFT 302
|
|
#define RSHIFTEQ 303
|
|
#define SCON 304
|
|
#define SHORT 305
|
|
#define SIGNED 306
|
|
#define SIZEOF 307
|
|
#define STATIC 308
|
|
#define STRUCT 309
|
|
#define SUBEQ 310
|
|
#define SWITCH 311
|
|
#define TYPEDEF 312
|
|
#define UNION 313
|
|
#define UNSIGNED 314
|
|
#define VOID 315
|
|
#define VOLATILE 316
|
|
#define WHILE 317
|
|
#define XOREQ 318
|
|
#define EOI 319
|
|
|
|
typedef unsigned int uint;
|
|
typedef unsigned char uchar;
|
|
|
|
#define BSIZE 8192
|
|
|
|
#define YYCTYPE uchar
|
|
#define YYCURSOR cursor
|
|
#define YYLIMIT s->lim
|
|
#define YYMARKER s->ptr
|
|
#define YYFILL(n) {cursor = fill(s, cursor);}
|
|
|
|
#define RET(i) {s->cur = cursor; return i;}
|
|
|
|
typedef struct Scanner {
|
|
int fd;
|
|
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
|
|
uint line;
|
|
} Scanner;
|
|
|
|
uchar *fill(Scanner *s, uchar *cursor){
|
|
if(!s->eof){
|
|
uint cnt = s->tok - s->bot;
|
|
if(cnt){
|
|
memcpy(s->bot, s->tok, s->lim - s->tok);
|
|
s->tok = s->bot;
|
|
s->ptr -= cnt;
|
|
cursor -= cnt;
|
|
s->pos -= cnt;
|
|
s->lim -= cnt;
|
|
}
|
|
if((s->top - s->lim) < BSIZE){
|
|
uchar *buf = (uchar*)
|
|
malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
|
|
memcpy(buf, s->tok, s->lim - s->tok);
|
|
s->tok = buf;
|
|
s->ptr = &buf[s->ptr - s->bot];
|
|
cursor = &buf[cursor - s->bot];
|
|
s->pos = &buf[s->pos - s->bot];
|
|
s->lim = &buf[s->lim - s->bot];
|
|
s->top = &s->lim[BSIZE];
|
|
free(s->bot);
|
|
s->bot = buf;
|
|
}
|
|
if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){
|
|
s->eof = &s->lim[cnt]; *(s->eof)++ = '\\n';
|
|
}
|
|
s->lim += cnt;
|
|
}
|
|
s->cur = cursor;
|
|
return cursor;
|
|
}
|
|
|
|
int scan(Scanner *s){
|
|
uchar *cursor = s->cur;
|
|
std:
|
|
s->tok = cursor;
|
|
/*!re2c
|
|
any = [\\000-\\377];
|
|
O = [0-7];
|
|
D = [0-9];
|
|
L = [a-zA-Z_];
|
|
H = [a-fA-F0-9];
|
|
E = [Ee] [+-]? D+;
|
|
FS = [fFlL];
|
|
IS = [uUlL]*;
|
|
ESC = [\\\\] ([abfnrtv?'"\\\\] | "x" H+ | O+);
|
|
*/
|
|
|
|
/*!re2c
|
|
"/*" { goto comment; }
|
|
|
|
"auto" { RET(AUTO); }
|
|
"break" { RET(BREAK); }
|
|
"case" { RET(CASE); }
|
|
"char" { RET(CHAR); }
|
|
"const" { RET(CONST); }
|
|
"continue" { RET(CONTINUE); }
|
|
"default" { RET(DEFAULT); }
|
|
"do" { RET(DO); }
|
|
"double" { RET(DOUBLE); }
|
|
"else" { RET(ELSE); }
|
|
"enum" { RET(ENUM); }
|
|
"extern" { RET(EXTERN); }
|
|
"float" { RET(FLOAT); }
|
|
"for" { RET(FOR); }
|
|
"goto" { RET(GOTO); }
|
|
"if" { RET(IF); }
|
|
"int" { RET(INT); }
|
|
"long" { RET(LONG); }
|
|
"register" { RET(REGISTER); }
|
|
"return" { RET(RETURN); }
|
|
"short" { RET(SHORT); }
|
|
"signed" { RET(SIGNED); }
|
|
"sizeof" { RET(SIZEOF); }
|
|
"static" { RET(STATIC); }
|
|
"struct" { RET(STRUCT); }
|
|
"switch" { RET(SWITCH); }
|
|
"typedef" { RET(TYPEDEF); }
|
|
"union" { RET(UNION); }
|
|
"unsigned" { RET(UNSIGNED); }
|
|
"void" { RET(VOID); }
|
|
"volatile" { RET(VOLATILE); }
|
|
"while" { RET(WHILE); }
|
|
|
|
L (L|D)* { RET(ID); }
|
|
|
|
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
|
|
(['] (ESC|any\\[\\n\\\\'])* ['])
|
|
{ RET(ICON); }
|
|
|
|
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
|
|
{ RET(FCON); }
|
|
|
|
(["] (ESC|any\\[\\n\\\\"])* ["])
|
|
{ RET(SCON); }
|
|
|
|
"..." { RET(ELLIPSIS); }
|
|
">>=" { RET(RSHIFTEQ); }
|
|
"<<=" { RET(LSHIFTEQ); }
|
|
"+=" { RET(ADDEQ); }
|
|
"-=" { RET(SUBEQ); }
|
|
"*=" { RET(MULEQ); }
|
|
"/=" { RET(DIVEQ); }
|
|
"%=" { RET(MODEQ); }
|
|
"&=" { RET(ANDEQ); }
|
|
"^=" { RET(XOREQ); }
|
|
"|=" { RET(OREQ); }
|
|
">>" { RET(RSHIFT); }
|
|
"<<" { RET(LSHIFT); }
|
|
"++" { RET(INCR); }
|
|
"--" { RET(DECR); }
|
|
"->" { RET(DEREF); }
|
|
"&&" { RET(ANDAND); }
|
|
"||" { RET(OROR); }
|
|
"<=" { RET(LEQ); }
|
|
">=" { RET(GEQ); }
|
|
"==" { RET(EQL); }
|
|
"!=" { RET(NEQ); }
|
|
";" { RET(';'); }
|
|
"{" { RET('{'); }
|
|
"}" { RET('}'); }
|
|
"," { RET(','); }
|
|
":" { RET(':'); }
|
|
"=" { RET('='); }
|
|
"(" { RET('('); }
|
|
")" { RET(')'); }
|
|
"[" { RET('['); }
|
|
"]" { RET(']'); }
|
|
"." { RET('.'); }
|
|
"&" { RET('&'); }
|
|
"!" { RET('!'); }
|
|
"~" { RET('~'); }
|
|
"-" { RET('-'); }
|
|
"+" { RET('+'); }
|
|
"*" { RET('*'); }
|
|
"/" { RET('/'); }
|
|
"%" { RET('%'); }
|
|
"<" { RET('<'); }
|
|
">" { RET('>'); }
|
|
"^" { RET('^'); }
|
|
"|" { RET('|'); }
|
|
"?" { RET('?'); }
|
|
|
|
|
|
[ \\t\\v\\f]+ { goto std; }
|
|
|
|
"\\n"
|
|
{
|
|
if(cursor == s->eof) RET(EOI);
|
|
s->pos = cursor; s->line++;
|
|
goto std;
|
|
}
|
|
|
|
any
|
|
{
|
|
printf("unexpected character: %c\\n", *s->tok);
|
|
goto std;
|
|
}
|
|
*/
|
|
|
|
comment:
|
|
/*!re2c
|
|
"*/" { goto std; }
|
|
"\\n"
|
|
{
|
|
if(cursor == s->eof) RET(EOI);
|
|
s->tok = s->pos = cursor; s->line++;
|
|
goto comment;
|
|
}
|
|
any { goto comment; }
|
|
*/
|
|
}
|
|
|
|
main(){
|
|
Scanner in;
|
|
int t;
|
|
memset((char*) &in, 0, sizeof(in));
|
|
in.fd = 0;
|
|
while((t = scan(&in)) != EOI){
|
|
/*
|
|
printf("%d\\t%.*s\\n", t, in.cur - in.tok, in.tok);
|
|
printf("%d\\n", t);
|
|
*/
|
|
}
|
|
close(in.fd);
|
|
}
|
|
.fi
|
|
.in -3
|
|
|
|
.SH FEATURES
|
|
.LP
|
|
\*(re does not provide a default action:
|
|
the generated code assumes that the input
|
|
will consist of a sequence of tokens.
|
|
Typically this can be dealt with by adding a rule such as the one for
|
|
unexpected characters in the example above.
|
|
.LP
|
|
The user must arrange for a sentinel token to appear at the end of input
|
|
(and provide a rule for matching it):
|
|
\*(re does not provide an \fC<<EOF>>\fP expression.
|
|
If the source is from a null-byte terminated string, a
|
|
rule matching a null character will suffice. If the source is from a
|
|
file then the approach taken in the example can be used: pad the input with
|
|
a newline (or some other character that can't appear within another token);
|
|
upon recognizing such a character check to see if it is the sentinel
|
|
and act accordingly.
|
|
.LP
|
|
\*(re does not provide start conditions: use a separate scanner
|
|
specification for each start condition (as illustrated in the above example).
|
|
.LP
|
|
No [^x]. Use difference instead.
|
|
.SH BUGS
|
|
.LP
|
|
Only fixed length trailing context can be handled.
|
|
.LP
|
|
The maximum value appearing as a parameter \fIn\fP to \fCYYFILL\fP is not
|
|
provided to the generated code (this value is needed for constructing
|
|
the interface code).
|
|
Note that this value is usually relatively small: for
|
|
typical programming languages \fIn\fP will be the length of the longest
|
|
keyword plus one.
|
|
.LP
|
|
Difference only works for character sets.
|
|
.LP
|
|
The \*(re internal algorithms need documentation.
|
|
|
|
.SH "SEE ALSO"
|
|
.LP
|
|
flex(1), lex(1).
|
|
.P
|
|
More information on \fBre2c\fP can be found here:
|
|
.PD 0
|
|
.P
|
|
.B http://sourceforge.net/projects/re2c/
|
|
.PD 1
|
|
|
|
.SH AUTHORS
|
|
.PD 0
|
|
.P
|
|
Peter Bumbulis <peter@csg.uwaterloo.ca>
|
|
.P
|
|
Brian Young <bayoung@acm.org>
|
|
.P
|
|
Dan Nuffer <nuffer@users.sourceforge.net>
|
|
.P
|
|
Marcus Boerger <helly@users.sourceforge.net>
|
|
.P
|
|
Hartmut Kaiser <hkaiser@users.sourceforge.net>
|
|
.P
|
|
.PD 1
|
|
|
|
.SH VERSION INFORMATION
|
|
This manpage describes \fBre2c\fP, version 0.9.3.
|
|
|
|
.fi
|