Update re2c to version 0.16

This commit is contained in:
Randy Heit 2016-02-06 20:58:35 -06:00
parent 5ee988f607
commit 43c6c9b5ee
191 changed files with 25750 additions and 17231 deletions

View file

@ -1,3 +1,202 @@
Version 0.16 (2016-01-21)
---------------------------
- Fixed bug #127 "code generation error with wide chars and bitmaps (omitted 'goto' statement)"
- Added DFA minimization and option '--dfa-minimization <table | moore>'
- Fixed bug #128 "very slow DFA construction (resulting in a very large DFA)"
- Fixed bug #132 "test failure on big endian archs with 0.15.3"
Version 0.15.3 (2015-12-02)
---------------------------
- Fixed bugs and applied patches:
#122 "clang does not compile re2c 0.15.x" (reported and fixed by Oleksii Taran).
#124 "Get rid of UINT32_MAX and friends" (patch by Sergei Trofimovich, fixes FreeBSD builds).
#125 "[OS X] git reports changes not staged for commit in newly cloned repository" (by Oleksii Taran, this fix also applies to Windows).
- Added option --no-version that allows to omit version information.
- Reduced memory and time consumed with -Wundefined-control-flow.
- Improved coverage of input data generated with -S --skeleton.
Version 0.15.2 (2015-11-23)
---------------------------
- Fixed build system: lexer depends on bison-generated parser
(Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=566620)
Version 0.15.1 (2015-11-22)
---------------------------
- Fixed test failures caused by locale-sensitive 'sort'.
Version 0.15 (2015-11-22)
-------------------------
- Updated website http://re2c.org:
added examples
updated docs
added news
added web feed (Atom 1.0)
- Added options:
-S, --skeleton
--empty-class <match-empty | match-none | error>
- Added warnings:
-W
-Werror
-W<warning>
-Wno-<warning>
-Werror-<warning>
-Wno-error-<warning>
- Added individual warnings:
-Wundefined-control-flow
-Wunreachable-rules
-Wcondition-order
-Wuseless-escape
-Wempty-character-class
-Wswapped-range
-Wmatch-empty-string
- Fixed options:
-- (interpret remaining arguments as non-options)
- Deprecated options:
-1 --single-pass (single pass is by default now)
- Reduced size of the generated .dot files.
- Fixed bugs:
#27 re2c crashes reading files containing %{ %} (patch by Rui)
#51 default rule doesn't work in reuse mode
#52 eliminate multiple passes
#59 bogus yyaccept in -c mode
#60 redundant use of YYMARKER
#61 empty character class [] matches empty string
#115 flex-style named definitions cause ambiguity in re2c grammar
#119 -f with -b/-g generates incorrect dispatch on fill labels
#116 empty string with non-empty trailing context consumes code units
- Added test options:
-j, -j <N> (run tests in N threads, defaults to the number of CPUs)
--wine (test windows builds using wine)
--skeleton (generate skeleton programs, compile and execute them)
--keep-tmp-files (don't delete intermediate files for successful tests)
- Updated build system:
support out of source builds
support `make distcheck`
added `make bootstrap` (rebuild re2c after building with precompiled .re files)
added `make tests` (run tests with -j)
added `make vtests` (run tests with --valgrind -j)
added `make wtests` (run tests with --wine -j 1)
added Autoconf tests for CXXFLAGS. By default try the following options:
-W -Wall -Wextra -Weffc++ -pedantic -Wformat=2 -Wredundant-decls
-Wsuggest-attribute=format -Wconversion -Wsign-conversion -O2 -Weverything),
respect user-defined CXXFLAGS
support Mingw builds: `configure -host i686-w64-mingw32`
structured source files
removed old MSVC files
- Moved development to github (https://github.com/skvadrik/re2c), keep a mirror on sourceforge.
Version 0.14.3 (2015-05-20)
-----------------------------
- applied patch '#27 re2c crashes reading files containing %{ %}' by Rui
- dropped distfiles for MSVC (they are broken anyway)
Version 0.14.2 (2015-03-25)
-----------------------------
- fixed #57 Wrong result only if another rule is present
Version 0.14.1 (2015-02-27)
-----------------------------
- fixed #55 re2c-0.14: re2c -V outputs null byte
Version 0.14 (2015-02-23)
-----------------------------
- Added generic input API 21 (#21 Support to configure how re2c code interfaced with the symbol buffer?)
- fixed #46 re2c generates an infinite loop, depends on existence of previous parser
- fixed #47 Dot output label escaped characters
Version 0.13.7.5 (2014-08-22)
-----------------------------
- Fixed Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=518904 (PHP lexer)
Version 0.13.7.4 (2014-07-29)
-----------------------------
- Enabled 'make docs' only if configured with '--enable-docs'
- Disallowed to use yacc/byacc instead of bison to build parser
- Removed non-portable sed feature in script that runs tests
Version 0.13.7.3 (2014-07-28)
-----------------------------
- Fixed CXX warning
- Got rid of asciidoc build-time dependency
Version 0.13.7.2 (2014-07-27)
-----------------------------
- Included man page into dist, respect users CXXFLAGS.
Version 0.13.7.1 (2014-07-26)
-----------------------------
- Added missing files to tarball
Version 0.13.7 (2014-07-25)
---------------------------
- Added UTF-8 support
- Added UTF-16 support
- Added default rule
- Added option to control ill-formed Unicode
Version 0.13.6 (2013-07-04)
---------------------------
- Fixed #2535084 uint problem with Sun C 5.8
- #3308400: allow Yacc-style %{code brackets}%
- #2506253: allow C++ // comments
- Fixed inplace configuration in -e mode.
- Applied #2482572 Typos in error messages.
- Applied #2482561 Error in manual section on -r mode.
- Fixed #2478216 Wrong start_label in -c mode.
- Fixed #2186718 Unescaped backslash in file name of #line directive.
- Fixed #2102138 Duplicate case labels on EBCDIC.
- Fixed #2088583 Compile problem on AIX.
- Fixed #2038610 Ebcdic problem.
- improve dot support: make char intervals (e.g. [A-Z]) instead of one edge per char
Version 0.13.5 (2008-05-25)
---------------------------
- Fixed #1952896 Segfault in re2c::Scanner::scan.
- Fixed #1952842 Regression.
Version 0.13.4 (2008-04-05)
---------------------------
- Added transparent handling of #line directives in input files.
- Added re2c:yyfill:check inplace configuration.
- Added re2c:define:YYSETSTATE:naked inplace configuration.
- Added re2c:flags:w and re2c:flags:u inplace configurations.
- Added the ability to add rules in 'use:re2c' blocks.
- Changed -r flag to accept only 'rules:re2c' and 'use:re2c' blocks.
Version 0.13.3 (2008-03-14)
---------------------------
- Added -r flag to allow reuse of scanner definitions.
- Added -F flag to support flex syntax in rules.
- Fixed SEGV in scanner that occurs with very large blocks.
- Fixed issue with unused yybm.
- Partial support for flex syntax.
- Changed to allow /* comments with -c switch.
- Added flag -D/--emit-dot.
Version 0.13.2 (2008-02-14)
---------------------------
- Added flag --case-inverted.
- Added flag --case-insensitive.
- Added support for '<!...>' to enable rule setup.
- Added support for '=>' style rules.
- Added support for ':=' style rules.
- Added support for ':=>' style rules.
- Added re2c:cond:divider and re2c:con:goto inplace configuration.
- Fixed code generation to emit space after 'if'.
Version 0.13.1 (2007-08-24)
---------------------------
- Added custom build rules for Visual Studio 2005 (re2c.rules). (William Swanson)
- Fixed issue with some compilers.
- Fixed #1776177 Build on AIX.
- Fixed #1743180 fwrite with 0 length crashes on OS X.
Version 0.13.0 (2007-06-24)
---------------------------
- Added -c and -t to generate scanners with (f)lex-like condition support.
- Fixed issue with short form of switches and parameter if not first switch.
- Fixed #1708378 segfault in actions.cc.
Version 0.12.3 (2007-08-24)
---------------------------
- Fixed issue with some compilers.

View file

@ -7,32 +7,92 @@ include( CheckTypeSize )
set( PACKAGE_NAME re2c )
set( PACKAGE_TARNAME re2c )
set( PACKAGE_VERSION 0.12.3 )
set( PACKAGE_STRING "re2c 0.12.3" )
set( PACKAGE_VERSION 0.16 )
set( PACKAGE_STRING "re2c 0.16" )
set( PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net" )
# Library functions whose availability is exported to config.h.
CHECK_FUNCTION_EXISTS( strdup HAVE_STRDUP )
CHECK_FUNCTION_EXISTS( strndup HAVE_STRNDUP )

# Sizes of integer literal spellings (the argument is passed to sizeof by the
# check). NOTE(review): 0i8 is presumably the MSVC-specific suffix - confirm.
CHECK_TYPE_SIZE( "0i8" SIZEOF_0I8 )
CHECK_TYPE_SIZE( "0l" SIZEOF_0L )
CHECK_TYPE_SIZE( "0ll" SIZEOF_0LL )

# Sizes of the fundamental types.
CHECK_TYPE_SIZE( char SIZEOF_CHAR )
CHECK_TYPE_SIZE( short SIZEOF_SHORT )
CHECK_TYPE_SIZE( int SIZEOF_INT )
CHECK_TYPE_SIZE( long SIZEOF_LONG )
CHECK_TYPE_SIZE( "long long" SIZEOF_LONG_LONG )
CHECK_TYPE_SIZE( "void *" SIZEOF_VOID_P )

# The result variable must be spelled SIZEOF___INT64 (no underscore before 64)
# so that it matches "#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@" in
# config.h.in; the previous spelling SIZEOF___INT_64 was never consumed.
CHECK_TYPE_SIZE( __int64 SIZEOF___INT64 )
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h )
include_directories( ${CMAKE_CURRENT_BINARY_DIR} )
include_directories( ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} )
add_definitions( -DHAVE_CONFIG_H )
file( GLOB SRC_HDR
src/codegen/*.h
src/conf/*.h
src/ir/*.h
src/*.h
src/parse/*.h
src/util/*.h )
add_executable( re2c
actions.cc
code.cc
dfa.cc
main.cc
mbo_getopt.cc
parser.cc
scanner.cc
substr.cc
translate.cc )
${SRC_HDR}
src/codegen/bitmap.cc
src/codegen/emit_action.cc
src/codegen/emit_dfa.cc
src/codegen/label.cc
src/codegen/go_construct.cc
src/codegen/go_destruct.cc
src/codegen/go_emit.cc
src/codegen/go_used_labels.cc
src/codegen/input_api.cc
src/codegen/output.cc
src/codegen/print.cc
src/conf/msg.cc
src/conf/opt.cc
src/conf/parse_opts.cc
src/conf/warn.cc
src/ir/nfa/calc_size.cc
src/ir/nfa/nfa.cc
src/ir/nfa/split.cc
src/ir/adfa/adfa.cc
src/ir/adfa/prepare.cc
src/ir/dfa/determinization.cc
src/ir/dfa/fillpoints.cc
src/ir/dfa/minimization.cc
src/ir/regexp/display.cc
src/ir/regexp/encoding/enc.cc
src/ir/regexp/encoding/range_suffix.cc
src/ir/regexp/encoding/utf8/utf8_regexp.cc
src/ir/regexp/encoding/utf8/utf8_range.cc
src/ir/regexp/encoding/utf8/utf8.cc
src/ir/regexp/encoding/utf16/utf16_regexp.cc
src/ir/regexp/encoding/utf16/utf16.cc
src/ir/regexp/encoding/utf16/utf16_range.cc
src/ir/regexp/fixed_length.cc
src/ir/regexp/regexp.cc
src/ir/compile.cc
src/ir/rule_rank.cc
src/ir/skeleton/control_flow.cc
src/ir/skeleton/generate_code.cc
src/ir/skeleton/generate_data.cc
src/ir/skeleton/match_empty.cc
src/ir/skeleton/maxlen.cc
src/ir/skeleton/skeleton.cc
src/ir/skeleton/unreachable.cc
src/ir/skeleton/way.cc
src/main.cc
src/parse/code.cc
src/parse/input.cc
src/parse/lex.cc
src/parse/lex_conf.cc
src/parse/parser.cc
src/parse/scanner.cc
src/parse/unescape.cc
src/util/s_to_n32_unsafe.cc
src/util/range.cc )
set( CROSS_EXPORTS ${CROSS_EXPORTS} re2c PARENT_SCOPE )

2
tools/re2c/NO_WARRANTY Normal file
View file

@ -0,0 +1,2 @@
re2c is distributed with no warranty whatever. The author and any other
contributors take no responsibility for the consequences of its use.

View file

@ -1,188 +1,159 @@
re2c Version 0.12.3
------------------
re2c
--------------------------------------------------------------------------------
Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
Currently maintained by:
Dan Nuffer <nuffer at users.sourceforge.net>
Marcus Boerger <helly at users.sourceforge.net>
Hartmut Kaiser <hkaiser at users.sourceforge.net>
DESCRIPTION
--------------------------------------------------------------------------------
re2c is a tool for generating C-based recognizers from regular expressions.
re2c-based scanners are efficient: for programming languages, given similar
specifications, a re2c-based scanner is typically almost twice as fast as a
flex-based scanner with little or no increase in size (possibly a decrease
on cisc architectures). Indeed, re2c-based scanners are quite competitive with
hand-crafted ones.
Unlike flex, re2c does not generate complete scanners: the user must supply some
interface code. While this code is not bulky (about 50-100 lines for a
flex-like scanner; see the man page and examples in the distribution) careful
coding is required for efficiency (and correctness). One advantage of this
arrangement is that the generated code is not tied to any particular input
model.
--------------------------------------------------------------------------------
DOWNLOAD
--------------------------------------------------------------------------------
The re2c distribution can be found at:
https://sourceforge.net/projects/re2c/
http://sourceforge.net/projects/re2c/
Download the latest tarball:
https://sourceforge.net/projects/re2c/files/latest/download
re2c has been developed and tested with the following compilers on various
platforms in 32 bit and 64 bit mode:
- GCC 3.3 ... 4.1
- Microsoft VC 7, 7.1, 8
- Intel 9.0
- Sun C++ 5.8 (CXXFLAGS='-library=stlport4')
- MIPSpro Compilers: Version 7.4.4m
Clone git repo:
git clone git://git.code.sf.net/p/re2c/code-git
--------------------------------------------------------------------------------
GCC 2.x and Microsoft VC 6 are not capable of compiling re2c.
Building re2c on unix like platforms requires autoconf 2.57 and bison (tested
with 1.875 and later). Under windows you don't need autoconf or bison
and can use the pregenerated files.
BUILD
--------------------------------------------------------------------------------
Contents:
1. simple build
2. bootstrap
3. out-of-source build
4. testing
5. rebuild documentation
6. build for windows with mingw
7. build from git
You can build this software by simply typing the following commands:
./configure
make
1. Simplest possible build:
$ ./configure [--prefix=<prefix>]
$ make
$ make install
This will build re2c and install it (binary and man page) to <prefix> (defaults
to /usr/local).
The above version will be based on the pregenerated scanner.cc file.
If you want to build that file yourself (recommended when installing
re2c) you need the following steps:
./configure
make
rm -f scanner.cc
make install
2. Bootstrap and rebuild:
$ ./configure [--prefix=<prefix>]
$ make bootstrap
$ make install
Usual bootstrap procedure: re2c uses re2c to compile its lexer.
1. build lexer (if make finds re2c binary in build directory, it will build lexer
from source, otherwise it will use prebuilt lexer)
2. build re2c
3. build lexer from source using re2c binary in build directory
4. rebuild re2c
Or you can create a rpm package and install it by the following commands:
./configure
make rpm
rpm -Uhv <packagedir>/re2c-0.12.3-1.rpm
3. Out-of-source build:
$ mkdir <build-directory>
$ cd <build-directory>
$ <path-to-configure>/configure [--prefix=<prefix>]
$ make
$ make install
If you want to build from CVS then the first thing you should do is
regenerating all build files using the following command:
./autogen.sh
and then continue with one of the above described build methods. Or if you
need to generate RPM packages for cvs builds use these commands:
./autogen.sh
./configure
./makerpm <release>
rpm -Uhv <packagedir>/re2c-0.12.3-<release>.rpm
4. Testing:
$ make check
This will redirect test script output to file. If you want to see progress:
$ make tests
Testing under valgrind (takes a long time):
$ make vtests
Here <release> should be a number like 1. And <packagedir> must equal
the directory where the makerpm step has written the generated rpm to.
5. Rebuild documentation (requires rst2man.py):
$ ./configure --enable-docs [--prefix=<prefix>]
$ make docs
$ make install
If you are on a debian system you can use the tool 'alien' to convert rpms
to debian packages.
6. Build for windows using mingw:
$ ../configure --host i686-w64-mingw32 [--prefix=<prefix>]
$ make
This will result in an executable re2c.exe, which can be tested with wine:
$ make wtests
When building with native SUN compilers you need to set the following compiler
flags: CXXFLAGS='-g -compat5 -library=stlport4'.
7. If you want to build from git, you'll first need to generate autotools files:
$ ./autogen.sh
--------------------------------------------------------------------------------
If you want to build re2c on a windows system you can either use cygwin and one
of the methods described above or use Microsoft Visual C .NET 2002 or later
with the solution files provided (re2c.sln for 2002/2003 and re2c-2005.sln for
version 2005). re2c cannot be built with Microsoft Visual C 6.0 or earlier.
re2c is a great tool for writing fast and flexible lexers. It has
served many people well for many years. re2c is on the order of 2-3
times faster than a flex based scanner, and its input model is much
more flexible.
INFO
--------------------------------------------------------------------------------
$ man re2c
For an introduction to re2c refer to the lessons sub directory.
re2c home page:
re2c.org
Peter's original version 0.5 ANNOUNCE and README follows.
re2c manual:
re2c.org/manual.html
--
Ulya Trofimovich's blog on re2c:
skvadrik.github.io/aleph_null/re2c.html
re2c is a tool for generating C-based recognizers from regular
expressions. re2c-based scanners are efficient: for programming
languages, given similar specifications, an re2c-based scanner is
typically almost twice as fast as a flex-based scanner with little or no
increase in size (possibly a decrease on cisc architectures). Indeed,
re2c-based scanners are quite competitive with hand-crafted ones.
Original paper on re2c: "RE2C: a More Versatile Parser Generator" (1994, Peter
Bumbulis and Donald D. Cowan).
Unlike flex, re2c does not generate complete scanners: the user must
supply some interface code. While this code is not bulky (about 50-100
lines for a flex-like scanner; see the man page and examples in the
distribution) careful coding is required for efficiency (and
correctness). One advantage of this arrangement is that the generated
code is not tied to any particular input model. For example, re2c
generated code can be used to scan data from a null-byte terminated
buffer as illustrated below.
Examples can be found in 'examples' directory.
--------------------------------------------------------------------------------
Given the following source
#define NULL ((char*) 0)
char *scan(char *p)
{
#define YYCTYPE char
#define YYCURSOR p
#define YYLIMIT p
#define YYFILL(n)
/*!re2c
[0-9]+ {return YYCURSOR;}
[\000-\377] {return NULL;}
*/
}
MAILING LISTS
--------------------------------------------------------------------------------
re2c-general:
re2c-general@lists.sourceforge.net
re2c-devel:
re2c-devel@lists.sourceforge.net
re2c will generate
You are welcome to ask for help or share your thoughts and ideas about re2c :)
--------------------------------------------------------------------------------
/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
#line 1 "simple.re"
#define NULL ((char*) 0)
char *scan(char *p)
{
#define YYCTYPE char
#define YYCURSOR p
#define YYLIMIT p
#define YYFILL(n)
{
YYCTYPE yych;
unsigned int yyaccept;
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
yych = *YYCURSOR;
if(yych <= '/') goto yy4;
if(yych >= ':') goto yy4;
yy2: yych = *++YYCURSOR;
goto yy7;
yy3:
#line 9
{return YYCURSOR;}
yy4: yych = *++YYCURSOR;
yy5:
#line 10
{return NULL;}
yy6: ++YYCURSOR;
if(YYLIMIT == YYCURSOR) YYFILL(1);
yych = *YYCURSOR;
yy7: if(yych <= '/') goto yy3;
if(yych <= '9') goto yy6;
goto yy3;
}
#line 11
BUGS
--------------------------------------------------------------------------------
Please report any bugs and send feature requests to:
https://sourceforge.net/p/re2c/_list/tickets
--------------------------------------------------------------------------------
}
Note that most compilers will perform dead-code elimination to remove
all YYCURSOR, YYLIMIT comparisons.
AUTHORS
--------------------------------------------------------------------------------
Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
Currently maintained by:
Ulya Trofimovich <skvadrik@gmail.com>
Dan Nuffer <nuffer@users.sourceforge.net>
Marcus Boerger <helly@users.sourceforge.net>
Hartmut Kaiser <hkaiser@users.sourceforge.net>
--------------------------------------------------------------------------------
re2c was developed for a particular project (constructing a fast REXX
scanner of all things!) and so while it has some rough edges, it should
be quite usable. More information about re2c can be found in the
(admittedly skimpy) man page; the algorithms and heuristics used are
described in an upcoming LOPLAS article (included in the distribution).
Probably the best way to find out more about re2c is to try the supplied
examples. re2c is written in C++, and is currently being developed
under Linux using gcc 2.5.8.
Peter
LICENSE
--------------------------------------------------------------------------------
re2c is distributed with no warranty whatever. The code is certain to contain
errors. Neither the author nor any contributor takes responsibility for any
consequences of its use.
--
re2c is distributed with no warranty whatever. The code is certain to
contain errors. Neither the author nor any contributor takes
responsibility for any consequences of its use.
re2c is in the public domain. The data structures and algorithms used
in re2c are all either taken from documents available to the general
public or are inventions of the author. Programs generated by re2c may
be distributed freely. re2c itself may be distributed freely, in source
or binary, unchanged or modified. Distributors may charge whatever fees
they can obtain for re2c.
re2c is in the public domain. The data structures and algorithms used in re2c
are all either taken from documents available to the general public or are
inventions of the authors. Programs generated by re2c may be distributed freely.
re2c itself may be distributed freely, in source or binary, unchanged or
modified. Distributors may charge whatever fees they can obtain for re2c.
If you do make use of re2c, or incorporate it into a larger project an
acknowledgement somewhere (documentation, research report, etc.) would
be appreciated.
Please send bug reports and feedback (including suggestions for
improving the distribution) to
peter@csg.uwaterloo.ca
Include a small example and the banner from parser.y with bug reports.
acknowledgement somewhere (documentation, research report, etc.) would be
appreciated.
--------------------------------------------------------------------------------

File diff suppressed because it is too large Load diff

View file

@ -1,57 +0,0 @@
/* $Id: basics.h 520 2006-05-25 13:31:06Z helly $ */
#ifndef _basics_h
#define _basics_h
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "config_w32.h"
#endif
// Integer aliases used throughout re2c. byte/word/dword are chosen at
// preprocessing time by comparing the SIZEOF_* macros (supplied by the
// config header included above) against the wanted width in bytes.
// An undefined SIZEOF_* macro evaluates to 0 inside #if, so in that case
// every test fails and the trailing #else fallback is used.
namespace re2c
{
// byte: first of char/short/int/long whose size is 1; unsigned char if none matches.
#if SIZEOF_CHAR == 1
typedef unsigned char byte;
#elif SIZEOF_SHORT == 1
typedef unsigned short byte;
#elif SIZEOF_INT == 1
typedef unsigned int byte;
#elif SIZEOF_LONG == 1
typedef unsigned long byte;
#else
typedef unsigned char byte;
#endif
// word: first of char/short/int/long whose size is 2; unsigned short if none matches.
#if SIZEOF_CHAR == 2
typedef unsigned char word;
#elif SIZEOF_SHORT == 2
typedef unsigned short word;
#elif SIZEOF_INT == 2
typedef unsigned int word;
#elif SIZEOF_LONG == 2
typedef unsigned long word;
#else
typedef unsigned short word;
#endif
// dword: first of char/short/int/long whose size is 4; unsigned long if none matches.
#if SIZEOF_CHAR == 4
typedef unsigned char dword;
#elif SIZEOF_SHORT == 4
typedef unsigned short dword;
#elif SIZEOF_INT == 4
typedef unsigned int dword;
#elif SIZEOF_LONG == 4
typedef unsigned long dword;
#else
typedef unsigned long dword;
#endif
// Short spellings for the unsigned fundamental types.
typedef unsigned int uint;
typedef unsigned char uchar;
typedef unsigned short ushort;
typedef unsigned long ulong;
} // end namespace re2c
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,53 +0,0 @@
/* $Id: code.h 525 2006-05-25 13:32:49Z helly $ */
#ifndef _code_h
#define _code_h
#include "re.h"
#include "dfa.h"
namespace re2c
{
// Record pairing a Go with a State, chainable through `next`.
// Only the declaration is visible here; the member functions are defined
// elsewhere.
class BitMap
{
public:
// NOTE(review): presumably the head of the list linked through `next` - confirm in the implementation.
static BitMap *first;
const Go *go;
const State *on;
const BitMap *next;
// NOTE(review): the meaning of i and m is not visible from this header.
uint i;
uint m;
public:
// Lookups returning a BitMap (search criteria are defined in the implementation).
static const BitMap *find(const Go*, const State*);
static const BitMap *find(const State*);
// Writes to the given stream; `ind` is the only named parameter.
static void gen(std::ostream&, uint ind, uint, uint);
static void stats();
BitMap(const Go*, const State*);
~BitMap();
#if PEDANTIC
// Member-wise copy; only compiled when PEDANTIC is non-zero.
BitMap(const BitMap& oth)
: go(oth.go)
, on(oth.on)
, next(oth.next)
, i(oth.i)
, m(oth.m)
{
}
// Assignment re-constructs *this in place from oth using placement new.
// NOTE(review): the destructor is not run on the old value first, and
// placement new needs <new> to be included somewhere - confirm both are
// acceptable for this type.
BitMap& operator = (const BitMap& oth)
{
new(this) BitMap(oth);
return *this;
}
#endif
};
#ifdef _MSC_VER
# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */
#endif
} // end namespace re2c
#endif

View file

@ -1,33 +0,0 @@
/* $Id: token.h 547 2006-05-25 13:40:35Z helly $ */
#ifndef _code_names_h
#define _code_names_h
#include <string>
#include <map>
namespace re2c
{
// String-to-string table with a self-seeding lookup: asking for a name that
// has not been stored yet registers it, mapped to itself.
class CodeNames: public std::map<std::string, std::string>
{
public:
	// Returns a reference to the value stored under `what`, inserting the
	// identity mapping what -> what first when the key is absent.
	std::string& operator [] (const char * what);
};
inline std::string& CodeNames::operator [] (const char * what)
{
	const std::string key(what);
	const CodeNames::iterator pos = find(key);
	if (pos == end())
	{
		// Unknown name: store it under itself and hand back the new slot.
		return insert(std::make_pair(key, key)).first->second;
	}
	return pos->second;
}
} // end namespace re2c
#endif

View file

@ -1,10 +1,7 @@
/* config.h.in. Generated from configure.in by autoheader. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* Define to 1 if you have the `strdup' function. */
#cmakedefine HAVE_STRDUP
/* Define to 1 if you have the `strndup' function. */
#cmakedefine HAVE_STRNDUP
/* Define to 1 if you have the <stdint.h> header file. */
#cmakedefine HAVE_STDINT_H @HAVE_STDINT_H@
/* Name of package */
#cmakedefine PACKAGE "@PACKAGE_NAME@"
@ -21,21 +18,41 @@
/* Define to the one symbol short name of this package. */
#cmakedefine PACKAGE_TARNAME "@PACKAGE_TARNAME@"
/* Define to the home page for this package. */
#cmakedefine PACKAGE_URL "@PACKAGE_URL@"
/* Define to the version of this package. */
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
/* The size of `0i8', as computed by sizeof. */
#cmakedefine SIZEOF_0I8 @SIZEOF_0I8@
/* The size of `0l', as computed by sizeof. */
#cmakedefine SIZEOF_0L @SIZEOF_0L@
/* The size of `0ll', as computed by sizeof. */
#cmakedefine SIZEOF_0LL @SIZEOF_0LL@
/* The size of `char', as computed by sizeof. */
#define SIZEOF_CHAR @SIZEOF_CHAR@
#cmakedefine SIZEOF_CHAR @SIZEOF_CHAR@
/* The size of `int', as computed by sizeof. */
#define SIZEOF_INT @SIZEOF_INT@
#cmakedefine SIZEOF_INT @SIZEOF_INT@
/* The size of `long', as computed by sizeof. */
#define SIZEOF_LONG @SIZEOF_LONG@
#cmakedefine SIZEOF_LONG @SIZEOF_LONG@
/* The size of `long long', as computed by sizeof. */
#cmakedefine SIZEOF_LONG_LONG @SIZEOF_LONG_LONG@
/* The size of `short', as computed by sizeof. */
#define SIZEOF_SHORT @SIZEOF_SHORT@
#cmakedefine SIZEOF_SHORT @SIZEOF_SHORT@
/* The size of `void *', as computed by sizeof. */
#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@
/* The size of `__int64', as computed by sizeof. */
#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@
/* Version number of package */
#define VERSION "@PACKAGE_VERSION@"
#cmakedefine VERSION @PACKAGE_VERSION@

View file

@ -1,102 +0,0 @@
/* config.h. Generated by configure. */
/* config.h.in. Generated from configure.in by autoheader. */
/* Define to 1 if you have the `getpagesize' function. */
#define HAVE_GETPAGESIZE 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `memset' function. */
#define HAVE_MEMSET 1
/* Define to 1 if you have a working `mmap' system call. */
/* #undef HAVE_MMAP */
/* Define to 1 if you have the `munmap' function. */
#define HAVE_MUNMAP 1
/* Define to 1 if stdbool.h conforms to C99. */
#define HAVE_STDBOOL_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `strdup' function. */
#define HAVE_STRDUP 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if the system has the type `_Bool'. */
#define HAVE__BOOL 1
/* Name of package */
#define PACKAGE "re2c"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net"
/* Define to the full name of this package. */
#define PACKAGE_NAME "re2c"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "re2c 0.12.3"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "re2c"
/* Define to the version of this package. */
#define PACKAGE_VERSION "0.12.3"
/* The size of a `char', as computed by sizeof. */
#define SIZEOF_CHAR 1
/* The size of a `int', as computed by sizeof. */
#define SIZEOF_INT 4
/* The size of a `long', as computed by sizeof. */
#define SIZEOF_LONG 4
/* The size of a `short', as computed by sizeof. */
#define SIZEOF_SHORT 2
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Version number of package */
#define VERSION "0.12.3"
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
/* #undef inline */
#endif
/* Define to `unsigned' if <sys/types.h> does not define. */
/* #undef size_t */
/* Define to empty if the keyword `volatile' does not work. Warning: valid
code using `volatile' can become incorrect without. Disable with care. */
/* #undef volatile */

View file

@ -1,416 +0,0 @@
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "globals.h"
#include "substr.h"
#include "dfa.h"
namespace re2c
{
// Writes c to o either as a single-quoted character (via prtCh) when it is
// printable, or as a hexadecimal constant (via prtHex) otherwise.
// The printability test is made on talx[c], unless wFlag is set or useTalx
// is false, in which case c itself is tested.
void prtChOrHex(std::ostream& o, uint c, bool useTalx)
{
	const int oc = (int)((re2c::wFlag || !useTalx) ? c : re2c::talx[c]);
	const bool printable = (oc < 256) && isprint(oc);
	if (!printable)
	{
		prtHex(o, c);
		return;
	}
	o << '\'';
	prtCh(o, c);
	o << '\'';
}
// Writes c to o as "0x" followed by hexadecimal digits, most significant
// first: 8 digits when uFlag is set, otherwise 4 when wFlag is set,
// otherwise 2. The value printed is talx[c] unless wFlag is set or useTalx
// is false, in which case it is c itself.
void prtHex(std::ostream& o, uint c, bool useTalx)
{
	const int oc = (int)((re2c::wFlag || !useTalx) ? c : re2c::talx[c]);
	int digits = 2;
	if (re2c::uFlag)
	{
		digits = 8;
	}
	else if (re2c::wFlag)
	{
		digits = 4;
	}
	o << "0x";
	// One hexCh call per nibble, walking down from the highest one.
	for (int shift = 4 * (digits - 1); shift >= 0; shift -= 4)
	{
		o << hexCh(oc >> shift);
	}
}
// Writes c to o in a form usable inside a C character literal.
// - quote, backslash and the usual control characters become backslash escapes;
// - other printable codes below 256 are written verbatim;
// - anything else becomes "0x" plus 6 hex digits (uFlag), "0x" plus 4 hex
//   digits (wFlag), or a backslash followed by three octCh digits.
// The value examined is talx[c] unless wFlag is set or useTalx is false.
void prtCh(std::ostream& o, uint c, bool useTalx)
{
	const int oc = (int)((re2c::wFlag || !useTalx) ? c : re2c::talx[c]);

	// Characters that have a dedicated escape sequence.
	const char *escape = 0;
	switch (oc)
	{
		case '\'': escape = "\\'";  break;
		case '\n': escape = "\\n";  break;
		case '\t': escape = "\\t";  break;
		case '\v': escape = "\\v";  break;
		case '\b': escape = "\\b";  break;
		case '\r': escape = "\\r";  break;
		case '\f': escape = "\\f";  break;
		case '\a': escape = "\\a";  break;
		case '\\': escape = "\\\\"; break;
		default: break;
	}
	if (escape)
	{
		o << escape;
		return;
	}

	if ((oc < 256) && isprint(oc))
	{
		o << (char) oc;
		return;
	}

	if (re2c::uFlag || re2c::wFlag)
	{
		// uFlag takes precedence over wFlag: six nibbles versus four.
		o << "0x";
		for (int shift = re2c::uFlag ? 20 : 12; shift >= 0; shift -= 4)
		{
			o << hexCh(oc >> shift);
		}
		return;
	}

	o << '\\' << octCh(oc / 64) << octCh(oc / 8) << octCh(oc);
}
// Writes the half-open character range [lb, ub) to o in bracket notation:
// "[x]" when the range holds exactly one character, "[x-y]" otherwise, where
// y is ub - 1. A leading "*" is written when lb is greater than ub.
void printSpan(std::ostream& o, uint lb, uint ub)
{
	if (lb > ub)
	{
		o << "*";
	}
	o << "[";
	prtCh(o, lb);
	if ((ub - lb) != 1)
	{
		o << "-";
		prtCh(o, ub - 1);
	}
	o << "]";
}
// Prints this span as "<range> <target label>; " when it has a target state
// and prints nothing otherwise. lb is the lower bound of the range. Always
// returns this span's upper bound, so the caller can feed it back in as the
// next span's lower bound.
uint Span::show(std::ostream &o, uint lb) const
{
	if (!to)
	{
		return ub;
	}
	printSpan(o, lb, ub);
	o << " " << to->label << "; ";
	return ub;
}
// Dumps one state: a "state <label>" header, with " accepts <n>" appended
// when the state has a rule, then every span of its transition table in order.
std::ostream& operator<<(std::ostream &o, const State &s)
{
	o << "state " << s.label;
	if (s.rule)
	{
		o << " accepts " << s.rule->accept;
	}
	o << "\n";

	// Each span's show() returns its upper bound, which becomes the lower
	// bound of the span that follows it.
	uint lower = 0;
	uint idx = 0;
	while (idx < s.go.nSpans)
	{
		lower = s.go.span[idx].show(o, lower);
		++idx;
	}
	return o;
}
std::ostream& operator<<(std::ostream &o, const DFA &dfa)
{
for (State *s = dfa.head; s; s = s->next)
{
o << s << "\n\n";
}
return o;
}
// Constructs an empty state: numeric fields are zero, pointers are null,
// both flags are false and the transition table `go` is value-initialised.
State::State()
: label(0)
, rule(NULL)
, next(0)
, link(NULL)
, depth(0)
, kCount(0)
, kernel(NULL)
, isPreCtxt(false)
, isBase(false)
, go()
, action(NULL)
{
}
// Frees what the state owns: its action object, its kernel array and the
// span array of its transition table.
State::~State()
{
delete action;
delete [] kernel;
// go.span is allocated with new Span[] in the DFA constructor.
delete [] go.span;
}
// Append to the array at `cP` every not-yet-marked instruction reachable from
// `i`, marking each one as it is added. FORK instructions are followed both to
// the next instruction (recursively) and to their link; GOTO and CTXT are
// followed to their link; any other tag ends the walk.
// Returns the position just past the last entry written.
static Ins **closure(Ins **cP, Ins *i)
{
	for (;;)
	{
		if (isMarked(i))
		{
			return cP;
		}

		mark(i);
		*cP = i;
		++cP;

		if (i->i.tag == FORK)
		{
			// Explore the fall-through branch first, then continue with the link.
			cP = closure(cP, i + 1);
		}
		else if (i->i.tag != GOTO && i->i.tag != CTXT)
		{
			return cP;
		}
		i = (Ins*) i->i.link;
	}
}
// Scratch record used by the DFA constructor while it builds the transitions
// of one state.
struct GoTo
{
Char ch; // a character value seen in the state's kernel
void *to; // first an Ins* list head, later replaced by the target State*
};
// Build the DFA from the instruction array `ins` (`ni` entries) for the
// character range [lb, ub). `rep` is read once per character position when
// the spans are assembled.
// States are discovered with a work list: findState() pushes every state it
// creates onto `toDo`, and the loop below pops states until none remain.
DFA::DFA(Ins *ins, uint ni, uint lb, uint ub, Char *rep)
: lbChar(lb)
, ubChar(ub)
, nStates(0)
, head(NULL)
, tail(&head)
, toDo(NULL)
{
// Scratch buffers reused for every state: `work` collects closures (one
// extra slot for the NULL terminator findState() writes), `goTo` and `span`
// hold one entry per character of the range.
Ins **work = new Ins * [ni + 1];
uint nc = ub - lb;
GoTo *goTo = new GoTo[nc];
Span *span = new Span[nc];
memset((char*) goTo, 0, nc*sizeof(GoTo));
// Seed the work list with the state built from the closure of ins[0].
findState(work, closure(work, &ins[0]) - work);
while (toDo)
{
State *s = toDo;
toDo = s->link;
Ins **cP, **iP, *i;
uint nGoTos = 0;
uint j;
s->rule = NULL;
// Pass 1: classify every instruction in the kernel of this state.
for (iP = s->kernel; (i = *iP); ++iP)
{
if (i->i.tag == CHAR)
{
// NOTE: this `j` is an Ins* and shadows the outer `uint j`.
// Every entry between i+1 and i->i.link is pushed onto the list whose head
// is goTo[value - lb].to (chained through c.link). The first time a value
// is seen (list was empty) it is also recorded in goTo[nGoTos].ch.
for (Ins *j = i + 1; j < (Ins*) i->i.link; ++j)
{
if (!(j->c.link = goTo[j->c.value - lb].to))
goTo[nGoTos++].ch = j->c.value;
goTo[j->c.value - lb].to = j;
}
}
else if (i->i.tag == TERM)
{
// Keep the rule with the smallest `accept` value.
if (!s->rule || ((RuleOp*) i->i.link)->accept < s->rule->accept)
s->rule = (RuleOp*) i->i.link;
}
else if (i->i.tag == CTXT)
{
s->isPreCtxt = true;
}
}
// Pass 2: for every recorded character value, turn its instruction list
// into a target state (closure of each entry advanced by its `bump`).
for (j = 0; j < nGoTos; ++j)
{
GoTo *go = &goTo[goTo[j].ch - lb];
i = (Ins*) go->to;
for (cP = work; i; i = (Ins*) i->c.link)
cP = closure(cP, i + i->c.bump);
go->to = findState(work, cP - work);
}
// Pass 3: merge runs of consecutive positions with the same target into
// spans; each span stores its exclusive upper bound (lb + j).
// NOTE(review): goTo is indexed with rep[j] here but with (value - lb)
// above — presumably rep[] yields indices in the same lb-relative space;
// confirm against the caller.
s->go.nSpans = 0;
for (j = 0; j < nc;)
{
State *to = (State*) goTo[rep[j]].to;
while (++j < nc && goTo[rep[j]].to == to) ;
span[s->go.nSpans].ub = lb + j;
span[s->go.nSpans].to = to;
s->go.nSpans++;
}
// Clear the targets again so the next state starts from empty lists.
for (j = nGoTos; j-- > 0;)
goTo[goTo[j].ch - lb].to = NULL;
s->go.span = new Span[s->go.nSpans];
memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span));
// The Action constructor stores the new object in s->action, so the
// returned pointer can be discarded.
(void) new Match(s);
}
delete [] work;
delete [] goTo;
delete [] span;
}
// Delete every state on the `next` chain, starting at `head`.
DFA::~DFA()
{
	while (head != NULL)
	{
		State *doomed = head;
		head = doomed->next;
		delete doomed;
	}
}
// Insert state `s` into the state list in front of `*a` and give it the next
// free label. When `a` is the current tail slot, the tail moves to the `next`
// field of `s` so later appends land behind it.
void DFA::addState(State **a, State *s)
{
	s->label = nStates;
	++nStates;

	s->next = *a;
	*a = s;

	if (tail == a)
	{
		tail = &s->next;
	}
}
// Return the state whose kernel equals the instruction set in
// kernel[0..kCount), creating it (and queueing it on `toDo`) if no such state
// exists yet. `kernel` must have room for kCount + 1 entries; it is compacted
// and NULL-terminated in place. The comparison relies on the instructions in
// `kernel` being marked on entry; all marks on them are cleared before
// returning.
State *DFA::findState(Ins **kernel, uint kCount)
{
Ins **cP, **iP, *i;
State *s;
kernel[kCount] = NULL;
// Keep only CHAR, TERM and CTXT instructions; everything else is dropped from
// the kernel and unmarked right away.
cP = kernel;
for (iP = kernel; (i = *iP); ++iP)
{
if (i->i.tag == CHAR || i->i.tag == TERM || i->i.tag == CTXT)
{
*cP++ = i;
}
else
{
unmark(i);
}
}
kCount = cP - kernel;
kernel[kCount] = NULL;
// Look for an existing state with the same kernel: same size and every one
// of its instructions is currently marked (i.e. present in `kernel`).
for (s = head; s; s = s->next)
{
if (s->kCount == kCount)
{
for (iP = s->kernel; (i = *iP); ++iP)
if (!isMarked(i))
goto nextState;
goto unmarkAll;
}
nextState:
;
}
// No match: create a new state at the tail, give it a private copy of the
// kernel (including the NULL terminator) and push it on the work list.
s = new State;
addState(tail, s);
s->kCount = kCount;
s->kernel = new Ins * [kCount + 1];
memcpy(s->kernel, kernel, (kCount + 1)*sizeof(Ins*));
s->link = toDo;
toDo = s;
unmarkAll:
// Clear the marks on the retained instructions, whichever path got us here.
for (iP = kernel; (i = *iP); ++iP)
unmark(i);
return s;
}
} // end namespace re2c

View file

@ -1,366 +0,0 @@
/* $Id: dfa.h 569 2006-06-05 22:14:00Z helly $ */
#ifndef _dfa_h
#define _dfa_h
#include <iosfwd>
#include <map>
#include "re.h"
namespace re2c
{
// Character and span printing helpers. For the three character printers the
// bool parameter (`useTalx`) defaults to true.
extern void prtCh(std::ostream&, uint, bool useTalx = true);
extern void prtHex(std::ostream&, uint, bool useTalx = true);
extern void prtChOrHex(std::ostream&, uint, bool useTalx = true);
// Prints a character range given as (lower bound, upper bound).
extern void printSpan(std::ostream&, uint, uint);
class DFA;
class State;
// Abstract base class for the action attached to a DFA State. Constructing an
// Action installs it as the action of the given state. Subclasses implement
// emit() and override the is*() predicates, which all default to false.
class Action
{
public:
State *state; // the state this action belongs to
public:
Action(State*);
virtual ~Action();
virtual void emit(std::ostream&, uint, bool&) const = 0;
virtual bool isRule() const;
virtual bool isMatch() const;
virtual bool isInitial() const;
virtual bool readAhead() const;
#ifdef PEDANTIC
protected:
// Shallow copy operations: only the `state` pointer is copied.
Action(const Action& oth)
: state(oth.state)
{
}
Action& operator = (const Action& oth)
{
state = oth.state;
return *this;
}
#endif
};
// Action for which isMatch() reports true. The DFA constructor attaches one
// to every state it builds.
class Match: public Action
{
public:
Match(State*);
void emit(std::ostream&, uint, bool&) const;
bool isMatch() const;
};
// Action that carries a numeric label in addition to its state.
class Enter: public Action
{
public:
uint label; // label value passed to the constructor
public:
Enter(State*, uint);
void emit(std::ostream&, uint, bool&) const;
};
// Enter action for which isInitial() reports true; additionally stores a
// `setMarker` flag supplied at construction.
class Initial: public Enter
{
public:
bool setMarker; // flag passed as the third constructor argument
public:
Initial(State*, uint, bool);
void emit(std::ostream&, uint, bool&) const;
bool isInitial() const;
};
// Derived from Match but overrides isMatch() to report false; stores a
// `selector` value supplied at construction.
class Save: public Match
{
public:
uint selector; // value passed as the second constructor argument
public:
Save(State*, uint);
void emit(std::ostream&, uint, bool&) const;
bool isMatch() const;
};
// Action with no data of its own; it only provides its own emit().
class Move: public Action
{
public:
Move(State*);
void emit(std::ostream&, uint, bool&) const;
};
// Action that refers to a set of rules: `nRules` entries in the parallel
// arrays `saves` and `rules`, plus a map from uint keys to states that
// genRuleMap() is expected to fill. The arrays are not owned by this class as
// far as this header shows (there is no destructor releasing them).
class Accept: public Action
{
public:
	typedef std::map<uint, State*> RuleMap;

	uint    nRules;   // number of entries in `saves` / `rules`
	uint    *saves;
	State   **rules;
	RuleMap mapRules;

public:
	Accept(State*, uint, uint*, State**);
	void emit(std::ostream&, uint, bool&) const;
	void emitBinary(std::ostream &o, uint ind, uint l, uint r, bool &readCh) const;
	void genRuleMap();

#ifdef PEDANTIC
private:
	// Copying is private. The copy now includes `mapRules`, which the previous
	// version silently left empty.
	Accept(const Accept& oth)
		: Action(oth)
		, nRules(oth.nRules)
		, saves(oth.saves)
		, rules(oth.rules)
		, mapRules(oth.mapRules)
	{
	}
	// Member-wise assignment. The previous placement-new over `*this` rebuilt
	// the std::map member without destroying it first, leaking its nodes.
	Accept& operator=(const Accept& oth)
	{
		Action::operator=(oth);
		nRules   = oth.nRules;
		saves    = oth.saves;
		rules    = oth.rules;
		mapRules = oth.mapRules;
		return *this;
	}
#endif
};
// Action that refers to a RuleOp; isRule() reports true for it.
class Rule: public Action
{
public:
RuleOp *rule; // the rule this action refers to (not deleted by this class)
public:
Rule(State*, RuleOp*);
void emit(std::ostream&, uint, bool&) const;
bool isRule() const;
#ifdef PEDANTIC
private:
// Private shallow copy operations; assignment re-constructs in place.
Rule (const Rule& oth)
: Action(oth)
, rule(oth.rule)
{
}
Rule& operator=(const Rule& oth)
{
new(this) Rule(oth);
return *this;
}
#endif
};
// One transition of a state: all characters from the previous span's upper
// bound up to (excluding) `ub` lead to state `to`.
class Span
{
public:
uint ub; // exclusive upper bound of the character range
State *to; // target state; show() prints nothing when this is NULL
public:
// Prints the span given its lower bound and returns `ub`.
uint show(std::ostream&, uint) const;
};
// Transition table of a state: an array of `nSpans` spans plus several
// derived counters used by the code generators declared below. The counters
// start out as ~0u — presumably meaning "not computed yet"; confirm in the
// implementation.
class Go
{
public:
Go()
: nSpans(0)
, wSpans(~0u)
, lSpans(~0u)
, dSpans(~0u)
, lTargets(~0u)
, span(NULL)
{
}
public:
uint nSpans; // number of spans
uint wSpans; // number of spans in wide mode
uint lSpans; // number of low (non wide) spans
uint dSpans; // number of decision spans (decide between g and b mode)
uint lTargets;
Span *span; // array of nSpans entries; released by State::~State
public:
// Code generators; all take the output stream, an indentation level, the
// source state, the state that follows it and a read-character flag.
void genGoto( std::ostream&, uint ind, const State *from, const State *next, bool &readCh);
void genBase( std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
void genLinear(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
void genBinary(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
void genSwitch(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
void genCpGoto(std::ostream&, uint ind, const State *from, const State *next, bool &readCh) const;
void compact();
void unmap(Go*, const State*);
};
// One state of the DFA. States form a singly linked list through `next`;
// `link` chains states on the DFA's work list. A state owns its `kernel`
// array, its `action` and the span array inside `go` (all released by the
// destructor).
class State
{
public:
	uint   label;      // sequence number assigned by DFA::addState
	RuleOp *rule;      // rule accepted in this state, or NULL
	State  *next;      // next state in the DFA's state list
	State  *link;      // next state on the work list
	uint   depth;      // for finding SCCs
	uint   kCount;     // number of entries in `kernel` (excluding the NULL)
	Ins    **kernel;   // NULL-terminated array of kernel instructions
	bool   isPreCtxt;  // set when the kernel contains a CTXT instruction
	bool   isBase;
	Go     go;         // outgoing transitions
	Action *action;    // action attached to this state

public:
	State();
	~State();
	void emit(std::ostream&, uint, bool&) const;
	friend std::ostream& operator<<(std::ostream&, const State&);
	friend std::ostream& operator<<(std::ostream&, const State*);

#ifdef PEDANTIC
private:
	// Private shallow copy. `isPreCtxt` is now copied as well; it used to be
	// left uninitialized in the copy.
	State(const State& oth)
		: label(oth.label)
		, rule(oth.rule)
		, next(oth.next)
		, link(oth.link)
		, depth(oth.depth)
		, kCount(oth.kCount)
		, kernel(oth.kernel)
		, isPreCtxt(oth.isPreCtxt)
		, isBase(oth.isBase)
		, go(oth.go)
		, action(oth.action)
	{
	}
	State& operator = (const State& oth)
	{
		new(this) State(oth);
		return *this;
	}
#endif
};
// The automaton: a linked list of states plus the character range it covers.
class DFA
{
public:
uint lbChar; // lower bound of the character range (constructor argument lb)
uint ubChar; // upper bound of the character range (constructor argument ub)
uint nStates; // number of labels handed out by addState()
State *head, **tail; // state list and the slot where the next state is appended
State *toDo; // work list of states still to be processed (chained via link)
public:
DFA(Ins*, uint, uint, uint, Char*);
~DFA();
void addState(State**, State*);
State *findState(Ins**, uint);
void split(State*);
void findSCCs();
void findBaseState();
void emit(std::ostream&, uint);
friend std::ostream& operator<<(std::ostream&, const DFA&);
friend std::ostream& operator<<(std::ostream&, const DFA*);
#ifdef PEDANTIC
// NOTE(review): unlike the other classes in this header these copy
// operations are public, and they copy the raw list pointers, so a copy
// would share (and later double-delete) the states — confirm intent.
DFA(const DFA& oth)
: lbChar(oth.lbChar)
, ubChar(oth.ubChar)
, nStates(oth.nStates)
, head(oth.head)
, tail(oth.tail)
, toDo(oth.toDo)
{
}
DFA& operator = (const DFA& oth)
{
new(this) DFA(oth);
return *this;
}
#endif
};
// Attach this action to state `s`, deleting whatever action `s` held before.
// `s` must not be NULL (it is dereferenced unconditionally).
inline Action::Action(State *s) : state(s)
{
delete s->action;
s->action = this;
}
// Virtual destructor; the base class has nothing to release.
inline Action::~Action()
{
}
// Default: an action is not a rule action (Rule overrides this).
inline bool Action::isRule() const
{
return false;
}
// Default: an action is not a match action (Match overrides this).
inline bool Action::isMatch() const
{
return false;
}
// Default: an action is not an initial action (Initial overrides this).
inline bool Action::isInitial() const
{
return false;
}
inline bool Action::readAhead() const
{
return !isMatch() || (state && state->next && state->next->action && !state->next->action->isRule());
}
// Forwards to Action(s), which installs this object as the action of `s`.
inline Match::Match(State *s) : Action(s)
{ }
// A Match action always reports itself as a match.
inline bool Match::isMatch() const
{
return true;
}
// Installs the action on `s` (via Action) and stores the label `l`.
inline Enter::Enter(State *s, uint l) : Action(s), label(l)
{ }
// Installs the action on `s` with label `l` (via Enter) and stores `b` in
// setMarker.
inline Initial::Initial(State *s, uint l, bool b) : Enter(s, l), setMarker(b)
{ }
// An Initial action always reports itself as initial.
inline bool Initial::isInitial() const
{
return true;
}
// Installs the action on `s` (via Match) and stores the selector `i`.
inline Save::Save(State *s, uint i) : Match(s), selector(i)
{ }
// Although Save derives from Match, it does not count as a match action.
inline bool Save::isMatch() const
{
return false;
}
// A Rule action always reports itself as a rule.
inline bool Rule::isRule() const
{
return true;
}
// Pointer overload: forwards to the reference overload. `s` must not be NULL
// (it is dereferenced unconditionally).
inline std::ostream& operator<<(std::ostream &o, const State *s)
{
return o << *s;
}
// Pointer overload: forwards to the reference overload. `dfa` must not be
// NULL (it is dereferenced unconditionally).
inline std::ostream& operator<<(std::ostream &o, const DFA *dfa)
{
return o << *dfa;
}
} // end namespace re2c
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,48 +0,0 @@
@Article{Bumbulis94,
author = {Peter Bumbulis and Donald D. Cowan},
title = {RE2C -- A More Versatile Scanner Generator},
journal = "ACM Letters on Programming Languages and Systems",
volume = 2,
number = "1--4",
year = 1994,
abstract = {
It is usually claimed that lexical analysis routines are still coded by
hand, despite the widespread availability of scanner generators, for
efficiency reasons. While efficiency is a consideration, there exist
freely available scanner generators such as GLA \cite{Gray88} that can
generate scanners that are faster than most hand-coded ones. However,
most generated scanners are tailored for a particular environment, and
retargetting these scanners to other environments, if possible, is
usually complex enough to make a hand-coded scanner more appealing. In
this paper we describe RE2C, a scanner generator that not only generates
scanners which are faster (and usually smaller) than those produced by
any other scanner generator known to the authors, including GLA, but
also adapt easily to any environment.
}
}
@Article{Gray88,
author = {Robert W. Gray},
title = {{$\gamma$-GLA} - {A} Generator for Lexical Analyzers That
Programmers Can Use},
journal = {USENIX Conference Proceedings},
year = {1988},
month = {June},
pages = {147-160},
abstract = {Writing an efficient lexical analyzer for even a simple
language is not a trivial task, and should not be done by hand. We
describe GLA, a tool that generates very efficient scanners. These
scanners do not use the conventional transition matrix, but instead
use a few 128 element vectors. Scanning time is only slightly
greater than the absolute minimum --- the time it takes to look at
each character in a file. The GLA language allows simple, concise
specification of scanners. Augmenting regular expressions with
auxiliary scanners easily handles nasty problems such as C comments
and C literal constants. We formalize the connection between token
scanning and token processing by associating a processor with
appropriate patterns. A library of canned descriptions simplifies the
specification of commonly used language pieces --- such as,
C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully
tuned lexical analysis support modules are provided for error
handling, input buffering, storing identifiers in hash tables and
manipulating denotations.}
}

View file

@ -0,0 +1,83 @@
re2c lesson 001_upn_calculator, (c) M. Boerger 2006
This lesson gets you started with re2c. In the end you will have an easy RPN
(reverse polish notation) calculator for use at command line.
You will learn about the basic interface of re2c when scanning input strings.
How to detect the end of the input and use that to stop scanning in order to
avoid problems.
Once you have successfully installed re2c you can use it to generate *.c files
from the *.re files presented in this lesson. Actually the expected *.c files
are already present. So you should name them *.cc or something alike or just
give them a different name like test.c. To do so you simply change into the
directory and execute the following command:
re2c calc_001.re > test.c
Then use your compiler to compile that code and run it. If you are using gcc
you simply do the following:
gcc -o test.o test.c
./test.o <input_file_name>
If you are using windows you might want to read till the end of this lesson.
When you want to debug the code it helps to make re2c generate working #line
information. To do so you simply specify the output file using the -o switch
followed by the output filename:
re2c -o test.c calc_001.re
The input files *.re each contain step-by-step comments that explain what is
going on and what you can see in the examples.
In order to optimize the generated code we will use the -s command line switch
of re2c. This tells re2c to generate code that uses if statements rather
than endless switch/case expressions where appropriate. Note that the file name
extension is actually '.s.re' to tell the test system to use the -s switch. To
invoke re2c you do the following:
re2c -s -o test.c calc_006.s.re
Finally we use the -b switch to have the code use a decision table. The -b
switch also contains the -s behavior.
re2c -b -o test.c calc_007.b.re
-------------------------------------------------------------------------------
For Windows users Lynn Allan provided some additional stuff to get you started
in the Microsoft world. This addon resides in the windows subdirectory and
gives you something to experiment with. The code in that directory is based
on the first step and has the following changes:
* vc6 .dsp/.dsw and vc7/vc8 .sln/.vcproj project files that have "Custom Build
Steps" that can tell when main.re changes, and know how to generate main.c
from main.re. They assume that you unpacked the zip package and have re2c
itself built or installed in the Release and Release-2005 directory respectively.
If re2c cannot be found you need to modify the custom build step and correct
the path to re2c.
* BuildAndRun.bat to run re2c from the command line, then cl, and then run the
executable (discontinues with message if errors).
* built-in cppunit-like test to confirm it worked as expected.
* array of test strings "fed" to scan rather than file contents to facilitate
testing and also reduce the newbie learning curve.
* HiResTimer output for 10,000 loops and 100,000 loops. While this might be
excessive for this lesson, it illustrates how to do it for subsequent lessons
and your own stuff using windows. Also it shows that Release build is as fast
as strncmp for this test and can probably be made significantly faster.
* If you want to build the other steps of this lesson using windows tools
simply copy the *.re files into the windows directory as main.re and rebuild.
-------------------------------------------------------------------------------
Sidenote: UPN is the German translation of RPN, somehow hardcoded into the
author's brain :-)

View file

@ -0,0 +1,84 @@
/* re2c lesson 001_upn_calculator, calc_001, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- basic interface for string reading
. We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL
. YYCTYPE is the type re2c operates on or in other words the type that
it generates code for. While it would not make a big difference if we were
using 'unsigned char' here, we would need to run re2c with option -w
to fully support types with sizeof() > 1.
. YYCURSOR is used internally and holds the current scanner position. In
expression handlers, the code blocks after re2c expressions, this can be
used to identify the end of the token.
. YYMARKER is not always being used so we set an initial value to avoid
a compiler warning. Here we could also omit it completely.
. YYLIMIT stores the end of the input. Unfortunately we have to use strlen()
in this lesson. In the next example we see one way to get rid of it.
. We use a 'for(;;)'-loop around the scanner block. We could have used a
'while(1)'-loop instead but some compilers generate a warning for it.
. To make the output more readable we use 're2c:indent:top' scanner
configuration that configures re2c to prepend a single tab (the default)
to the beginning of each output line.
. The following lines are expressions and for each expression we output the
token name and continue the scanner loop.
. The second last token detects the end of our input, the terminating zero in
our input string. In other scanners detecting the end of input may vary.
For example binary code may contain \0 as valid input.
. The last expression accepts any input character. It tells re2c to accept
the opposite of the empty range. This includes numbers and our tokens but
as re2c goes from top to bottom when evaluating the expressions this is no
problem.
. The first three rules show that re2c actually prioritizes the expressions
from top to bottom. Octal numbers require a starting "0" and the actual
number. Normal numbers start with a digit greater than 0. And zero is finally a
special case. A single "0" is detected by the last rule of this set. And a
valid octal number is already detected by the first rule. This even
includes multi "0" sequences that in octal notation also mean zero.
Another way would be to only use two rules:
"0" [0-9]+
"0" | ( [1-9] [0-9]* )
A full description of re2c rule syntax can be found in the manual.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Tokenize the first `l` characters of `s`, printing one line per token.
 * Returns 0 when the terminating zero is reached and 1 on any other
 * unexpected character. The rules live in the re2c block below; re2c
 * replaces that block with the generated scanner code. */
int scan(char *s, int l)
{
char *p = s;
char *q = 0;
/* Scanner interface expected by the re2c-generated code. YYFILL is empty:
 * the whole input is already in memory. */
#define YYCTYPE char
#define YYCURSOR p
#define YYLIMIT (s+l)
#define YYMARKER q
#define YYFILL(n)
for(;;)
{
/*!re2c
re2c:indent:top = 2;
"0"[0-9]+ { printf("Oct\n"); continue; }
[1-9][0-9]* { printf("Num\n"); continue; }
"0" { printf("Num\n"); continue; }
"+" { printf("+\n"); continue; }
"-" { printf("-\n"); continue; }
"\000" { printf("EOF\n"); return 0; }
[^] { printf("ERR\n"); return 1; }
*/
}
}
/* Scan the first command line argument; without one, print a usage line to
 * stderr and return 1. */
int main(int argc, char **argv)
{
	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 1;
	}
	return scan(argv[1], strlen(argv[1]));
}

View file

@ -0,0 +1,69 @@
/* re2c lesson 001_upn_calculator, calc_002, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- making use of YYFILL
. Here we modified the scanner to not require strlen() on the call. Instead
we compute limit on the fly. That is whenever more input is needed we
search for the terminating \0 in the next n chars the scanner needs.
. If there is not enough input we quit the scanner.
. Note that in lesson_001 YYLIMIT was a character pointer computed only once.
Here it is of course still a character pointer, but a variable that gets
reevaluated by YYFILL().
. To make the code smaller we take advantage of the fact that our loop has no
break so far. This allows us to use break here and have the code that is
used for YYFILL() not contain the printf in every occurrence. That way the
generated code gets smaller.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Advance from `p` until either a zero byte is found or the counter `n` runs
 * out, store the final position in `*l`, and return non-zero when the
 * counter ended at or below zero. */
int fill(char *p, int n, char **l)
{
	for (;;)
	{
		++p;
		if (*p == '\0')
		{
			break;
		}
		/* Post-decrement: the counter is consumed even on the exit step. */
		if (n-- == 0)
		{
			break;
		}
	}
	*l = p;
	return n <= 0;
}
/* Tokenize the zero-terminated string `s`, printing one line per token.
 * The limit is not known up front; YYFILL() calls fill() to extend it and
 * breaks out of the loop when not enough input is left.
 * Returns 0 at the terminating zero, 1 on an unexpected character and 2
 * ("OOD", out of data) when YYFILL() gave up.
 * The "-" rule now prints "-" (it used to print "+", a copy of the rule
 * above it). */
int scan(char *s)
{
	char *p = s;
	char *l = s;
	char *q = 0;
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         l
#define YYMARKER        q
#define YYFILL(n)       { if (!fill(p, n, &l)) break; }

	for(;;)
	{
/*!re2c
	re2c:indent:top = 2;
	"0"[0-9]+	{ printf("Oct\n");	continue; }
	[1-9][0-9]*	{ printf("Num\n");	continue; }
	"0"		{ printf("Num\n");	continue; }
	"+"		{ printf("+\n");	continue; }
	"-"		{ printf("-\n");	continue; }
	"\000"		{ printf("EOF\n");	return 0; }
	[^]		{ printf("ERR\n");	return 1; }
*/
	}
	printf("OOD\n"); return 2;
}
/* Scan the first command line argument; without one, print a usage line to
 * stderr and return 0. */
int main(int argc, char **argv)
{
	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 0;
	}
	return scan(argv[1]);
}

View file

@ -0,0 +1,61 @@
/* re2c lesson 001_upn_calculator, calc_003, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- making use of YYFILL
. Again provide the length of the input to generate the limit only once. Now
we can use YYFILL() to detect the end and simply return since YYFILL() is
only being used if the next scanner run might use more chars than YYLIMIT
allows.
. Note that we now use (s+l+2) instead of (s+l) as we did in lesson_001. In
the first lesson we did not quit from YYFILL() and used a special rule to
detect the end of input. Here we use the fact that we know the exact end
of input and that this length does not include the terminating zero. Since
YYLIMIT points to the first character behind the used buffer we use "+ 2".
If we would use "+1" we could drop the "\000" rule but could no longer
distinguish between end of input and out of data.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Tokenize `s` (length `l`, not counting the terminating zero), printing one
 * line per token. YYLIMIT is fixed at s+l+2; YYFILL() is only reached when
 * the scanner would need more than that, in which case "OOD" is printed and
 * 2 is returned. Returns 0 at the terminating zero and 1 on an unexpected
 * character.
 * The "-" rule now prints "-" (it used to print "+", a copy of the rule
 * above it). */
int scan(char *s, int l)
{
	char *p = s;
	char *q = 0;
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         (s+l+2)
#define YYMARKER        q
#define YYFILL(n)       { printf("OOD\n"); return 2; }

	for(;;)
	{
/*!re2c
	re2c:indent:top = 2;
	"0"[0-9]+	{ printf("Oct\n");	continue; }
	[1-9][0-9]*	{ printf("Num\n");	continue; }
	"0"		{ printf("Num\n");	continue; }
	"+"		{ printf("+\n");	continue; }
	"-"		{ printf("-\n");	continue; }
	"\000"		{ printf("EOF\n");	return 0; }
	[^]		{ printf("ERR\n");	return 1; }
*/
	}
	return 0;
}
/* Scan the first command line argument; without one, print a usage line to
 * stderr and return 0. */
int main(int argc, char **argv)
{
	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 0;
	}
	return scan(argv[1], strlen(argv[1]));
}

View file

@ -0,0 +1,78 @@
/* re2c lesson 001_upn_calculator, calc_004, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- making use of definitions
. We provide complex rules as definitions. We can even have definitions made
up from other definitions. And we could also use definitions as part of
rules and not only as full rules as shown in this lesson.
- showing the tokens
. re2c does not store the beginning of a token on its own but we can easily
do this by providing a variable, in our case t, that is set to YYCURSOR on
every loop. If we were not using a loop here, we could have used
s instead of a new variable.
. As we use the token for an output function that requires a terminating zero
we copy the token. Alternatively we could store the end of the token, then
replace it with a zero character and replace it after the token has been
used. However that approach is not always acceptable.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Return a freshly malloc()ed, zero-terminated copy of the characters in
 * [t, l). The caller owns the result and must free() it.
 * Fix: the terminator is now written at index (l - t), inside the buffer.
 * The previous code wrote it one byte past the allocation and left the last
 * byte of the buffer uninitialized. */
char * tokendup(const char *t, const char *l)
{
	size_t len = (size_t)(l - t);   /* number of characters to copy */
	char *r = (char*)malloc(len + 1);

	memmove(r, t, len);
	r[len] = '\0';
	return r;
}
/* Tokenize `s` (length `l`, not counting the terminating zero). `t` records
 * where the current token starts so that number tokens can be copied with
 * tokendup() and printed. Returns 0 at the terminating zero, 1 on an
 * unexpected character and 2 ("OOD") when YYFILL() is reached.
 * The "-" rule now prints "-" (it used to print "+", a copy of the rule
 * above it). */
int scan(char *s, int l)
{
	char *p = s;
	char *q = 0;
	char *t;
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         (s+l+2)
#define YYMARKER        q
#define YYFILL(n)       { printf("OOD\n"); return 2; }

	for(;;)
	{
		t = p;
/*!re2c
	re2c:indent:top = 2;

	DIGIT	= [0-9] ;
	OCT	= "0" DIGIT+ ;
	INT	= "0" | ( [1-9] DIGIT* ) ;

	OCT	{ t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; }
	INT	{ t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; }
	"+"	{ printf("+\n");	continue; }
	"-"	{ printf("-\n");	continue; }
	"\000"	{ printf("EOF\n");	return 0; }
	[^]	{ printf("ERR\n");	return 1; }
*/
	}
	return 0;
}
/* Scan the first command line argument; without one, print a usage line to
 * stderr and return 0. */
int main(int argc, char **argv)
{
	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 0;
	}
	return scan(argv[1], strlen(argv[1]));
}

View file

@ -0,0 +1,144 @@
/* re2c lesson 001_upn_calculator, calc_005, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- turning this lesson into an easy calculator
. We are going to write an UPN calculator so we need an additional rule to
ignore white space.
. Then we need to store the scanned input somewhere and do our math on it.
. Also we need to scan all arguments since the main c code gets the input
split up into chunks.
. In contrast to what we did before we now add a variable res that holds the
scanner state. We initialize that variable to 0 and quit the loop when it
is non zero. This will also be our return value so that we can use it in
function main to generate error information.
. To support operating systems where ' and " get passed in program arguments
we check for them being first and last input character. If so we correct
input pointer and input length. Since now our scanner might not see a
terminating zero we change YYLIMIT again and drop the special zero rule.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* DEBUG(stmt) executes `stmt`; redefine it to nothing to silence the trace. */
#define DEBUG(stmt) stmt

/* Operand stack of the calculator and the number of values currently on it. */
int  stack[4];
int  depth = 0;

/* Convert the digits in [t, l) using `radix` and push the value.
 * Returns 0 on success or 3 when the stack is already full.
 * Fix: the capacity check now compares against the number of elements in
 * `stack`. The previous `sizeof(stack)` is the size in bytes, so pushes
 * beyond the fourth element wrote past the array. */
int push_num(const char *t, const char *l, int radix)
{
	int num = 0;

	if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
	{
		return 3;
	}

	for (; t < l; ++t)
	{
		num = num * radix + (*t - '0');
	}
	DEBUG(printf("Num: %d\n", num));
	stack[depth++] = num;
	return 0;
}
/* Replace the two topmost stack values by their sum.
 * Returns 0 on success or 4 when fewer than two values are available. */
int stack_add()
{
	int rhs;

	if (depth < 2)
	{
		return 4;
	}
	rhs = stack[--depth];
	stack[depth - 1] += rhs;
	return 0;
}
/* Replace the two topmost stack values by (second - top).
 * Returns 0 on success or 4 when fewer than two values are available. */
int stack_sub()
{
	int rhs;

	if (depth < 2)
	{
		return 4;
	}
	rhs = stack[--depth];
	stack[depth - 1] -= rhs;
	return 0;
}
/* Scan `l` characters of `s` and evaluate them on the global stack.
 * The loop runs until an action sets `res` to a non-zero code (1 illegal
 * character, 3/4 as returned by push_num/stack_add/stack_sub). When the
 * generated code calls YYFILL() the function returns directly: 0 if exactly
 * one value is on the stack, 2 otherwise. */
int scan(char *s, int l)
{
char *p = s;
char *q = 0;
char *t;
int res = 0;
#define YYCTYPE char
#define YYCURSOR p
#define YYLIMIT (s+l+1)
#define YYMARKER q
#define YYFILL(n) { return depth == 1 ? 0 : 2; }
while(!res)
{
/* Remember where the next token starts; push_num() needs both ends. */
t = p;
/*!re2c
re2c:indent:top = 2;
DIGIT = [0-9] ;
OCT = "0" DIGIT+ ;
INT = "0" | ( [1-9] DIGIT* ) ;
WS = [ \t]+ ;
WS { continue; }
OCT { res = push_num(t, p, 8); continue; }
INT { res = push_num(t, p, 10); continue; }
"+" { res = stack_add(); continue; }
"-" { res = stack_sub(); continue; }
[^] { res = 1; continue; }
*/
}
return res;
}
/* Feed every command line argument to scan() until one of them reports an
 * error, then print the result or the matching error message. An argument
 * that starts and ends with a double quote has the quotes stripped first.
 * The exit status equals the scanner result code; with no arguments a usage
 * line is printed and 0 is returned. */
int main(int argc, char **argv)
{
	static const char *const errors[] =
	{
		"Illegal character in input.\n",
		"Premature end of input.\n",
		"Stack overflow.\n",
		"Stack underflow.\n"
	};
	char *inp;
	int res = 0, argp, len;

	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 0;
	}

	for (argp = 1; !res && argp < argc; ++argp)
	{
		inp = argv[argp];
		len = strlen(inp);
		if (inp[0] == '\"' && inp[len-1] == '\"')
		{
			/* Drop the surrounding quotes. */
			++inp;
			len -= 2;
		}
		res = scan(inp, len);
	}

	if (res == 0)
	{
		printf("Result: %d\n", stack[0]);
		return 0;
	}
	if (res >= 1 && res <= 4)
	{
		fputs(errors[res - 1], stderr);
		return res;
	}
}

View file

@ -0,0 +1,162 @@
/* re2c lesson 001_upn_calculator, calc_006, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- avoiding YYFILL()
. We use the inplace configuration re2c:yyfill to suppress generation of
YYFILL() blocks. This of course means we no longer have to provide the
macro.
. We also drop the YYMARKER stuff since we know that re2c does not generate
it for this example.
. Since re2c no longer checks for out of data situations we must do this.
For that reason we first reintroduce our zero rule and second we need to
ensure that the scanner does not take more than one byte in one go.
In the example suppose "0" is passed. The scanner reads the first "0" and
then is in an undecided state. The scanner can earliest decide on the next
char what the token is. In case of a zero the input ends and it was a
number, 0 to be precise. In case of a digit it is an octal number and the
next character needs to be read. In case of any other character the scanner
will detect an error with the any rule [^].
Now the above shows that the scanner may read two characters directly. But
only if the first is a "0". So we could easily check that if the first char
is "0" and the next char is a digit then yet another character is present.
But we require our input to be zero terminated. And that means we do not
have to check anything for this scanner.
However with other rule sets re2c might read more than one character in a
row. In those cases it is normally hard to impossible to avoid YYFILL.
- optimizing the generated code by using -s command line switch of re2c
. This tells re2c to generate code that uses if statements rather
then endless switch/case expressions where appropriate. Note that the
generated code now requires the input to be unsigned char rather than char
due to the way comparisons are generated.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* DEBUG(stmt) executes `stmt`; redefine it to nothing to silence the trace. */
#define DEBUG(stmt) stmt

/* Operand stack of the calculator and the number of values currently on it. */
int  stack[4];
int  depth = 0;

/* Convert the digits in [t, l) using `radix` and push the value.
 * Returns 0 on success or 3 when the stack is already full.
 * Fix: the capacity check now compares against the number of elements in
 * `stack`. The previous `sizeof(stack)` is the size in bytes, so pushes
 * beyond the fourth element wrote past the array. */
int push_num(const unsigned char *t, const unsigned char *l, int radix)
{
	int num = 0;

	if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
	{
		return 3;
	}

	for (; t < l; ++t)
	{
		num = num * radix + (*t - (unsigned char)'0');
	}
	DEBUG(printf("Num: %d\n", num));
	stack[depth++] = num;
	return 0;
}
/* Replace the two topmost stack values by their sum and trace a "+".
 * Returns 0 on success or 4 when fewer than two values are available. */
int stack_add()
{
	int rhs;

	if (depth < 2)
	{
		return 4;
	}
	rhs = stack[--depth];
	stack[depth - 1] += rhs;
	DEBUG(printf("+\n"));
	return 0;
}
/* Replace the two topmost stack values by (second - top) and trace a "-".
 * Returns 0 on success or 4 when fewer than two values are available. */
int stack_sub()
{
	int rhs;

	if (depth < 2)
	{
		return 4;
	}
	rhs = stack[--depth];
	stack[depth - 1] -= rhs;
	DEBUG(printf("-\n"));
	return 0;
}
/* Scan the zero-terminated string `s` and evaluate it on the global stack.
 * YYFILL is disabled in the re2c block, so end of input is detected by the
 * "\000" rule: it yields 0 if exactly one value is on the stack and 2
 * otherwise. Other result codes: 1 illegal character, 3/4 as returned by
 * push_num/stack_add/stack_sub. */
int scan(char *s)
{
/* The generated comparisons need unsigned characters. */
unsigned char *p = (unsigned char*)s;
unsigned char *t;
int res = 0;
#define YYCTYPE unsigned char
#define YYCURSOR p
while(!res)
{
/* Remember where the next token starts; push_num() needs both ends. */
t = p;
/*!re2c
re2c:indent:top = 2;
re2c:yyfill:enable = 0;
DIGIT = [0-9] ;
OCT = "0" DIGIT+ ;
INT = "0" | ( [1-9] DIGIT* ) ;
WS = [ \t]+ ;
WS { continue; }
OCT { res = push_num(t, p, 8); continue; }
INT { res = push_num(t, p, 10); continue; }
"+" { res = stack_add(); continue; }
"-" { res = stack_sub(); continue; }
"\000" { res = depth == 1 ? 0 : 2; break; }
[^] { res = 1; continue; }
*/
}
return res;
}
/* Feed every command line argument to scan() until one of them reports an
 * error, then print the result or the matching error message. Each argument
 * is copied first so that a trailing double quote can be overwritten; an
 * argument that starts and ends with a double quote has the quotes stripped.
 * The exit status equals the scanner result code; with no arguments a usage
 * line is printed and 0 is returned.
 * Fix: the copy is released through the pointer strdup() returned. The
 * previous code called free() on `inp` after it had been advanced past the
 * opening quote, which is an invalid free. */
int main(int argc, char **argv)
{
	if (argc > 1)
	{
		char *buf, *inp;
		int res = 0, argp = 0, len;

		while(!res && ++argp < argc)
		{
			buf = strdup(argv[argp]);
			inp = buf;
			len = strlen(inp);
			if (inp[0] == '\"' && inp[len-1] == '\"')
			{
				inp[len - 1] = '\0';
				++inp;
			}
			res = scan(inp);
			free(buf);
		}
		switch(res)
		{
		case 0:
			printf("Result: %d\n", stack[0]);
			return 0;
		case 1:
			fprintf(stderr, "Illegal character in input.\n");
			return 1;
		case 2:
			fprintf(stderr, "Premature end of input.\n");
			return 2;
		case 3:
			fprintf(stderr, "Stack overflow.\n");
			return 3;
		case 4:
			fprintf(stderr, "Stack underflow.\n");
			return 4;
		}
	}
	else
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 0;
	}
}

View file

@ -0,0 +1,135 @@
/* re2c lesson 001_upn_calculator, calc_007, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- optimizing the generated code by using -b command line switch of re2c
. This tells re2c to generate code that uses a decision table. The -b switch
also contains the -s behavior. And -b also requires the input to be
unsigned chars.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* DEBUG(stmt) executes `stmt`; redefine it to nothing to silence the trace. */
#define DEBUG(stmt) stmt

/* Operand stack of the calculator and the number of values currently on it. */
int  stack[4];
int  depth = 0;

/* Convert the digits in [t, l) using `radix` and push the value.
 * Returns 0 on success or 3 when the stack is already full.
 * Fix: the capacity check now compares against the number of elements in
 * `stack`. The previous `sizeof(stack)` is the size in bytes, so pushes
 * beyond the fourth element wrote past the array. */
int push_num(const unsigned char *t, const unsigned char *l, int radix)
{
	int num = 0;

	if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
	{
		return 3;
	}

	for (; t < l; ++t)
	{
		num = num * radix + (*t - (unsigned char)'0');
	}
	DEBUG(printf("Num: %d\n", num));
	stack[depth++] = num;
	return 0;
}
/* Replace the two topmost stack values by their sum and trace a "+".
 * Returns 0 on success or 4 when fewer than two values are available. */
int stack_add()
{
	int rhs;

	if (depth < 2)
	{
		return 4;
	}
	rhs = stack[--depth];
	stack[depth - 1] += rhs;
	DEBUG(printf("+\n"));
	return 0;
}
/* Replace the two topmost stack values by (second - top) and trace a "-".
 * Returns 0 on success or 4 when fewer than two values are available.
 * Fix: the debug trace printed "+" (copied from stack_add); it now prints
 * "-", matching the operation and the previous lesson step. */
int stack_sub()
{
	if (depth < 2) return 4;

	--depth;
	stack[depth-1] = stack[depth-1] - stack[depth];
	DEBUG(printf("-\n"));
	return 0;
}
/* Scan the zero-terminated string `s` and evaluate it on the global stack.
 * YYFILL is disabled in the re2c block, so end of input is detected by the
 * "\000" rule: it yields 0 if exactly one value is on the stack and 2
 * otherwise. Other result codes: 1 illegal character, 3/4 as returned by
 * push_num/stack_add/stack_sub. */
int scan(char *s)
{
/* The generated code needs unsigned characters. */
unsigned char *p = (unsigned char*)s;
unsigned char *t;
int res = 0;
#define YYCTYPE unsigned char
#define YYCURSOR p
while(!res)
{
/* Remember where the next token starts; push_num() needs both ends. */
t = p;
/*!re2c
re2c:indent:top = 2;
re2c:yyfill:enable = 0;
DIGIT = [0-9] ;
OCT = "0" DIGIT+ ;
INT = "0" | ( [1-9] DIGIT* ) ;
WS = [ \t]+ ;
WS { continue; }
OCT { res = push_num(t, p, 8); continue; }
INT { res = push_num(t, p, 10); continue; }
"+" { res = stack_add(); continue; }
"-" { res = stack_sub(); continue; }
"\000" { res = depth == 1 ? 0 : 2; break; }
[^] { res = 1; continue; }
*/
}
return res;
}
/* Feed every command line argument to scan() until one of them reports an
 * error, then print the result or the matching error message. Each argument
 * is copied first so that a trailing double quote can be overwritten; an
 * argument that starts and ends with a double quote has the quotes stripped.
 * The exit status equals the scanner result code; with no arguments a usage
 * line is printed and 0 is returned.
 * Fix: the copy is released through the pointer strdup() returned. The
 * previous code called free() on `inp` after it had been advanced past the
 * opening quote, which is an invalid free. */
int main(int argc, char **argv)
{
	if (argc > 1)
	{
		char *buf, *inp;
		int res = 0, argp = 0, len;

		while(!res && ++argp < argc)
		{
			buf = strdup(argv[argp]);
			inp = buf;
			len = strlen(inp);
			if (inp[0] == '\"' && inp[len-1] == '\"')
			{
				inp[len - 1] = '\0';
				++inp;
			}
			res = scan(inp);
			free(buf);
		}
		switch(res)
		{
		case 0:
			printf("Result: %d\n", stack[0]);
			return 0;
		case 1:
			fprintf(stderr, "Illegal character in input.\n");
			return 1;
		case 2:
			fprintf(stderr, "Premature end of input.\n");
			return 2;
		case 3:
			fprintf(stderr, "Stack overflow.\n");
			return 3;
		case 4:
			fprintf(stderr, "Stack underflow.\n");
			return 4;
		}
	}
	else
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 0;
	}
}

View file

@ -0,0 +1,158 @@
/* re2c lesson 001_upn_calculator, calc_008, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- using -b with signed character input
. Since the code is being generated with -b switch re2c requires the internal
character variable yych to use an unsigned character type. For that reason
the previous lessons had a conversion at the beginning of their scan()
function. Other re2c generated code often have the scanners work completely
on unsigned input. Thus requesting a conversion.
To avoid the conversion on input, re2c allows to do the conversion when
reading the internal yych variable. To enable that conversion you need to
use the inplace configuration 're2c:yych:conversion' and set it to 1. This
will change the generated code to insert conversions to YYCTYPE whenever
yych is being read.
- More inplace configurations for better/nicer code
. re2c allows to overwrite the generation of any define, label or variable
used in the generated code. For example we overwrite the 'yych' variable
name to 'curr' using inplace configuration 're2c:variable:yych = curr;'.
. Furthermore we use inplace configurations instead of defines. This allows
to use correct conversions to 'unsigned char' instead of having to convert
to 'YYCTYPE' when placing 're2c:define:YYCTYPE = "unsigned char";' in front
of 're2c:yych:conversion'. Note that we have to use quotes for the
first setting as it contains a space.
. Last but not least we use 're2c:labelprefix = scan' to change the prefix
of generated labels.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Wrapper for trace statements; it expands to the statement itself, so the
 * traces are compiled in. */
#define DEBUG(stmt) stmt
/* Operand stack of the calculator and the number of values stored in it. */
int stack[4];
int depth = 0;
/* Convert the digits in [t, l) using the given radix and push the value.
 * t     - first character of the number
 * l     - one past the last character of the number
 * radix - numeric base applied to each digit (the scanner passes 8 or 10)
 * Returns 0 on success or 3 when the stack is already full. */
int push_num(const char *t, const char *l, int radix)
{
	int num = 0;
	/* Compare against the number of elements, not the size in bytes. */
	if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
	{
		return 3;
	}
	for (; t < l; ++t)
	{
		num = num * radix + (*t - '0');
	}
	DEBUG(printf("Num: %d\n", num));
	stack[depth++] = num;
	return 0;
}
/* Replace the two topmost stack values by their sum.
 * Returns 0 on success or 4 when fewer than two values are on the stack. */
int stack_add()
{
	int top;
	if (depth < 2)
	{
		return 4;
	}
	/* Drop the top value and fold it into the one below. */
	top = --depth;
	stack[top - 1] += stack[top];
	DEBUG(printf("+\n"));
	return 0;
}
/* Replace the two topmost stack values by their difference
 * (second-from-top minus top).
 * Returns 0 on success or 4 when fewer than two values are on the stack. */
int stack_sub()
{
	if (depth < 2) return 4;
	--depth;
	stack[depth-1] = stack[depth-1] - stack[depth];
	DEBUG(printf("-\n")); /* trace the operator that was actually applied */
	return 0;
}
/* Scan the NUL-terminated expression at p and evaluate it on the operand
 * stack. p itself serves as the scanner cursor (re2c:define:YYCURSOR = p)
 * and t marks the start of the token being matched. Whitespace is skipped,
 * numbers are handed to push_num() and "+" / "-" to stack_add() /
 * stack_sub().
 * Returns 0 when the input ended with exactly one value on the stack, 1 for
 * a character that no other rule accepts, 2 when the input ended with any
 * other stack depth, or the non-zero code returned by one of the helpers. */
int scan(char *p)
{
char *t;
int res = 0;
while(!res)
{
/* Remember where the next token begins. */
t = p;
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
re2c:variable:yych = curr;
re2c:indent:top = 2;
re2c:yyfill:enable = 0;
re2c:yych:conversion = 1;
re2c:labelprefix = scan;
DIGIT = [0-9] ;
OCT = "0" DIGIT+ ;
INT = "0" | ( [1-9] DIGIT* ) ;
WS = [ \t]+ ;
WS { continue; }
OCT { res = push_num(t, p, 8); continue; }
INT { res = push_num(t, p, 10); continue; }
"+" { res = stack_add(); continue; }
"-" { res = stack_sub(); continue; }
"\000" { res = depth == 1 ? 0 : 2; break; }
[^] { res = 1; continue; }
*/
}
return res;
}
/* Entry point: evaluates every command line argument with scan(), stopping
 * at the first argument that yields a non-zero result.
 * An argument that starts and ends with a double quote has the quotes
 * removed before it is scanned.
 * Exit status: 0 on success (the value in stack[0] is printed) or when only
 * the usage text is shown, 1-4 for the corresponding scan() error, 5 when
 * the argument could not be duplicated. */
int main(int argc, char **argv)
{
	if (argc > 1)
	{
		char *buf, *inp;
		int res = 0, argp = 0;
		size_t len;
		while(!res && ++argp < argc)
		{
			/* Keep the pointer returned by strdup() separately: inp may be
			 * advanced past a leading quote, and free() must receive the
			 * original allocation. */
			buf = strdup(argv[argp]);
			if (!buf)
			{
				fprintf(stderr, "Out of memory.\n");
				return 5;
			}
			inp = buf;
			len = strlen(inp);
			/* For an empty string inp[0] is '\0', so inp[len-1] is never read. */
			if (inp[0] == '\"' && inp[len-1] == '\"')
			{
				inp[len - 1] = '\0';
				++inp;
			}
			res = scan(inp);
			free(buf);
		}
		switch(res)
		{
		case 0:
			printf("Result: %d\n", stack[0]);
			return 0;
		case 1:
			fprintf(stderr, "Illegal character in input.\n");
			return 1;
		case 2:
			fprintf(stderr, "Premature end of input.\n");
			return 2;
		case 3:
			fprintf(stderr, "Stack overflow.\n");
			return 3;
		case 4:
			fprintf(stderr, "Stack underflow.\n");
			return 4;
		}
		/* Any other code is passed through unchanged. */
		return res;
	}
	else
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 0;
	}
}

View file

@ -0,0 +1,54 @@
/**
* @file HiResTimer.h
* @brief
* @note
*/
#ifndef _HI_RES_TIMER_H_
#define _HI_RES_TIMER_H_
#ifdef WIN32
#include <windows.h> // probably already done in stdafx.h
/* Timer state shared by the Hrt* functions in this header. */
static LARGE_INTEGER start; /* counter value captured by HrtStart() */
static LARGE_INTEGER stop; /* counter value captured by HrtElapsedMillis() */
static LARGE_INTEGER freq; /* counter frequency queried in HrtInit() */
static _int64 elapsedCounts; /* stop - start, in counter ticks */
static double elapsedMillis; /* last result of HrtElapsedMillis() */
static double elapsedMicros; /* not referenced by the functions in this header */
static HANDLE processHandle; /* handle obtained in HrtInit() */
static DWORD prevPriorityClass; /* priority class saved for HrtResetPriority() */
/* Prepare the timer: cache the performance counter frequency and remember
 * the current process handle together with its priority class. */
void HrtInit()
{
	QueryPerformanceFrequency(&freq);
	processHandle = GetCurrentProcess();
	prevPriorityClass = GetPriorityClass(processHandle);
}
void HrtStart()
{
QueryPerformanceCounter(&start);
}
/* Save the current priority class of the process, then switch the process
 * to the given priority class. The result of SetPriorityClass() is not
 * checked. */
void HrtSetPriority(DWORD priority)
{
	prevPriorityClass = GetPriorityClass(processHandle);
	SetPriorityClass(processHandle, priority);
}
/* Restore the priority class that was saved by HrtInit() or
 * HrtSetPriority(). The result of SetPriorityClass() is not checked. */
void HrtResetPriority(void)
{
	SetPriorityClass(processHandle, prevPriorityClass);
}
double HrtElapsedMillis()
{
QueryPerformanceCounter(&stop);
elapsedCounts = (stop.QuadPart - start.QuadPart);
elapsedMillis = ((elapsedCounts * 1000.0) / freq.QuadPart);
return elapsedMillis;
}
#endif
#endif

View file

@ -0,0 +1,291 @@
/* re2c lesson 001_upn_calculator, main.b.re, (c) M. Boerger, L. Allan 2006 */
/*!ignore:re2c
- basic interface for string reading
. We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL
. YYCTYPE is the type re2c operates on or in other words the type that
it generates code for. While it is not a big difference when we were
using 'unsigned char' here we would need to run re2c with option -w
to fully support types with sizeof() > 1.
. YYCURSOR is used internally and holds the current scanner position. In
expression handlers, the code blocks after re2c expressions, this can be
used to identify the end of the token.
. YYMARKER is not always being used so we set an initial value to avoid
a compiler warning.
. YYLIMIT stores the end of the input. Unfortunately we have to use strlen()
in this lesson. In the next example we see one way to get rid of it.
. We use a 'for(;;)'-loop around the scanner block. We could have used a
'while(1)'-loop instead but some compilers generate a warning for it.
. To make the output more readable we use 're2c:indent:top' scanner
configuration that configures re2c to prepend a single tab (the default)
to the beginning of each output line.
. The following lines are expressions and for each expression we output the
token name and continue the scanner loop.
. The second last token detects the end of our input, the terminating zero in
our input string. In other scanners detecting the end of input may vary.
For example binary code may contain \0 as valid input.
. The last expression accepts any input character. It tells re2c to accept
the opposite of the empty range. This includes numbers and our tokens but
as re2c goes from top to bottom when evaluating the expressions this is no
problem.
. The first three rules show that re2c actually prioritizes the expressions
from top to bottom. Octal number require a starting "0" and the actual
number. Normal numbers start with a digit greater 0. And zero is finally a
special case. A single "0" is detected by the last rule of this set. And a
valid octal number is already being detected by the first rule. This even
includes multi "0" sequences that in octal notation also mean zero.
Another way would be to only use two rules:
"0" [0-9]+
"0" | ( [1-9] [0-9]* )
A full description of re2c rule syntax can be found in the manual.
*/
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
#if _MSC_VER > 1200
#define WINVER 0x0400 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later.
#endif // Prevents warning from vc7.1 complaining about redefinition
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <windows.h>
#include "HiResTimer.h"
static char gTestBuf[1000] = "";
/**
* @brief Setup HiResolution timer and confirm it is working ok
*/
void InitHiResTimerAndVerifyWorking(void)
{
	double millis;
	int outOfRange;

	HrtInit();
	HrtSetPriority(ABOVE_NORMAL_PRIORITY_CLASS);

	/* Time a 100 ms sleep and accept anything from 90 to 110 ms. */
	HrtStart();
	Sleep(100);
	millis = HrtElapsedMillis();

	outOfRange = (millis < 90) || (millis > 110);
	if (outOfRange) {
		printf("HiResTimer misbehaving: %f\n", millis);
		exit(2);
	}
}
/**
* @brief Scan for numbers in different formats
*/
/* Silent variant of the scanner used for the timing runs: every rule only
 * continues or returns, nothing is printed or recorded.
 * pzStrToScan  - start of the input
 * lenStrToScan - input length; only used through the YYLIMIT macro
 * Returns 0 when a NUL character is matched and 1 for any character that no
 * other rule accepts. */
int ScanFullSpeed(char *pzStrToScan, size_t lenStrToScan)
{
unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan;
unsigned char *pzBacktrackInfo = 0;
/* Scanner interface macros. YYFILL(n) expands to nothing, so the buffer is
 * never refilled. */
#define YYCTYPE unsigned char
#define YYCURSOR pzCurScanPos
#define YYLIMIT (pzStrToScan+lenStrToScan)
#define YYMARKER pzBacktrackInfo
#define YYFILL(n)
for(;;)
{
/*!re2c
re2c:indent:top = 2;
[1-9][0-9]* { continue; }
[0][0-9]+ { continue; }
"+" { continue; }
"-" { continue; }
"\000" { return 0; }
[^] { return 1; }
*/
}
}
/**
* @brief Scan for numbers in different formats
*/
/* Verbose variant of the scanner: prints the name of every token and, for
 * all tokens except the terminating NUL, appends that name to gTestBuf.
 * pzStrToScan  - start of the input
 * lenStrToScan - input length; only used through the YYLIMIT macro
 * Returns 0 when a NUL character is matched and 1 for any character that no
 * other rule accepts. */
int scan(char *pzStrToScan, size_t lenStrToScan)
{
unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan;
unsigned char *pzBacktrackInfo = 0;
/* Scanner interface macros. YYFILL(n) expands to nothing, so the buffer is
 * never refilled. */
#define YYCTYPE unsigned char
#define YYCURSOR pzCurScanPos
#define YYLIMIT (pzStrToScan+lenStrToScan)
#define YYMARKER pzBacktrackInfo
#define YYFILL(n)
for(;;)
{
/*!re2c
re2c:indent:top = 2;
[1-9][0-9]* { printf("Num\n"); strcat(gTestBuf, "Num "); continue; }
[0][0-9]+ { printf("Oct\n"); strcat(gTestBuf, "Oct "); continue; }
"+" { printf("+\n"); strcat(gTestBuf, "+ "); continue; }
"-" { printf("-\n"); strcat(gTestBuf, "- "); continue; }
"\000" { printf("EOF\n"); return 0; }
[^] { printf("ERR\n"); strcat(gTestBuf, "ERR "); return 1; }
*/
}
}
/**
 * @brief Time one batch of strncmp() calls and print the statistics
 * @param totLoops        number of comparisons to perform
 * @param pTotFoundCount  running total of matches, updated in place
 */
static void TimeStrnCmpLoops(int totLoops, int *pTotFoundCount)
{
	/* Mask with the low 12 bits clear: a '*' is printed whenever the running
	 * total has none of those bits set, i.e. every 4096 matches. Written as
	 * ~0xFFF so that the constant is representable as an int. */
	const int progressAnd = ~0xFFF;
	char testStr[] = "Hello, world";
	int foundCount = 0;
	int loop;
	int rc;
	double elapsed;

	printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
	HrtStart();
	for (loop = 0; loop < totLoops; ++loop) {
		foundCount = 0;
		rc = strncmp(testStr, "Hello", 5);
		if (rc == 0) {
			foundCount++;
			(*pTotFoundCount)++;
			if ((*pTotFoundCount & progressAnd) == *pTotFoundCount) {
				printf("*");
			}
		}
	}
	elapsed = HrtElapsedMillis();
	printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
	printf("FoundCount each loop: %d\n", foundCount);
	printf("TotalFoundCount for all loops: %d\n", *pTotFoundCount);
}
/**
 * @brief Show high resolution elapsed time for 10,000 and 100,000 loops
 * @note The header line is printed before each of the two runs.
 */
void DoTimingsOfStrnCmp(void)
{
	int totFoundCount = 0;

	TimeStrnCmpLoops(10000, &totFoundCount);
	TimeStrnCmpLoops(100000, &totFoundCount);
}
/**
 * @brief Time one batch of ScanFullSpeed() runs and print the statistics
 * @param testStrings     inputs that are scanned once per loop
 * @param testCount       number of entries in testStrings
 * @param totLoops        number of passes over testStrings
 * @param pTotFoundCount  running total of successful scans, updated in place
 */
static void TimeRe2cLoops(char **testStrings, int testCount, int totLoops, int *pTotFoundCount)
{
	/* Mask with the low 12 bits clear: a '*' is printed whenever the running
	 * total has none of those bits set, i.e. every 4096 successful scans.
	 * Written as ~0xFFF so that the constant is representable as an int. */
	const int progressAnd = ~0xFFF;
	int foundCount = 0;
	int loop;
	int i;
	int rc;
	double elapsed;

	printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
	HrtStart();
	for (loop = 0; loop < totLoops; ++loop) {
		foundCount = 0;
		strcpy(gTestBuf, "");
		for (i = 0; i < testCount; ++i) {
			char* pzCurStr = testStrings[i];
			size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string?
			rc = ScanFullSpeed(pzCurStr, len);
			if (rc == 0) {
				foundCount++;
				(*pTotFoundCount)++;
				if ((*pTotFoundCount & progressAnd) == *pTotFoundCount) {
					printf("*");
				}
			}
		}
	}
	elapsed = HrtElapsedMillis();
	printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
	printf("FoundCount each loop: %d\n", foundCount);
	printf("TotalFoundCount for all loops: %d\n", *pTotFoundCount);
}
/**
 * @brief Show high resolution elapsed time for 10,000 and 100,000 loops
 * @note Each loop scans all test strings, so the loop counts are divided by
 *       the number of test strings.
 */
void DoTimingsOfRe2c(void)
{
	char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" };
	const int testCount = sizeof(testStrings) / sizeof(testStrings[0]);
	int totFoundCount = 0;

	TimeRe2cLoops(testStrings, testCount, 10000 / testCount, &totFoundCount);
	TimeRe2cLoops(testStrings, testCount, 100000 / testCount, &totFoundCount);
}
/**
* @brief Entry point for console app
*/
int main(int argc, char **argv)
{
	char arrayInput[] = "123";
	char* pointerInput = "456";
	char* samples[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" };
	const int sampleCount = sizeof(samples) / sizeof(samples[0]);
	int idx = 0;
	int status;

	/* Scan the same kind of input from an array, a pointer and a literal. */
	status = scan(arrayInput, 3);
	printf("rc: %d\n", status);
	status = scan(pointerInput, 3);
	printf("rc: %d\n", status);
	status = scan("789", 3);
	printf("rc: %d\n", status);

	/* Start from an empty record, then collect the token names of all samples. */
	gTestBuf[0] = '\0';
	while (idx < sampleCount) {
		scan(samples[idx], strlen(samples[idx]));
		++idx;
	}
	printf("%s\n", gTestBuf);

	/* Compare the collected token names with the expected sequence. */
	status = strcmp(gTestBuf, "Num Num + Num Oct - Oct ERR Num ERR ");
	printf("%s", (status == 0) ? "Success\n" : "Failure\n");
	assert(0 == status); // Doesn't work with Release build

	InitHiResTimerAndVerifyWorking();
	DoTimingsOfStrnCmp();
	DoTimingsOfRe2c();
	return 0;
}

View file

@ -0,0 +1,21 @@
re2c lesson 002_strip_comments, (c) M. Boerger 2006
In this lesson you will learn how to use multiple scanner blocks and how to
read the input from a file instead of a zero terminated string. In the end you
will have a scanner that filters comments out of c source files but keeps re2c
comments.
The first scanner can be generated with:
re2c -s -o t.c strip_001.s.re
In the second step we will learn about YYMARKER that stores backtracking
information.
re2c -s -o t.c strip_002.s.re
The third step brings trailing contexts that are stored in YYCTXMARKER. We also
change to use -b instead of -s option since the scanner gets more and more
complex.
re2c -b -o t.c strip_003.b.re

View file

@ -0,0 +1,147 @@
/* re2c lesson 002_strip_comments, strip_001.s, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- basic interface for file reading
. This scanner will read chunks of input from a file. The easiest way would
be to read the whole file into a memory buffer and use that as a zero
terminated string.
. Instead we want to read input chunks of a reasonable size as they are needed
by the scanner. Thus we basically need YYFILL(n) to call fread(n).
. Before we provide a buffer that we constantly reallocate we instead use
one buffer that we get from the stack or global memory just once. When we
reach the end of the buffer we simply move the beginning of our input
that is somewhere in our buffer to the beginning of our buffer and then
append the next chunk of input to the correct end inside our buffer.
. As re2c scanners might read more than one character we need to ensure our
buffer is long enough. We can use re2c to inform about the maximum size
by placing a "!max:re2c" comment somewhere. This gets translated to a
"#define YYMAXFILL <n>" line where <n> is the maximum length value. This
define can be used as precompiler condition.
- multiple scanner blocks
. We use a main scanner block that outputs every input character unless the
input is two /s or a / followed by a *. In the latter two cases we switch
to a special c++ comment and a comment block respectively.
. Both special blocks simply detect their end ignore any other character.
. The c++ block is a bit special since the terminating new line needs to
be output and that can either be a new line or a carriage return followed
by a new line.
. In order to ensure that we do not read behind our buffer we reset the token
pointer to the cursor on every scanner run.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*!max:re2c */
#define BSIZE 128
#if BSIZE < YYMAXFILL
# error BSIZE must be greater YYMAXFILL
#endif
#define YYCTYPE unsigned char
#define YYCURSOR s.cur
#define YYLIMIT s.lim
/* Refill the buffer and leave the enclosing loop when fill() returns a
 * value >= 0, i.e. when it reports that not enough input is left. */
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
/* Scanner state.
 * fp     - input stream
 * cur    - current scan position (YYCURSOR)
 * tok    - start of the current token
 * lim    - end of the valid data in buffer (YYLIMIT)
 * eof    - end of the data once fread() delivered less than requested,
 *          otherwise 0
 * buffer - fixed size input window */
typedef struct Scanner
{
	FILE *fp;
	unsigned char *cur, *tok, *lim, *eof;
	unsigned char buffer[BSIZE];
} Scanner;
/* Make more input available in s->buffer.
 * len - number of characters the scanner asks for; 0 (re)initialises the
 *       scanner and performs the first read.
 * While the end of input has not been seen, the bytes in front of the
 * current token are discarded, the rest is moved to the start of the buffer
 * and the free space behind lim is filled from s->fp.
 * Returns 0 when the end of input was seen earlier and fewer than len
 * characters remain, -1 otherwise. */
int fill(Scanner *s, int len)
{
	if (!len)
	{
		s->cur = s->tok = s->lim = s->buffer;
		s->eof = 0;
	}
	if (!s->eof)
	{
		size_t got, room;
		size_t shift = (size_t)(s->tok - s->buffer);
		if (shift > 0)
		{
			/* Source and destination may overlap, so memmove is required. */
			memmove(s->buffer, s->tok, (size_t)(s->lim - s->tok));
			s->tok -= shift;
			s->cur -= shift;
			s->lim -= shift;
		}
		/* Read exactly as much as fits between lim and the end of buffer. */
		room = (size_t)(&s->buffer[BSIZE] - s->lim);
		got = fread(s->lim, 1, room, s->fp);
		if (got != room)
		{
			s->eof = &s->lim[got];
		}
		s->lim += got;
	}
	else if (s->cur + len > s->eof)
	{
		return 0; /* not enough input data */
	}
	return -1;
}
/* Copy the input from fp to stdout while leaving out comments.
 * Three scanner blocks are used: the main block echoes every character
 * until it sees the start of a comment, the "comment" block skips everything up to
 * the closing star-slash and the "cppcomment" block skips everything up to
 * the end of the line, which is still written to stdout.
 * The loop is left through the break inside YYFILL once fill() reports that
 * not enough input is left.
 * Returns 1 when fp is NULL, otherwise the value YYFILL stored in res. */
int scan(FILE *fp)
{
int res = 0;
Scanner s;
if (!fp)
{
return 1; /* no file was opened */
}
s.fp = fp;
/* A length of 0 initialises the scanner and reads the first chunk. */
fill(&s, 0);
for(;;)
{
/* Every token starts where the previous one ended. */
s.tok = s.cur;
/*!re2c
re2c:indent:top = 2;
NL = "\r"? "\n" ;
ANY = [^] ;
"/" "/" { goto cppcomment; }
"/" "*" { goto comment; }
ANY { fputc(*s.tok, stdout); continue; }
*/
comment:
s.tok = s.cur;
/*!re2c
"*" "/" { continue; }
ANY { goto comment; }
*/
cppcomment:
s.tok = s.cur;
/*!re2c
NL { fwrite(s.tok, 1, s.cur - s.tok, stdout); continue; }
ANY { goto cppcomment; }
*/
}
if (fp != stdin)
{
fclose(fp); /* close only if not stdin */
}
return res; /* return result */
}
/* Entry point: strips the comments from the file named by the first
 * argument, or from stdin when that argument is "-".
 * Returns 1 when no argument was given or the file cannot be opened,
 * otherwise the result of scan(). */
int main(int argc, char **argv)
{
	FILE *fp;
	if (argc < 2)
	{
		fprintf(stderr, "%s <file>\n", argv[0]);
		return 1;
	}
	if (!strcmp(argv[1], "-"))
	{
		fp = stdin;
	}
	else if (!(fp = fopen(argv[1], "r")))
	{
		/* Tell the user why nothing was processed. */
		perror(argv[1]);
		return 1;
	}
	return scan(fp);
}

View file

@ -0,0 +1,162 @@
/* re2c lesson 002_strip_comments, strip_002.s, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- complexity
. When a comment is preceded by a new line and followed by whitespace and a
new line then we can drop the trailing whitespace and new line.
. Additional to what we strip out already what about two consecutive comment
blocks? When two comments are only separated by whitespace we want to drop
both. In other words when detecting the end of a comment block we need to
check whether it is followed by only whitespace and then a new comment in
which case we continue ignoring the input. If it is followed only by white
space and a new line we strip out the new white space and new line. In any
other case we start outputting all that follows.
But we cannot simply use the following two rules:
"*" "/" WS* "/" "*" { continue; }
"*" "/" WS* NL { continue; }
The main problem is that WS* can get bigger than our buffer, so we need a
new scanner.
. Meanwhile our scanner gets a bit more complex and we have to add two more
things. First the scanner code now uses a YYMARKER to store backtracking
information.
- backtracking information
. When the scanner has two rules that can have the same beginning but a
different ending then it needs to store the position that identifies the
common part. This is called backtracking. As mentioned above re2c expects
you to provide compiler define YYMARKER and a pointer variable.
. When shifting buffer contents as done in our fill function the marker needs
to be corrected, too.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*!max:re2c */
#define BSIZE 128
#if BSIZE < YYMAXFILL
# error BSIZE must be greater YYMAXFILL
#endif
#define YYCTYPE unsigned char
#define YYCURSOR s.cur
#define YYLIMIT s.lim
#define YYMARKER s.mrk
/* Refill the buffer and leave the enclosing loop when fill() returns a
 * value >= 0, i.e. when it reports that not enough input is left. */
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
/* Scanner state.
 * fp     - input stream
 * cur    - current scan position (YYCURSOR)
 * tok    - start of the current token
 * lim    - end of the valid data in buffer (YYLIMIT)
 * eof    - end of the data once fread() delivered less than requested,
 *          otherwise 0
 * mrk    - backtracking position (YYMARKER)
 * buffer - fixed size input window */
typedef struct Scanner
{
	FILE *fp;
	unsigned char *cur, *tok, *lim, *eof, *mrk;
	unsigned char buffer[BSIZE];
} Scanner;
/* Make more input available in s->buffer.
 * len - number of characters the scanner asks for; 0 (re)initialises the
 *       scanner and performs the first read.
 * While the end of input has not been seen, the bytes in front of the
 * current token are discarded, the rest is moved to the start of the buffer
 * (all pointers including the marker are adjusted) and the free space behind
 * lim is filled from s->fp.
 * Returns 0 when the end of input was seen earlier and fewer than len
 * characters remain, -1 otherwise. */
int fill(Scanner *s, int len)
{
	if (!len)
	{
		s->cur = s->tok = s->lim = s->mrk = s->buffer;
		s->eof = 0;
	}
	if (!s->eof)
	{
		size_t got, room;
		size_t shift = (size_t)(s->tok - s->buffer);
		if (shift > 0)
		{
			/* Source and destination may overlap, so memmove is required. */
			memmove(s->buffer, s->tok, (size_t)(s->lim - s->tok));
			s->tok -= shift;
			s->cur -= shift;
			s->lim -= shift;
			s->mrk -= shift;
		}
		/* Read exactly as much as fits between lim and the end of buffer. */
		room = (size_t)(&s->buffer[BSIZE] - s->lim);
		got = fread(s->lim, 1, room, s->fp);
		if (got != room)
		{
			s->eof = &s->lim[got];
		}
		s->lim += got;
	}
	else if (s->cur + len > s->eof)
	{
		return 0; /* not enough input data */
	}
	return -1;
}
/* Write the current token, i.e. the bytes from s->tok up to s->cur, to
 * stdout. */
void echo(Scanner *s)
{
	const size_t tokenLen = (size_t)(s->cur - s->tok);
	fwrite(s->tok, sizeof(unsigned char), tokenLen, stdout);
}
/* Copy the input from fp to stdout while leaving out comments and the
 * whitespace that directly follows a block comment.
 * Scanner blocks: the main block echoes every character until a comment
 * starts; "comment" skips up to the closing star-slash; "commentws" then
 * skips whitespace and returns to the main block after echoing either a new
 * line or the first other character; "cppcomment" skips up to the end of the
 * line, which is echoed.
 * The loop is left through the break inside YYFILL once fill() reports that
 * not enough input is left.
 * Returns 1 when fp is NULL, otherwise the value YYFILL stored in res. */
int scan(FILE *fp)
{
int res = 0;
Scanner s;
if (!fp)
{
return 1; /* no file was opened */
}
s.fp = fp;
/* A length of 0 initialises the scanner and reads the first chunk. */
fill(&s, 0);
for(;;)
{
/* Every token starts where the previous one ended. */
s.tok = s.cur;
/*!re2c
re2c:indent:top = 2;
NL = "\r"? "\n" ;
WS = [\r\n\t ] ;
ANY = [^] ;
"/" "/" { goto cppcomment; }
"/" "*" { goto comment; }
ANY { fputc(*s.tok, stdout); continue; }
*/
comment:
s.tok = s.cur;
/*!re2c
"*" "/" { goto commentws; }
ANY { goto comment; }
*/
commentws:
s.tok = s.cur;
/*!re2c
NL { echo(&s); continue; }
WS { goto commentws; }
ANY { echo(&s); continue; }
*/
cppcomment:
s.tok = s.cur;
/*!re2c
NL { echo(&s); continue; }
ANY { goto cppcomment; }
*/
}
if (fp != stdin)
{
fclose(fp); /* close only if not stdin */
}
return res; /* return result */
}
/* Entry point: strips the comments from the file named by the first
 * argument, or from stdin when that argument is "-".
 * Returns 1 when no argument was given or the file cannot be opened,
 * otherwise the result of scan(). */
int main(int argc, char **argv)
{
	FILE *fp;
	if (argc < 2)
	{
		fprintf(stderr, "%s <file>\n", argv[0]);
		return 1;
	}
	if (!strcmp(argv[1], "-"))
	{
		fp = stdin;
	}
	else if (!(fp = fopen(argv[1], "r")))
	{
		/* Tell the user why nothing was processed. */
		perror(argv[1]);
		return 1;
	}
	return scan(fp);
}

View file

@ -0,0 +1,179 @@
/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c
- more complexity
. Right now we strip out trailing white space and new lines after a comment
block. This can be a problem when the comment block was not preceded by
a new line.
. The solution is to use trailing contexts.
- trailing contexts
. Re2c allows to check for a portion of input and only recognize it when it
is followed by another portion. This is called a trailing context.
. The trailing context is not part of the identified input. That means that
it follows exactly at the cursor. A consequence is that the scanner has
already read more input and on the next run you need to restore the beginning
of input, in our case s.tok, from the cursor, here s.cur, rather than
restoring to the beginning of the buffer. This way the scanner can reuse
the portion it has already read.
. The position of the trailing context is stored in YYCTXMARKER for which
a pointer variable needs to be provided.
. As with YYMARKER the corresponding variable needs to be corrected if we
shift in some buffer.
. Still this is not all we need to solve the problem. What is left is that
the information whether a trailing context was detected has to
be stored somewhere. This is done by the new variable nlcomment.
- formatting
. Until now we only used single line expression code and we always had the
opening { on the same line as the rule itself. If we have multiline rule
code and care for formatting we can no longer rely on re2c. Now we have
to indent the rule code ourself. Also we need to take care of the opening
{. If we keep it on the same line as the rule then re2c will indent it
correctly and the emitted #line informations will be correct. If we place
it on the next line then the #line directive will also point to that line
and not to the rule.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*!max:re2c */
#define BSIZE 128
#if BSIZE < YYMAXFILL
# error BSIZE must be greater YYMAXFILL
#endif
#define YYCTYPE unsigned char
#define YYCURSOR s.cur
#define YYLIMIT s.lim
#define YYMARKER s.mrk
#define YYCTXMARKER s.ctx
/* Refill the buffer and leave the enclosing loop when fill() returns a
 * value >= 0, i.e. when it reports that not enough input is left. */
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
/* Scanner state.
 * fp     - input stream
 * cur    - current scan position (YYCURSOR)
 * tok    - start of the current token
 * lim    - end of the valid data in buffer (YYLIMIT)
 * eof    - end of the data once fread() delivered less than requested,
 *          otherwise 0
 * ctx    - trailing context position (YYCTXMARKER)
 * mrk    - backtracking position (YYMARKER)
 * buffer - fixed size input window */
typedef struct Scanner
{
	FILE *fp;
	unsigned char *cur, *tok, *lim, *eof, *ctx, *mrk;
	unsigned char buffer[BSIZE];
} Scanner;
/* Make more input available in s->buffer.
 * len - number of characters the scanner asks for; 0 (re)initialises the
 *       scanner and performs the first read.
 * While the end of input has not been seen, the bytes in front of the
 * current token are discarded, the rest is moved to the start of the buffer
 * (all pointers including both markers are adjusted) and the free space
 * behind lim is filled from s->fp.
 * Returns 0 when the end of input was seen earlier and fewer than len
 * characters remain, -1 otherwise. */
int fill(Scanner *s, int len)
{
	if (!len)
	{
		/* ctx is adjusted on every shift below, so it needs a defined value
		 * from the start as well. */
		s->cur = s->tok = s->lim = s->mrk = s->ctx = s->buffer;
		s->eof = 0;
	}
	if (!s->eof)
	{
		size_t got, room;
		size_t shift = (size_t)(s->tok - s->buffer);
		if (shift > 0)
		{
			/* Source and destination may overlap, so memmove is required. */
			memmove(s->buffer, s->tok, (size_t)(s->lim - s->tok));
			s->tok -= shift;
			s->cur -= shift;
			s->lim -= shift;
			s->mrk -= shift;
			s->ctx -= shift;
		}
		/* Read exactly as much as fits between lim and the end of buffer. */
		room = (size_t)(&s->buffer[BSIZE] - s->lim);
		got = fread(s->lim, 1, room, s->fp);
		if (got != room)
		{
			s->eof = &s->lim[got];
		}
		s->lim += got;
	}
	else if (s->cur + len > s->eof)
	{
		return 0; /* not enough input data */
	}
	return -1;
}
/* Write the current token, i.e. the bytes from s->tok up to s->cur, to
 * stdout. */
void echo(Scanner *s)
{
	const size_t tokenLen = (size_t)(s->cur - s->tok);
	fwrite(s->tok, sizeof(unsigned char), tokenLen, stdout);
}
/* Copy the input from fp to stdout while leaving out comments.
 * In addition to the plain comment handling, nlcomment records that a new
 * line directly in front of a block comment has already been echoed (rule
 * with the trailing context). In "commentws" a following new line is then
 * only echoed when nlcomment is not set, and a further block comment that
 * follows, optionally after a new line, is skipped as well.
 * The loop is left through the break inside YYFILL once fill() reports that
 * not enough input is left.
 * Returns 1 when fp is NULL, otherwise the value YYFILL stored in res. */
int scan(FILE *fp)
{
int res = 0;
int nlcomment = 0;
Scanner s;
if (!fp)
{
return 1; /* no file was opened */
}
s.fp = fp;
/* A length of 0 initialises the scanner and reads the first chunk. */
fill(&s, 0);
for(;;)
{
/* Every token starts where the previous one ended. */
s.tok = s.cur;
/*!re2c
re2c:indent:top = 2;
NL = "\r"? "\n" ;
WS = [\r\n\t ] ;
ANY = [^] ;
"/" "/" { goto cppcomment; }
NL / "/""*" { echo(&s); nlcomment = 1; continue; }
"/" "*" { goto comment; }
ANY { fputc(*s.tok, stdout); continue; }
*/
comment:
s.tok = s.cur;
/*!re2c
"*" "/" { goto commentws; }
ANY { goto comment; }
*/
commentws:
s.tok = s.cur;
/*!re2c
NL? "/" "*" { goto comment; }
NL {
if (!nlcomment)
{
echo(&s);
}
nlcomment = 0;
continue;
}
WS { goto commentws; }
ANY { echo(&s); nlcomment = 0; continue; }
*/
cppcomment:
s.tok = s.cur;
/*!re2c
NL { echo(&s); continue; }
ANY { goto cppcomment; }
*/
}
if (fp != stdin)
{
fclose(fp); /* close only if not stdin */
}
return res; /* return result */
}
/* Entry point: strips the comments from the file named by the first
 * argument, or from stdin when that argument is "-".
 * Returns 1 when no argument was given or the file cannot be opened,
 * otherwise the result of scan(). */
int main(int argc, char **argv)
{
	FILE *fp;
	if (argc < 2)
	{
		fprintf(stderr, "%s <file>\n", argv[0]);
		return 1;
	}
	if (!strcmp(argv[1], "-"))
	{
		fp = stdin;
	}
	else if (!(fp = fopen(argv[1], "r")))
	{
		/* Tell the user why nothing was processed. */
		perror(argv[1]);
		return 1;
	}
	return scan(fp);
}

View file

@ -1,26 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <string.h>
#ifndef MAP_NORESERVE
#define MAP_NORESERVE 0
#endif
volatile char ch;
main(){
struct stat statbuf;
uchar *buf;
fstat(0, &statbuf);
buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE,
0, 0);
if(buf != (uchar*)(-1)){
uchar *cur, *lim = &buf[statbuf.st_size];
for(cur = buf; buf != lim; ++cur){
ch = *cur;
}
munmap(buf, statbuf.st_size);
}
}

View file

@ -1,267 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <string.h>
#define ADDEQ 257
#define ANDAND 258
#define ANDEQ 259
#define ARRAY 260
#define ASM 261
#define AUTO 262
#define BREAK 263
#define CASE 264
#define CHAR 265
#define CONST 266
#define CONTINUE 267
#define DECR 268
#define DEFAULT 269
#define DEREF 270
#define DIVEQ 271
#define DO 272
#define DOUBLE 273
#define ELLIPSIS 274
#define ELSE 275
#define ENUM 276
#define EQL 277
#define EXTERN 278
#define FCON 279
#define FLOAT 280
#define FOR 281
#define FUNCTION 282
#define GEQ 283
#define GOTO 284
#define ICON 285
#define ID 286
#define IF 287
#define INCR 288
#define INT 289
#define LEQ 290
#define LONG 291
#define LSHIFT 292
#define LSHIFTEQ 293
#define MODEQ 294
#define MULEQ 295
#define NEQ 296
#define OREQ 297
#define OROR 298
#define POINTER 299
#define REGISTER 300
#define RETURN 301
#define RSHIFT 302
#define RSHIFTEQ 303
#define SCON 304
#define SHORT 305
#define SIGNED 306
#define SIZEOF 307
#define STATIC 308
#define STRUCT 309
#define SUBEQ 310
#define SWITCH 311
#define TYPEDEF 312
#define UNION 313
#define UNSIGNED 314
#define VOID 315
#define VOLATILE 316
#define WHILE 317
#define XOREQ 318
#define EOI 319
typedef unsigned int unint;
typedef unsigned char uchar;
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
#define YYFILL(n) {cursor = fill(s, cursor);}
#define RET(i) {s->cur = cursor; return i;}
typedef struct Scanner {
uchar *tok, *ptr, *cur, *pos, *lim, *eof;
unint line;
} Scanner;
/* Called through YYFILL when the scanner reaches s->lim.
 * On the first call (s->eof not yet set) the unread bytes from s->tok up to
 * s->lim are copied into a heap buffer that is one byte larger, a '\n' is
 * stored behind them and s->eof is set just past that new line. All scanner
 * pointers and the cursor are rebased onto the copy. Later calls change
 * nothing.
 * Returns the (possibly rebased) cursor. Terminates the program when the
 * copy cannot be allocated. */
uchar *fill(Scanner *s, uchar *cursor){
	if(!s->eof){
		unint cnt = s->lim - s->tok;
		uchar *buf = (uchar*) malloc((cnt + 1)*sizeof(uchar));
		if(!buf){
			fprintf(stderr, "out of memory\n");
			exit(1);
		}
		memcpy(buf, s->tok, cnt);
		cursor = &buf[cursor - s->tok];
		s->pos = &buf[s->pos - s->tok];
		s->ptr = &buf[s->ptr - s->tok];
		s->lim = &buf[cnt];
		/* Store the sentinel new line at lim; eof ends up one past it. */
		s->eof = s->lim; *(s->eof)++ = '\n';
		s->tok = buf;
	}
	return cursor;
}
/* Return the code of the next token found at s->cur.
 * Keywords, identifiers, constants and multi-character operators yield the
 * corresponding #define value, single-character punctuation yields the
 * character itself. Blanks, new lines and block comments are skipped, a
 * character that matches nothing else is reported with printf and skipped.
 * EOI is returned when a new line ends exactly at s->eof.
 * RET stores the cursor in s->cur before returning. */
int scan(Scanner *s){
uchar *cursor = s->cur;
std:
/* Every token starts at the current cursor position. */
s->tok = cursor;
/*!re2c
any = [\000-\377];
O = [0-7];
D = [0-9];
L = [a-zA-Z_];
H = [a-fA-F0-9];
E = [Ee] [+-]? D+;
FS = [fFlL];
IS = [uUlL]*;
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
*/
/*!re2c
"/*" { goto comment; }
"auto" { RET(AUTO); }
"break" { RET(BREAK); }
"case" { RET(CASE); }
"char" { RET(CHAR); }
"const" { RET(CONST); }
"continue" { RET(CONTINUE); }
"default" { RET(DEFAULT); }
"do" { RET(DO); }
"double" { RET(DOUBLE); }
"else" { RET(ELSE); }
"enum" { RET(ENUM); }
"extern" { RET(EXTERN); }
"float" { RET(FLOAT); }
"for" { RET(FOR); }
"goto" { RET(GOTO); }
"if" { RET(IF); }
"int" { RET(INT); }
"long" { RET(LONG); }
"register" { RET(REGISTER); }
"return" { RET(RETURN); }
"short" { RET(SHORT); }
"signed" { RET(SIGNED); }
"sizeof" { RET(SIZEOF); }
"static" { RET(STATIC); }
"struct" { RET(STRUCT); }
"switch" { RET(SWITCH); }
"typedef" { RET(TYPEDEF); }
"union" { RET(UNION); }
"unsigned" { RET(UNSIGNED); }
"void" { RET(VOID); }
"volatile" { RET(VOLATILE); }
"while" { RET(WHILE); }
L (L|D)* { RET(ID); }
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
(['] (ESC|any\[\n\\'])* ['])
{ RET(ICON); }
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
{ RET(FCON); }
(["] (ESC|any\[\n\\"])* ["])
{ RET(SCON); }
"..." { RET(ELLIPSIS); }
">>=" { RET(RSHIFTEQ); }
"<<=" { RET(LSHIFTEQ); }
"+=" { RET(ADDEQ); }
"-=" { RET(SUBEQ); }
"*=" { RET(MULEQ); }
"/=" { RET(DIVEQ); }
"%=" { RET(MODEQ); }
"&=" { RET(ANDEQ); }
"^=" { RET(XOREQ); }
"|=" { RET(OREQ); }
">>" { RET(RSHIFT); }
"<<" { RET(LSHIFT); }
"++" { RET(INCR); }
"--" { RET(DECR); }
"->" { RET(DEREF); }
"&&" { RET(ANDAND); }
"||" { RET(OROR); }
"<=" { RET(LEQ); }
">=" { RET(GEQ); }
"==" { RET(EQL); }
"!=" { RET(NEQ); }
";" { RET(';'); }
"{" { RET('{'); }
"}" { RET('}'); }
"," { RET(','); }
":" { RET(':'); }
"=" { RET('='); }
"(" { RET('('); }
")" { RET(')'); }
"[" { RET('['); }
"]" { RET(']'); }
"." { RET('.'); }
"&" { RET('&'); }
"!" { RET('!'); }
"~" { RET('~'); }
"-" { RET('-'); }
"+" { RET('+'); }
"*" { RET('*'); }
"/" { RET('/'); }
"%" { RET('%'); }
"<" { RET('<'); }
">" { RET('>'); }
"^" { RET('^'); }
"|" { RET('|'); }
"?" { RET('?'); }
[ \t\v\f]+ { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->pos = cursor; s->line++;
goto std;
}
any
{
printf("unexpected character: %c\n", *s->tok);
goto std;
}
*/
/* Inside a block comment: skip everything up to the closing star-slash
 * while still counting lines. */
comment:
/*!re2c
"*/" { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->tok = s->pos = cursor; s->line++;
goto comment;
}
any { goto comment; }
*/
}
#ifndef MAP_NORESERVE
#define MAP_NORESERVE 0
#endif
main(){
Scanner in;
struct stat statbuf;
uchar *buf;
fstat(0, &statbuf);
buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE,
0, 0);
if(buf != (uchar*)(-1)){
int t;
in.lim = &(in.cur = buf)[statbuf.st_size];
in.pos = NULL;
in.eof = NULL;
while((t = scan(&in)) != EOI){
/*
printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
printf("%d\n", t);
*/
}
munmap(buf, statbuf.st_size);
}
}

View file

@ -1,239 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define ADDEQ 257
#define ANDAND 258
#define ANDEQ 259
#define ARRAY 260
#define ASM 261
#define AUTO 262
#define BREAK 263
#define CASE 264
#define CHAR 265
#define CONST 266
#define CONTINUE 267
#define DECR 268
#define DEFAULT 269
#define DEREF 270
#define DIVEQ 271
#define DO 272
#define DOUBLE 273
#define ELLIPSIS 274
#define ELSE 275
#define ENUM 276
#define EQL 277
#define EXTERN 278
#define FCON 279
#define FLOAT 280
#define FOR 281
#define FUNCTION 282
#define GEQ 283
#define GOTO 284
#define ICON 285
#define ID 286
#define IF 287
#define INCR 288
#define INT 289
#define LEQ 290
#define LONG 291
#define LSHIFT 292
#define LSHIFTEQ 293
#define MODEQ 294
#define MULEQ 295
#define NEQ 296
#define OREQ 297
#define OROR 298
#define POINTER 299
#define REGISTER 300
#define RETURN 301
#define RSHIFT 302
#define RSHIFTEQ 303
#define SCON 304
#define SHORT 305
#define SIGNED 306
#define SIZEOF 307
#define STATIC 308
#define STRUCT 309
#define SUBEQ 310
#define SWITCH 311
#define TYPEDEF 312
#define UNION 313
#define UNSIGNED 314
#define VOID 315
#define VOLATILE 316
#define WHILE 317
#define XOREQ 318
#define EOI 319
typedef unsigned int uint;
typedef unsigned char uchar;
#define BSIZE 8192
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
#define YYFILL(n) {cursor = fill(s, cursor);}
#define RET(i) {s->cur = cursor; return i;}
typedef struct Scanner {
int fd;
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
uint line;
} Scanner;
/* Called through YYFILL when the scanner reaches s->lim.
 * Unless the end of input has been seen, the bytes in front of the current
 * token are discarded, the buffer is replaced by a larger one when fewer
 * than BSIZE bytes are free behind s->lim, and up to BSIZE bytes are read
 * from s->fd. A read that delivers fewer than BSIZE bytes (or fails) is
 * taken as the end of input: a '\n' is stored behind the data and s->eof is
 * set just past it.
 * Returns the (possibly moved) cursor. Terminates the program when the
 * buffer cannot be allocated. */
uchar *fill(Scanner *s, uchar *cursor){
	if(!s->eof){
		uint cnt = s->tok - s->bot;
		int got;
		if(cnt){
			/* Source and destination may overlap, so memmove is required. */
			memmove(s->bot, s->tok, s->lim - s->tok);
			s->tok = s->bot;
			s->ptr -= cnt;
			cursor -= cnt;
			s->pos -= cnt;
			s->lim -= cnt;
		}
		if((s->top - s->lim) < BSIZE){
			uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
			if(!buf){
				fprintf(stderr, "out of memory\n");
				exit(1);
			}
			memcpy(buf, s->tok, s->lim - s->tok);
			s->tok = buf;
			s->ptr = &buf[s->ptr - s->bot];
			cursor = &buf[cursor - s->bot];
			s->pos = &buf[s->pos - s->bot];
			s->lim = &buf[s->lim - s->bot];
			s->top = &s->lim[BSIZE];
			free(s->bot);
			s->bot = buf;
		}
		got = read(s->fd, (char*) s->lim, BSIZE);
		if(got < 0){
			/* A failed read delivers no data; handle it like the end of input
			 * instead of using -1 as a length. */
			got = 0;
		}
		if(got != BSIZE){
			s->eof = &s->lim[got]; *(s->eof)++ = '\n';
		}
		s->lim += got;
	}
	return cursor;
}
int scan(Scanner *s){
uchar *cursor = s->cur;
std:
s->tok = cursor;
/*!re2c
any = [\000-\377];
O = [0-7];
D = [0-9];
L = [a-zA-Z_];
H = [a-fA-F0-9];
E = [Ee] [+-]? D+;
FS = [fFlL];
IS = [uUlL]*;
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
*/
/*!re2c
"/*" { goto comment; }
L (L|D)* { RET(ID); }
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
(['] (ESC|any\[\n\\'])* ['])
{ RET(ICON); }
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
{ RET(FCON); }
(["] (ESC|any\[\n\\"])* ["])
{ RET(SCON); }
"..." { RET(ELLIPSIS); }
">>=" { RET(RSHIFTEQ); }
"<<=" { RET(LSHIFTEQ); }
"+=" { RET(ADDEQ); }
"-=" { RET(SUBEQ); }
"*=" { RET(MULEQ); }
"/=" { RET(DIVEQ); }
"%=" { RET(MODEQ); }
"&=" { RET(ANDEQ); }
"^=" { RET(XOREQ); }
"|=" { RET(OREQ); }
">>" { RET(RSHIFT); }
"<<" { RET(LSHIFT); }
"++" { RET(INCR); }
"--" { RET(DECR); }
"->" { RET(DEREF); }
"&&" { RET(ANDAND); }
"||" { RET(OROR); }
"<=" { RET(LEQ); }
">=" { RET(GEQ); }
"==" { RET(EQL); }
"!=" { RET(NEQ); }
";" { RET(';'); }
"{" { RET('{'); }
"}" { RET('}'); }
"," { RET(','); }
":" { RET(':'); }
"=" { RET('='); }
"(" { RET('('); }
")" { RET(')'); }
"[" { RET('['); }
"]" { RET(']'); }
"." { RET('.'); }
"&" { RET('&'); }
"!" { RET('!'); }
"~" { RET('~'); }
"-" { RET('-'); }
"+" { RET('+'); }
"*" { RET('*'); }
"/" { RET('/'); }
"%" { RET('%'); }
"<" { RET('<'); }
">" { RET('>'); }
"^" { RET('^'); }
"|" { RET('|'); }
"?" { RET('?'); }
[ \t\v\f]+ { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->pos = cursor; s->line++;
goto std;
}
any
{
printf("unexpected character: %c\n", *s->tok);
goto std;
}
*/
comment:
/*!re2c
"*/" { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->tok = s->pos = cursor; s->line++;
goto comment;
}
any { goto comment; }
*/
}
main(){
Scanner in;
int t;
memset((char*) &in, 0, sizeof(in));
in.fd = 0;
while((t = scan(&in)) != EOI){
/*
printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
printf("%d\n", t);
*/
}
close(in.fd);
}

View file

@ -1,258 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define ADDEQ 257
#define ANDAND 258
#define ANDEQ 259
#define ARRAY 260
#define ASM 261
#define AUTO 262
#define BREAK 263
#define CASE 264
#define CHAR 265
#define CONST 266
#define CONTINUE 267
#define DECR 268
#define DEFAULT 269
#define DEREF 270
#define DIVEQ 271
#define DO 272
#define DOUBLE 273
#define ELLIPSIS 274
#define ELSE 275
#define ENUM 276
#define EQL 277
#define EXTERN 278
#define FCON 279
#define FLOAT 280
#define FOR 281
#define FUNCTION 282
#define GEQ 283
#define GOTO 284
#define ICON 285
#define ID 286
#define IF 287
#define INCR 288
#define INT 289
#define LEQ 290
#define LONG 291
#define LSHIFT 292
#define LSHIFTEQ 293
#define MODEQ 294
#define MULEQ 295
#define NEQ 296
#define OREQ 297
#define OROR 298
#define POINTER 299
#define REGISTER 300
#define RETURN 301
#define RSHIFT 302
#define RSHIFTEQ 303
#define SCON 304
#define SHORT 305
#define SIGNED 306
#define SIZEOF 307
#define STATIC 308
#define STRUCT 309
#define SUBEQ 310
#define SWITCH 311
#define TYPEDEF 312
#define UNION 313
#define UNSIGNED 314
#define VOID 315
#define VOLATILE 316
#define WHILE 317
#define XOREQ 318
#define EOI 319
typedef unsigned int uint;
typedef unsigned char uchar;
#define BSIZE 8192
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
#define YYFILL(n) {cursor = fill(s, cursor);}
#define RET(i) {s->cur = cursor; return i;}
/*
 * Buffered input state shared by fill() and scan().
 *   fd   - descriptor that fill() reads from
 *   bot  - start of the malloc'ed buffer
 *   tok  - start of the token being scanned; fill() discards bytes before it
 *   ptr  - backtracking marker (YYMARKER)
 *   cur  - cursor saved by RET() and reloaded by the next scan() call
 *   pos  - position just past the most recent newline seen by scan()
 *   lim  - one past the last valid byte (YYLIMIT)
 *   top  - one past the end of the allocated buffer
 *   eof  - NULL until fill() gets a short read; then points just past the
 *          '\n' sentinel that fill() appends after the last byte read
 *   line - number of newlines seen by scan()
 */
typedef struct Scanner {
int fd;
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
uint line;
} Scanner;
/*
 * Refill the scanner buffer from s->fd and return the (possibly relocated)
 * cursor.
 *
 * Bytes before s->tok are discarded by sliding the remaining data down to
 * s->bot; if fewer than BSIZE bytes are free after s->lim the buffer is
 * reallocated. Up to BSIZE bytes are then read at s->lim. A short read
 * (including a failed read, which is treated as zero bytes) marks end of
 * input: a '\n' sentinel is appended and s->eof is set just past it.
 * Once s->eof is set the function does nothing.
 *
 * s->tok, s->ptr, s->pos, s->lim and the returned cursor are all adjusted to
 * keep pointing at the same bytes after the data moves.
 */
uchar *fill(Scanner *s, uchar *cursor){
	if(!s->eof){
		uint cnt = s->tok - s->bot;
		int got;
		if(cnt){
			/* Source and destination overlap whenever the kept tail is
			 * longer than the discarded prefix, so memcpy is not allowed. */
			memmove(s->bot, s->tok, s->lim - s->tok);
			s->tok = s->bot;
			s->ptr -= cnt;
			cursor -= cnt;
			s->pos -= cnt;
			s->lim -= cnt;
		}
		if((s->top - s->lim) < BSIZE){
			uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
			memcpy(buf, s->tok, s->lim - s->tok);
			s->tok = buf;
			s->ptr = &buf[s->ptr - s->bot];
			cursor = &buf[cursor - s->bot];
			s->pos = &buf[s->pos - s->bot];
			s->lim = &buf[s->lim - s->bot];
			s->top = &s->lim[BSIZE];
			free(s->bot);
			s->bot = buf;
		}
		got = (int) read(s->fd, (char*) s->lim, BSIZE);
		/* read() returns -1 on error; stored in an unsigned count that
		 * would index far outside the buffer, so treat it as end of input. */
		if(got < 0)
			got = 0;
		cnt = (uint) got;
		if(cnt != BSIZE){
			s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
		}
		s->lim += cnt;
	}
	return cursor;
}
int scan(Scanner *s){
uchar *cursor = s->cur;
std:
s->tok = cursor;
/*!re2c
any = [\000-\377];
O = [0-7];
D = [0-9];
L = [a-zA-Z_];
I = L|D;
H = [a-fA-F0-9];
E = [Ee] [+-]? D+;
FS = [fFlL];
IS = [uUlL]*;
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
X = any\[*/];
*/
/*!re2c
"/*" { goto comment; }
L { RET(ID); }
L I { RET(ID); }
L I I { RET(ID); }
L I I I { RET(ID); }
L I I I I { RET(ID); }
L I I I I I { RET(ID); }
L I I I I I I { RET(ID); }
L I I I I I I I { RET(ID); }
L I* { RET(ID); }
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
(['] (ESC|any\[\n\\'])* ['])
{ RET(ICON); }
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
{ RET(FCON); }
(["] (ESC|any\[\n\\"])* ["])
{ RET(SCON); }
"..." { RET(ELLIPSIS); }
">>=" { RET(RSHIFTEQ); }
"<<=" { RET(LSHIFTEQ); }
"+=" { RET(ADDEQ); }
"-=" { RET(SUBEQ); }
"*=" { RET(MULEQ); }
"/=" { RET(DIVEQ); }
"%=" { RET(MODEQ); }
"&=" { RET(ANDEQ); }
"^=" { RET(XOREQ); }
"|=" { RET(OREQ); }
">>" { RET(RSHIFT); }
"<<" { RET(LSHIFT); }
"++" { RET(INCR); }
"--" { RET(DECR); }
"->" { RET(DEREF); }
"&&" { RET(ANDAND); }
"||" { RET(OROR); }
"<=" { RET(LEQ); }
">=" { RET(GEQ); }
"==" { RET(EQL); }
"!=" { RET(NEQ); }
";" { RET(';'); }
"{" { RET('{'); }
"}" { RET('}'); }
"," { RET(','); }
":" { RET(':'); }
"=" { RET('='); }
"(" { RET('('); }
")" { RET(')'); }
"[" { RET('['); }
"]" { RET(']'); }
"." { RET('.'); }
"&" { RET('&'); }
"!" { RET('!'); }
"~" { RET('~'); }
"-" { RET('-'); }
"+" { RET('+'); }
"*" { RET('*'); }
"/" { RET('/'); }
"%" { RET('%'); }
"<" { RET('<'); }
">" { RET('>'); }
"^" { RET('^'); }
"|" { RET('|'); }
"?" { RET('?'); }
[ \t\v\f]+ { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->pos = cursor; s->line++;
goto std;
}
any
{
printf("unexpected character: %c\n", *s->tok);
goto std;
}
*/
comment:
/*!re2c
"*/" { goto std; }
"\n"
{
if(cursor == s->eof) RET(EOI);
s->tok = s->pos = cursor; s->line++;
goto comment;
}
X { goto comment; }
X X { goto comment; }
X X X { goto comment; }
X X X X { goto comment; }
X X X X X { goto comment; }
X X X X X X { goto comment; }
X X X X X X X { goto comment; }
X X X X X X X X { goto comment; }
any { goto comment; }
*/
}
main(){
Scanner in;
int t;
memset((char*) &in, 0, sizeof(in));
in.fd = 0;
while((t = scan(&in)) != EOI){
/*
printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
printf("%d\n", t);
*/
}
close(in.fd);
}

View file

@ -0,0 +1,35 @@
// Build with "--input custom" re2c switch.
//
// This is an example of handling fixed-length buffer with "--input custom":
// on each YYPEEK we check for the end of input, thus YYFILL generation
// can be safely suppressed.
//
// Note that the limit passed to lex() excludes the terminating NUL character:
// we emulate the case when input has no terminating NUL.
//
// For a real-life example see https://github.com/sopyer/mjson
// or mjson.re from re2c test collection.
// Matches the input in [cursor, limit) against the rules in the re2c block.
// Returns true when the input starts with "int buffer " followed by
// "[" digits "]" (the bracketed part is trailing context); returns false
// for any other input.
bool lex (const char * cursor, const char * const limit)
{
// Saved cursor positions for backtracking and for trailing context.
const char * marker;
const char * ctxmarker;
# define YYCTYPE char
// Peeking at or past the limit yields 0 instead of dereferencing the pointer,
// which is what makes it safe to disable YYFILL below.
# define YYPEEK() (cursor >= limit ? 0 : *cursor)
# define YYSKIP() ++cursor
# define YYBACKUP() marker = cursor
# define YYBACKUPCTX() ctxmarker = cursor
# define YYRESTORE() cursor = marker
# define YYRESTORECTX() cursor = ctxmarker
/*!re2c
re2c:yyfill:enable = 0;
"int buffer " / "[" [0-9]+ "]" { return true; }
* { return false; }
*/
}
int main ()
{
char buffer [] = "int buffer [1024]";
return !lex (buffer, buffer + sizeof (buffer) - 1);
}

View file

@ -0,0 +1,20 @@
Build with "--input custom" re2c switch.
These are three examples of "--input custom" usage:
- input_custom_default.re:
implements default re2c input model (pointers to plain buffer)
- input_custom_fgetc:
implements C-style file input (using <stdio.h>)
- input_custom_istringstream:
implements std::istringstream input
Note that these examples are very simple and don't need
to implement YYFILL; the only reason they don't use
"re2c:yyfill:enable = 0;" is to keep YYLESSTHAN and YYLIMIT
(for the sake of example).
In real-life programs one will need to care for correct
end-of-input handling.

View file

@ -0,0 +1,24 @@
// Custom-input example that reproduces the default re2c input model with
// plain pointers into a buffer.
// Returns true when the input starts with "int buffer " followed by
// "[" digits "]" (the bracketed part is trailing context); returns false
// for any other input.
bool lex (const char * cursor, const char * const limit)
{
// Saved cursor positions for backtracking and for trailing context.
const char * marker;
const char * ctxmarker;
# define YYCTYPE char
# define YYPEEK() *cursor
# define YYSKIP() ++cursor
# define YYBACKUP() marker = cursor
# define YYBACKUPCTX() ctxmarker = cursor
# define YYRESTORE() cursor = marker
# define YYRESTORECTX() cursor = ctxmarker
// True when fewer than n characters remain before the limit.
# define YYLESSTHAN(n) limit - cursor < n
// Intentionally empty: the whole input is already in the buffer.
# define YYFILL(n) {}
/*!re2c
"int buffer " / "[" [0-9]+ "]" { return true; }
* { return false; }
*/
}
int main ()
{
char buffer [] = "int buffer [1024]";
return !lex (buffer, buffer + sizeof (buffer));
}

View file

@ -0,0 +1,43 @@
#include <stdio.h>
// Returns the next character of f without consuming it.
// At end of file (or on a read error) nothing is pushed back and the
// result is EOF converted to char.
char peek (FILE * f)
{
    // Keep the full int result: narrowing to char before ungetc() either
    // turns the byte 0xFF into EOF (which ungetc() refuses to push back, so
    // the byte is lost) or turns EOF into a bogus 0xFF byte.
    const int c = fgetc (f);
    if (c != EOF)
    {
        ungetc (c, f);
    }
    return (char) c;
}
// Custom-input example that reads through a C stdio stream: positions are
// file offsets obtained with ftell() and restored with fseek().
// Returns true when the stream content starts with "int buffer " followed by
// "[" digits "]" (the bracketed part is trailing context); returns false
// for any other input.
bool lex (FILE * f, const long limit)
{
// Saved file offsets for backtracking and for trailing context.
long marker;
long ctxmarker;
# define YYCTYPE char
# define YYPEEK() peek (f)
# define YYSKIP() fgetc (f)
# define YYBACKUP() marker = ftell (f)
# define YYBACKUPCTX() ctxmarker = ftell (f)
# define YYRESTORE() fseek (f, marker, SEEK_SET)
# define YYRESTORECTX() fseek (f, ctxmarker, SEEK_SET)
// True when fewer than n characters remain before the limit offset.
# define YYLESSTHAN(n) limit - ftell (f) < n
// Intentionally empty: no refill is performed in this example.
# define YYFILL(n) {}
/*!re2c
"int buffer " / "[" [0-9]+ "]" { return true; }
* { return false; }
*/
}
// Writes the sample text to "input.txt", lexes it back through a FILE*,
// and returns 0 when lex() accepts it. Returns 1 when lex() rejects the
// input or when the file cannot be created, written or reopened.
int main ()
{
    const char buffer [] = "int buffer [1024]";
    const char fn [] = "input.txt";

    FILE * f = fopen (fn, "w");
    if (!f)
    {
        return 1;
    }
    // sizeof (buffer) includes the terminating NUL, matching the limit
    // passed to lex() below.
    const bool written = fwrite (buffer, 1, sizeof (buffer), f) == sizeof (buffer);
    if (fclose (f) != 0 || !written)
    {
        return 1;
    }

    f = fopen (fn, "rb");
    if (!f)
    {
        return 1;
    }
    int result = !lex (f, sizeof (buffer));
    fclose (f);

    return result;
}

View file

@ -0,0 +1,27 @@
#include <sstream>
// Custom-input example that reads through a std::istringstream: positions
// are stream offsets obtained with tellg() and restored with seekg().
// Returns true when the stream content starts with "int buffer " followed by
// "[" digits "]" (the bracketed part is trailing context); returns false
// for any other input.
bool lex (std::istringstream & is, const std::streampos limit)
{
// Saved stream positions for backtracking and for trailing context.
std::streampos marker;
std::streampos ctxmarker;
# define YYCTYPE char
# define YYPEEK() is.peek ()
# define YYSKIP() is.ignore ()
# define YYBACKUP() marker = is.tellg ()
# define YYBACKUPCTX() ctxmarker = is.tellg ()
# define YYRESTORE() is.seekg (marker)
# define YYRESTORECTX() is.seekg (ctxmarker)
// True when fewer than n characters remain before the limit position.
# define YYLESSTHAN(n) limit - is.tellg () < n
// Intentionally empty: no refill is performed in this example.
# define YYFILL(n) {}
/*!re2c
"int buffer " / "[" [0-9]+ "]" { return true; }
* { return false; }
*/
}
int main ()
{
const char buffer [] = "int buffer [1024]";
std::istringstream is (buffer);
return !lex (is, sizeof (buffer));
}

View file

@ -11,13 +11,14 @@ typedef unsigned char uchar;
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
#define YYCTXMARKER s->ctx
#define YYFILL {cursor = fill(s, cursor);}
#define RETURN(i) {s->cur = cursor; return i;}
typedef struct Scanner {
int fd;
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
uchar *bot, *tok, *ptr, *ctx, *cur, *pos, *lim, *top, *eof;
uint line;
} Scanner;

View file

@ -226,14 +226,14 @@ public:
start:
/*!re2c
/*!re2c
re2c:startlabel = 1;
eol = "\n";
eof = "\000";
digit = [0-9];
integer = digit+;
alpha = [A-Za-z_];
any = [\000-\0377];
any = [\000-\377];
space = [ \h\t\v\f\r];
"if" { SEND(kIf); }

View file

@ -1,44 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define RET(n) printf("%d\n", n); return n
int scan(char *s, int l){
char *p = s;
char *q;
#define YYCTYPE char
#define YYCURSOR p
#define YYLIMIT (s+l)
#define YYMARKER q
#define YYFILL(n)
/*!re2c
'a'{1}"\n" {RET(1);}
'a'{2,3}"\n" {RET(2);}
'a'{6}"\n" {RET(4);}
'a'{4,}"\n" {RET(3);}
[^aq]|"\n" {RET(0);}
*/
}
#define do_scan(str) scan(str, strlen(str))
/*
 * Runs scan() over a fixed set of inputs that exercise the counted
 * repetition rules in both lower and upper case; each call prints the
 * number of the rule that matched.
 */
int main(void)
{
	do_scan("a\n");
	do_scan("aa\n");
	do_scan("aaa\n");
	do_scan("aaaa\n");
	do_scan("q");
	do_scan("a");
	do_scan("A\n");
	do_scan("AA\n");
	do_scan("aAa\n");
	do_scan("AaaA\n");
	do_scan("Q");
	do_scan("AaaAa\n");
	do_scan("AaaAaA\n");
	do_scan("A");
	do_scan("\n");
	do_scan("0");
	return 0;
}

View file

@ -1 +0,0 @@
Replacement modules for an existing REXX interpreter. Not standalone.

View file

@ -1,41 +0,0 @@
/*
 * Make room in the scanner buffer, read the next 512-byte block when input
 * comes from a file, and return the (possibly relocated) cursor.
 *
 * Data before s->tok is discarded. When s->eot is set, only [tok, eot) and
 * [cursor, lim) are kept and placed back to back, so the bytes between eot
 * and cursor are dropped. The buffer is reallocated when fewer than 512
 * bytes are free after s->lim. A short or failed read is padded with zero
 * bytes up to the full 512.
 */
uchar *ScanFill(uchar *cursor){
	unsigned cnt = s->tok - s->bot;
	int got;
	s->pos += cursor - s->mrk;
	if(cnt){
		/* The regions moved below can overlap their destinations, which
		 * memcpy does not permit; memmove handles both cases. */
		if(s->eot){
			unsigned len = s->eot - s->tok;
			memmove(s->bot, s->tok, len);
			s->eot = &s->bot[len];
			if((len = s->lim - cursor) != 0)
				memmove(s->eot, cursor, len);
			cursor = s->eot;
			s->lim = &cursor[len];
		} else {
			memmove(s->bot, s->tok, s->lim - s->tok);
			cursor -= cnt;
			s->lim -= cnt;
		}
		s->tok = s->bot;
		s->ptr -= cnt;
	}
	if((s->top - s->lim) < 512){
		uchar *buf = (uchar*) malloc(((s->lim - s->bot) + 512)*sizeof(uchar));
		memcpy(buf, s->bot, s->lim - s->bot);
		s->tok = buf;
		s->ptr = &buf[s->ptr - s->bot];
		if(s->eot)
			s->eot = &buf[s->eot - s->bot];
		cursor = &buf[cursor - s->bot];
		s->lim = &buf[s->lim - s->bot];
		s->top = &s->lim[512];
		free(s->bot);
		s->bot = buf;
	}
	s->mrk = cursor;
	if(ScanCBIO.file){
		got = (int) read(ScanCBIO.u.f.fd, (char*) s->lim, 512);
		/* read() returns -1 on error; as an unsigned count that would make
		 * the memset below start one byte before s->lim. Treat it as an
		 * empty read so the whole block is zero-filled instead. */
		cnt = got < 0 ? 0 : (unsigned) got;
		if(cnt != 512)
			memset(&s->lim[cnt], 0, 512 - cnt);
		s->lim += 512;
	}
	return cursor;
}

View file

@ -1,7 +0,0 @@
/*!re2c
"print" {return PRINT;}
[a-z]+ {return ID;}
[0-9]+ {return DEC;}
"0x" [0-9a-f]+ {return HEX;}
[\000-\377] {return ERR;}
*/

View file

@ -1,13 +0,0 @@
#define NULL ((char*) 0)
char *scan(char *p){
char *q;
#define YYCTYPE char
#define YYCURSOR p
#define YYLIMIT p
#define YYMARKER q
#define YYFILL(n)
/*!re2c
[0-9]+ {return YYCURSOR;}
[\000-\377] {return NULL;}
*/
}

View file

@ -1,73 +0,0 @@
/* $Id: globals.h 713 2007-04-29 15:33:47Z helly $ */
#ifndef _globals_h
#define _globals_h
#include "basics.h"
#include <set>
#include <algorithm>
#include <string>
#include "stream_lc.h"
#include "code_names.h"
namespace re2c
{
extern file_info sourceFileInfo;
extern file_info outputFileInfo;
extern bool bFlag;
extern bool dFlag;
extern bool eFlag;
extern bool fFlag;
extern bool gFlag;
extern bool iFlag;
extern bool sFlag;
extern bool uFlag;
extern bool wFlag;
extern bool bNoGenerationDate;
extern bool bSinglePass;
extern bool bFirstPass;
extern bool bLastPass;
extern bool bUsedYYAccept;
extern bool bUsedYYMaxFill;
extern bool bUsedYYMarker;
extern bool bUseStartLabel;
extern std::string startLabelName;
extern std::string labelPrefix;
extern std::string yychConversion;
extern uint maxFill;
extern uint next_label;
extern uint cGotoThreshold;
/* configurations */
extern uint topIndent;
extern std::string indString;
extern bool yybmHexTable;
extern bool bUseStateAbort;
extern bool bUseStateNext;
extern bool bWroteGetState;
extern bool bUseYYFill;
extern bool bUseYYFillParam;
extern uint asc2ebc[256];
extern uint ebc2asc[256];
extern uint *xlat, *talx;
extern uint next_fill_index;
extern uint last_fill_index;
extern std::set<uint> vUsedLabels;
extern re2c::CodeNames mapCodeName;
extern uint nRealChars;
extern char octCh(uint c);
extern char hexCh(uint c);
} // end namespace re2c
#endif

View file

@ -1,56 +0,0 @@
/* $Id: ins.h 535 2006-05-25 13:36:14Z helly $ */
#ifndef _ins_h
#define _ins_h
#include "basics.h"
namespace re2c
{
// Character type used by the instruction helpers declared with Char*.
typedef unsigned short Char;
// Instruction kinds.
// NOTE(review): presumably the values stored in Ins::i.tag - confirm against
// the code that builds instructions.
const uint CHAR = 0;
const uint GOTO = 1;
const uint FORK = 2;
const uint TERM = 3;
const uint CTXT = 4;
// One instruction, viewed through one of two overlapping layouts that both
// end in a link pointer.
union Ins {
// Generic view: a tag byte plus the mark flag used by isMarked/mark/unmark.
struct
{
byte tag;
byte marked;
void *link;
}
i;
// Alternative view with two 16-bit fields.
// NOTE(review): looks like the layout for character instructions - confirm.
struct
{
ushort value;
ushort bump;
void *link;
}
c;
};
// Returns true when the instruction's mark flag is set.
inline bool isMarked(Ins *i)
{
return i->i.marked != 0;
}
// Sets the instruction's mark flag.
inline void mark(Ins *i)
{
i->i.marked = true;
}
// Clears the instruction's mark flag.
inline void unmark(Ins *i)
{
i->i.marked = false;
}
} // end namespace re2c
#endif

View file

@ -1,351 +0,0 @@
/* $Id: main.cc 691 2007-04-22 15:07:39Z helly $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "config_w32.h"
#endif
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include "globals.h"
#include "parser.h"
#include "dfa.h"
#include "mbo_getopt.h"
namespace re2c
{
file_info sourceFileInfo;
file_info outputFileInfo;
bool bFlag = false;
bool dFlag = false;
bool eFlag = false;
bool fFlag = false;
bool gFlag = false;
bool iFlag = false;
bool sFlag = false;
bool uFlag = false;
bool wFlag = false;
bool bNoGenerationDate = false;
bool bSinglePass = false;
bool bFirstPass = true;
bool bLastPass = false;
bool bUsedYYAccept = false;
bool bUsedYYMaxFill = false;
bool bUsedYYMarker = true;
bool bUseStartLabel = false;
bool bUseStateNext = false;
bool bUseYYFill = true;
bool bUseYYFillParam = true;
std::string startLabelName;
std::string labelPrefix("yy");
std::string yychConversion("");
uint maxFill = 1;
uint next_label = 0;
uint cGotoThreshold = 9;
uint topIndent = 0;
std::string indString("\t");
bool yybmHexTable = false;
bool bUseStateAbort = false;
bool bWroteGetState = false;
uint nRealChars = 256;
uint next_fill_index = 0;
uint last_fill_index = 0;
std::set<uint> vUsedLabels;
re2c::CodeNames mapCodeName;
free_list<RegExp*> RegExp::vFreeList;
free_list<Range*> Range::vFreeList;
using namespace std;
static char *opt_arg = NULL;
static int opt_ind = 1;
static const mbo_opt_struct OPTIONS[] =
{
mbo_opt_struct('?', 0, "help"),
mbo_opt_struct('b', 0, "bit-vectors"),
mbo_opt_struct('d', 0, "debug-output"),
mbo_opt_struct('e', 0, "ecb"),
mbo_opt_struct('f', 0, "storable-state"),
mbo_opt_struct('g', 0, "computed-gotos"),
mbo_opt_struct('h', 0, "help"),
mbo_opt_struct('i', 0, "no-debug-info"),
mbo_opt_struct('o', 1, "output"),
mbo_opt_struct('s', 0, "nested-ifs"),
mbo_opt_struct('u', 0, "unicode"),
mbo_opt_struct('v', 0, "version"),
mbo_opt_struct('V', 0, "vernum"),
mbo_opt_struct('w', 0, "wide-chars"),
mbo_opt_struct('1', 0, "single-pass"),
mbo_opt_struct(10, 0, "no-generation-date"),
mbo_opt_struct('-', 0, NULL) /* end of args */
};
static void usage()
{
cerr << "usage: re2c [-bdefghisvVw1] [-o file] file\n"
"\n"
"-? -h --help Display this info.\n"
"\n"
"-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
" coax better code out of the compiler. Most useful for\n"
" specifications with more than a few keywords (e.g. for\n"
" most programming languages).\n"
"\n"
"-d --debug-output Creates a parser that dumps information during\n"
" about the current position and in which state the\n"
" parser is.\n"
"\n"
"-e --ecb Cross-compile from an ASCII platform to\n"
" an EBCDIC one.\n"
"\n"
"-f --storable-state Generate a scanner that supports storable states.\n"
"\n"
"-g --computed-gotos Implies -b. Generate computed goto code (only useable\n"
" with gcc).\n"
"\n"
"-i --no-debug-info Do not generate '#line' info (usefull for versioning).\n"
"\n"
"-o --output=output Specify the output file instead of stdout\n"
" This cannot be used together with -e switch.\n"
"\n"
"-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
" need this assist to generate better code.\n"
"\n"
"-u --unicode Implies -w but supports the full Unicode character set.\n"
"\n"
"-v --version Show version information.\n"
"\n"
"-V --vernum Show version as one number.\n"
"\n"
"-w --wide-chars Create a parser that supports wide chars (UCS-2). This\n"
" implies -s and cannot be used together with -e switch.\n"
"\n"
"-1 --single-pass Force single pass generation, this cannot be combined\n"
" with -f and disables YYMAXFILL generation prior to last\n"
" re2c block.\n"
"\n"
"--no-generation-date Suppress date output in the generated output so that it\n"
" only shows the re2c version.\n"
;
}
} // end namespace re2c
using namespace re2c;
/**
 * re2c entry point.
 *
 * Parses the command line into the global option flags, opens the source
 * (a file name, or "-" for stdin) and the output (-o file, or stdout), and
 * runs the parser. Unless -1 (single pass) is given, the source is first
 * parsed once into a null stream and the per-pass counters are reset before
 * the real output pass.
 *
 * @return 0 on success; 1 when a file cannot be opened or -1 is combined
 *         with -b or -f; 2 for usage, version output, or other conflicting
 *         switches.
 */
int main(int argc, char *argv[])
{
	int c;
	const char *sourceFileName = 0;
	const char *outputFileName = 0;

	if (argc == 1)
	{
		usage();
		return 2;
	}

	while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0)) != -1)
	{
		switch (c)
		{
			case 'b':
			bFlag = true;
			sFlag = true;
			break;
			case 'e':
			xlat = asc2ebc;
			talx = ebc2asc;
			eFlag = true;
			break;
			case 'd':
			dFlag = true;
			break;
			case 'f':
			fFlag = true;
			break;
			case 'g':
			gFlag = true;
			bFlag = true;
			sFlag = true;
			break;
			case 'i':
			iFlag = true;
			break;
			case 'o':
			outputFileName = opt_arg;
			break;
			case 's':
			sFlag = true;
			break;
			case '1':
			bSinglePass = true;
			break;
			case 'v':
			cout << "re2c " << PACKAGE_VERSION << "\n";
			return 2;
			case 'V': {
				// Normalize "major.minor[.patch]" into a six digit number,
				// two digits per component.
				string vernum(PACKAGE_VERSION);

				if (vernum[1] == '.')
				{
					vernum.insert(0, "0");
				}
				vernum.erase(2, 1);
				if (vernum[3] == '.')
				{
					vernum.insert(2, "0");
				}
				vernum.erase(4, 1);
				if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9')
				{
					vernum.insert(4, "0");
				}
				vernum.resize(6);
				cout << vernum << endl;
				return 2;
			}
			case 'w':
			nRealChars = (1<<16); /* 0x10000 */
			sFlag = true;
			wFlag = true;
			break;
			case 'u':
			nRealChars = 0x110000; /* 17 times w-Flag */
			sFlag = true;
			uFlag = true;
			break;
			case 'h':
			case '?':
			default:
			usage();
			return 2;
			case 10:
			bNoGenerationDate = true;
			break;
		}
	}

	if ((bFlag || fFlag) && bSinglePass) {
		std::cerr << "re2c: error: Cannot combine -1 and -b or -f switch\n";
		return 1;
	}

	// -u only turns wFlag on further below, so it has to be tested here
	// explicitly; otherwise "-e -u" would slip through this check.
	if ((wFlag || uFlag) && eFlag)
	{
		std::cerr << "re2c: error: Cannot combine -e with -w or -u switch\n";
		return 2;
	}
	if (wFlag && uFlag)
	{
		std::cerr << "re2c: error: Cannot combine -u with -w switch\n";
		return 2;
	}

	if (uFlag)
	{
		wFlag = true;
	}

	if (argc == opt_ind + 1)
	{
		sourceFileName = argv[opt_ind];
	}
	else
	{
		usage();
		return 2;
	}

	// set up the source stream
	re2c::ifstream_lc source;

	if (sourceFileName[0] == '-' && sourceFileName[1] == '\0')
	{
		if (fFlag)
		{
			std::cerr << "re2c: error: multiple /*!re2c stdin is not acceptable when -f is specified\n";
			return 1;
		}
		sourceFileName = "<stdin>";
		source.open(stdin);
	}
	else if (!source.open(sourceFileName).is_open())
	{
		cerr << "re2c: error: cannot open " << sourceFileName << "\n";
		return 1;
	}

	// set up the output stream
	re2c::ofstream_lc output;

	if (outputFileName == 0 || (sourceFileName[0] == '-' && sourceFileName[1] == '\0'))
	{
		outputFileName = "<stdout>";
		output.open(stdout);
	}
	else if (!output.open(outputFileName).is_open())
	{
		cerr << "re2c: error: cannot open " << outputFileName << "\n";
		return 1;
	}
	Scanner scanner(sourceFileName, source, output);
	sourceFileInfo = file_info(sourceFileName, &scanner);
	outputFileInfo = file_info(outputFileName, &output);

	if (!bSinglePass)
	{
		// First pass: parse into a null stream, then reset the counters
		// that the output pass starts from.
		bUsedYYMarker = false;

		re2c::ifstream_lc null_source;

		if (!null_source.open(sourceFileName).is_open())
		{
			cerr << "re2c: error: cannot re-open " << sourceFileName << "\n";
			return 1;
		}

		null_stream null_dev;
		Scanner null_scanner(sourceFileName, null_source, null_dev);
		parse(null_scanner, null_dev);
		next_label = 0;
		next_fill_index = 0;
		bWroteGetState = false;
		bUsedYYMaxFill = false;
		bFirstPass = false;
	}

	bLastPass = true;
	parse(scanner, output);
	return 0;
}

View file

@ -1,210 +0,0 @@
/*
Author: Marcus Boerger <helly@users.sourceforge.net>
*/
/* $Id: mbo_getopt.cc 698 2007-04-23 21:06:56Z helly $ */
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include "mbo_getopt.h"
#define OPTERRCOLON (1)
#define OPTERRNF (2)
#define OPTERRARG (3)
namespace re2c
{
/* Report an option parsing problem on stderr when show_err is non-zero.
 * oint and optchr locate the offending character inside argv; err is one of
 * the OPTERR* codes. Always returns '?'. */
static int mbo_opt_error(int, char * const *argv, int oint, int optchr, int err, int show_err)
{
	if (!show_err)
	{
		return ('?');
	}

	fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr + 1);

	if (err == OPTERRCOLON)
	{
		fprintf(stderr, ": in flags\n");
	}
	else if (err == OPTERRNF)
	{
		fprintf(stderr, "option not found %c\n", argv[oint][optchr]);
	}
	else if (err == OPTERRARG)
	{
		fprintf(stderr, "no argument for option %c\n", argv[oint][optchr]);
	}
	else
	{
		fprintf(stderr, "unknown\n");
	}

	return ('?');
}
int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct *opts, char **optarg, int *optind, int show_err)
{
static int optchr = 0;
static int dash = 0; /* have already seen the - */
int arg_start = 2;
int opts_idx = -1;
if (*optind >= argc)
{
return (EOF);
}
if (!dash)
{
if ((argv[*optind][0] != '-'))
{
return (EOF);
}
else
{
if (!argv[*optind][1])
{
/*
* use to specify stdin. Need to let pgm process this and
* the following args
*/
return (EOF);
}
}
}
if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-'))
{
/* '--' indicates end of args if not followed by a known long option name */
if (argv[*optind][2] == '\0') {
(*optind)++;
return(EOF);
}
while (1)
{
opts_idx++;
if (opts[opts_idx].opt_char == '-')
{
(*optind)++;
return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err));
}
else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name))
{
break;
}
}
optchr = 0;
dash = 0;
arg_start = 2 + strlen(opts[opts_idx].opt_name);
}
else
{
if (!dash)
{
dash = 1;
optchr = 1;
}
/* Check if the guy tries to do a -: kind of flag */
if (argv[*optind][optchr] == ':')
{
dash = 0;
(*optind)++;
return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRCOLON, show_err));
}
arg_start = 1 + optchr;
}
if (opts_idx < 0)
{
while (1)
{
opts_idx++;
if (opts[opts_idx].opt_char == '-')
{
int errind = *optind;
int errchr = optchr;
if (!argv[*optind][optchr + 1])
{
dash = 0;
(*optind)++;
}
else
{
optchr++;
arg_start++;
}
return (mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err));
}
else if (argv[*optind][optchr] == opts[opts_idx].opt_char)
{
break;
}
}
}
if (opts[opts_idx].need_param)
{
/* Check for cases where the value of the argument
is in the form -<arg> <val> or in the form -<arg><val> */
dash = 0;
if (!argv[*optind][arg_start])
{
(*optind)++;
if (*optind == argc)
{
return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err));
}
*optarg = argv[(*optind)++];
}
else
{
*optarg = &argv[*optind][arg_start];
(*optind)++;
}
return opts[opts_idx].opt_char;
}
else
{
if (arg_start >= 2 && !((argv[*optind][0] == '-') && (argv[*optind][1] == '-')))
{
if (!argv[*optind][optchr + 1])
{
dash = 0;
(*optind)++;
}
else
{
optchr++;
}
}
else
{
(*optind)++;
}
return opts[opts_idx].opt_char;
}
assert(0);
return (0); /* never reached */
}
} // end namespace re2c

View file

@ -1,34 +0,0 @@
/*
Author: Marcus Boerger <helly@users.sourceforge.net>
*/
/* $Id: mbo_getopt.h 539 2006-05-25 13:37:38Z helly $ */
/* Define structure for one recognized option (both single char and long name).
* If opt_char is '-' this is the last option.
*/
#ifndef RE2C_MBO_GETOPT_H_INCLUDE_GUARD_
#define RE2C_MBO_GETOPT_H_INCLUDE_GUARD_
namespace re2c
{
// Describes one command line option accepted by mbo_getopt().
struct mbo_opt_struct
{
mbo_opt_struct(char _opt_char, int _need_param, const char * _opt_name)
: opt_char(_opt_char), need_param(_need_param), opt_name(_opt_name)
{
}
// Short option character; also the value mbo_getopt() returns for it.
const char opt_char;
// Non-zero when the option takes an argument.
const int need_param;
// Long option name matched after "--"; may be NULL.
const char * opt_name;
};
int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct *opts, char **optarg, int *optind, int show_err);
} // end namespace re2c
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,56 +0,0 @@
/* $Id: parser.h 565 2006-06-05 22:07:13Z helly $ */
#ifndef _parser_h
#define _parser_h
#include "scanner.h"
#include "re.h"
#include <iosfwd>
#include <map>
namespace re2c
{
// A named definition: associates a name with the regular expression bound
// to it. Instances are obtained through find() and tracked in a static
// name -> Symbol table.
class Symbol
{
public:
// Expression bound to this name; NULL until a definition assigns it.
RegExp* re;
// Returns the symbol for the given name.
// NOTE(review): presumably creates the symbol when it is not in the table
// yet (the constructor is protected) - confirm in the implementation.
static Symbol *find(const SubStr&);
// Empties the symbol table.
// NOTE(review): presumably also deletes the stored symbols - confirm.
static void ClearTable();
typedef std::map<std::string, Symbol*> SymbolTable;
protected:
Symbol(const SubStr& str)
: re(NULL)
, name(str)
{
}
private:
static SymbolTable symbol_table;
Str name;
// NOTE(review): other headers guard the same pattern with "#ifdef PEDANTIC";
// with "#if" this requires PEDANTIC to be defined to a non-zero value.
#if PEDANTIC
Symbol(const Symbol& oth)
: re(oth.re)
, name(oth.name)
{
}
Symbol& operator = (const Symbol& oth)
{
new(this) Symbol(oth);
return *this;
}
#endif
};
void parse(Scanner&, std::ostream&);
} // end namespace re2c
#endif

View file

@ -1,220 +0,0 @@
%{
/* $Id: parser.y 674 2007-04-16 21:39:11Z helly $ */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <time.h>
#include <string.h>
#include <stdlib.h>
#include <iostream>
#include "globals.h"
#include "parser.h"
#include "basics.h"
#define YYMALLOC malloc
#define YYFREE free
using namespace re2c;
extern "C"
{
int yylex();
void yyerror(const char*);
}
static re2c::uint accept;
static RegExp *spec;
static Scanner *in = NULL;
/* Bison version 1.875 emits a definition that is not working
* with several g++ version. Hence we disable it here.
*/
#if defined(__GNUC__)
#define __attribute__(x)
#endif
/* strdup() isn't standard C, so if we don't have it, we'll create our
* own version
*/
#if !defined(HAVE_STRDUP)
static char* strdup(const char* s)
{
char* rv = (char*)malloc(strlen(s) + 1);
if (rv == NULL)
return NULL;
strcpy(rv, s);
return rv;
}
#endif
%}
%start spec
%union {
re2c::Symbol *symbol;
re2c::RegExp *regexp;
re2c::Token *token;
char op;
int number;
re2c::ExtOp extop;
re2c::Str *str;
};
%token CLOSESIZE CLOSE ID CODE RANGE STRING
%token CONFIG VALUE NUMBER
%type <op> CLOSE
%type <op> close
%type <extop> CLOSESIZE
%type <symbol> ID
%type <token> CODE
%type <regexp> RANGE STRING
%type <regexp> rule look expr diff term factor primary
%type <str> CONFIG VALUE
%type <number> NUMBER
%%
spec :
{ accept = 0;
spec = NULL; }
| spec rule
{ spec = spec? mkAlt(spec, $2) : $2; }
| spec decl
;
decl : ID '=' expr ';'
{ if($1->re)
in->fatal("sym already defined");
$1->re = $3; }
| ID '=' expr '/'
{ in->fatal("trailing contexts are not allowed in named definitions"); }
| CONFIG '=' VALUE ';'
{ in->config(*$1, *$3); delete $1; delete $3; }
| CONFIG '=' NUMBER ';'
{ in->config(*$1, $3); delete $1; }
;
rule : expr look CODE
{ $$ = new RuleOp($1, $2, $3, accept++); }
;
look :
{ $$ = new NullOp; }
| '/' expr
{ $$ = $2; }
;
expr : diff
{ $$ = $1; }
| expr '|' diff
{ $$ = mkAlt($1, $3); }
;
diff : term
{ $$ = $1; }
| diff '\\' term
{ $$ = mkDiff($1, $3);
if(!$$)
in->fatal("can only difference char sets");
}
;
term : factor
{ $$ = $1; }
| term factor
{ $$ = new CatOp($1, $2); }
;
factor : primary
{ $$ = $1; }
| primary close
{
switch($2){
case '*':
$$ = mkAlt(new CloseOp($1), new NullOp());
break;
case '+':
$$ = new CloseOp($1);
break;
case '?':
$$ = mkAlt($1, new NullOp());
break;
}
}
| primary CLOSESIZE
{
$$ = new CloseVOp($1, $2.minsize, $2.maxsize);
}
;
close : CLOSE
{ $$ = $1; }
| close CLOSE
{ $$ = ($1 == $2) ? $1 : '*'; }
;
primary : ID
{ if(!$1->re)
in->fatal("can't find symbol");
$$ = $1->re; }
| RANGE
{ $$ = $1; }
| STRING
{ $$ = $1; }
| '(' expr ')'
{ $$ = $2; }
;
%%
extern "C" {
void yyerror(const char* s)
{
in->fatal(s);
}
int yylex(){
return in ? in->scan() : 0;
}
} // end extern "C"
namespace re2c
{
void parse(Scanner& i, std::ostream& o)
{
in = &i;
o << "/* Generated by re2c " PACKAGE_VERSION;
if (!bNoGenerationDate)
{
o << " on ";
time_t now = time(&now);
o.write(ctime(&now), 24);
}
o << " */\n";
o << sourceFileInfo;
while(i.echo())
{
yyparse();
if(spec)
{
genCode(o, topIndent, spec);
}
o << sourceFileInfo;
}
RegExp::vFreeList.clear();
Range::vFreeList.clear();
Symbol::ClearTable();
in = NULL;
}
} // end namespace re2c

View file

@ -1,496 +0,0 @@
/* $Id: re.h 775 2007-07-10 19:33:17Z helly $ */
#ifndef _re_h
#define _re_h
#include <iostream>
#include <set>
#include "token.h"
#include "ins.h"
#include "globals.h"
namespace re2c
{
// Set of owned pointers. clear() (also run by the destructor) deletes every
// stored pointer. While clear() is running, erase() is a no-op that returns
// 0, so an element whose destructor erases itself from the list does not
// disturb the iteration.
template<class Ptr>
class free_list: protected std::set<Ptr>
{
public:
	typedef typename std::set<Ptr>::iterator iterator;
	typedef typename std::set<Ptr>::size_type size_type;
	typedef typename std::set<Ptr>::key_type key_type;

	free_list(): in_clear(false)
	{
	}

	using std::set<Ptr>::insert;

	// Removes key unless a clear() is in progress; returns the number of
	// elements removed.
	size_type erase(const key_type& key)
	{
		return in_clear ? size_type(0) : std::set<Ptr>::erase(key);
	}

	// Deletes every stored pointer and empties the set.
	void clear()
	{
		in_clear = true;
		iterator cur = this->begin();
		while (cur != this->end())
		{
			delete *cur;
			++cur;
		}
		std::set<Ptr>::clear();
		in_clear = false;
	}

	~free_list()
	{
		clear();
	}

protected:
	// True only while clear() is deleting elements.
	bool in_clear;
};
typedef struct extop
{
char op;
int minsize;
int maxsize;
}
ExtOp;
struct CharPtn
{
uint card;
CharPtn *fix;
CharPtn *nxt;
};
typedef CharPtn *CharPtr;
struct CharSet
{
CharSet();
~CharSet();
CharPtn *fix;
CharPtn *freeHead, **freeTail;
CharPtr *rep;
CharPtn *ptn;
};
class Range
{
public:
Range *next;
uint lb, ub; // [lb,ub)
static free_list<Range*> vFreeList;
public:
Range(uint l, uint u) : next(NULL), lb(l), ub(u)
{
vFreeList.insert(this);
}
Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub)
{
vFreeList.insert(this);
}
~Range()
{
vFreeList.erase(this);
}
friend std::ostream& operator<<(std::ostream&, const Range&);
friend std::ostream& operator<<(std::ostream&, const Range*);
};
inline std::ostream& operator<<(std::ostream &o, const Range *r)
{
return r ? o << *r : o;
}
class RegExp
{
public:
uint size;
static free_list<RegExp*> vFreeList;
public:
RegExp() : size(0)
{
vFreeList.insert(this);
}
virtual ~RegExp()
{
vFreeList.erase(this);
}
virtual const char *typeOf() = 0;
RegExp *isA(const char *t)
{
return typeOf() == t ? this : NULL;
}
virtual void split(CharSet&) = 0;
virtual void calcSize(Char*) = 0;
virtual uint fixedLength();
virtual void compile(Char*, Ins*) = 0;
virtual void display(std::ostream&) const = 0;
friend std::ostream& operator<<(std::ostream&, const RegExp&);
friend std::ostream& operator<<(std::ostream&, const RegExp*);
};
inline std::ostream& operator<<(std::ostream &o, const RegExp &re)
{
re.display(o);
return o;
}
// Prints the expression behind re, or nothing when re is NULL, matching the
// behaviour of the Range* overload.
inline std::ostream& operator<<(std::ostream &o, const RegExp *re)
{
	return re ? o << *re : o;
}
/* Regular-expression node that is displayed as "_". */
class NullOp: public RegExp
{
public:
    static const char *type;

    const char *typeOf() { return type; }

    void split(CharSet&);
    void calcSize(Char*);
    uint fixedLength();
    void compile(Char*, Ins*);

    void display(std::ostream &out) const
    {
        out << "_";
    }
};
/* Regular-expression node that holds a pointer to a Range list. */
class MatchOp: public RegExp
{
public:
    static const char *type;
    Range *match;

public:
    MatchOp(Range *m)
        : match(m)
    {
    }

    const char *typeOf() { return type; }

    void split(CharSet&);
    void calcSize(Char*);
    uint fixedLength();
    void compile(Char*, Ins*);
    void display(std::ostream&) const;

#ifdef PEDANTIC
private:
    /* Copying is kept private; these exist only in PEDANTIC builds. */
    MatchOp(const MatchOp& oth)
        : RegExp(oth)
        , match(oth.match)
    {
    }

    MatchOp& operator = (const MatchOp& oth)
    {
        new(this) MatchOp(oth);
        return *this;
    }
#endif
};
/*
 * Regular-expression node for a rule: an expression, a second expression
 * `ctx` (displayed after a "/"), and the Token `code` attached to the rule.
 * The node deletes `code` when it is destroyed.
 */
class RuleOp: public RegExp
{
public:
    static const char *type;

private:
    RegExp *exp;

public:
    RegExp *ctx;
    Ins *ins;
    uint accept;
    Token *code;
    uint line;

public:
    RuleOp(RegExp*, RegExp*, Token*, uint);

    ~RuleOp() { delete code; }

    const char *typeOf() { return type; }

    void split(CharSet&);
    void calcSize(Char*);
    void compile(Char*, Ins*);

    /* Printed as "<exp>/<ctx>;". */
    void display(std::ostream &out) const
    {
        out << exp << "/" << ctx << ";";
    }

#ifdef PEDANTIC
private:
    /* Copying is kept private; these exist only in PEDANTIC builds. */
    RuleOp(const RuleOp& oth)
        : RegExp(oth)
        , exp(oth.exp)
        , ctx(oth.ctx)
        , ins(oth.ins)
        , accept(oth.accept)
        , code(oth.code)
        , line(oth.line)
    {
    }

    RuleOp& operator = (const RuleOp& oth)
    {
        new(this) RuleOp(oth);
        return *this;
    }
#endif
};
/*
 * line_number adapter for a rule: reports the line stored in the rule's
 * code token (op.code->line). Only a reference to the RuleOp is kept.
 */
class RuleLine: public line_number
{
public:
    RuleLine(const RuleOp& _op) : op(_op) {}

    uint get_line() const { return op.code->line; }

    const RuleOp& op;
};
/* Declared ahead of AltOp, which names it as a friend. */
RegExp *mkAlt(RegExp*, RegExp*);

/* Regular-expression node for an alternation, displayed as "exp1|exp2". */
class AltOp: public RegExp
{
private:
    RegExp *exp1;
    RegExp *exp2;

public:
    static const char *type;

public:
    AltOp(RegExp *e1, RegExp *e2) : exp1(e1), exp2(e2) {}

    const char *typeOf() { return type; }

    void split(CharSet&);
    void calcSize(Char*);
    uint fixedLength();
    void compile(Char*, Ins*);

    void display(std::ostream &out) const
    {
        out << exp1 << "|" << exp2;
    }

    friend RegExp *mkAlt(RegExp*, RegExp*);

#ifdef PEDANTIC
private:
    /* Copying is kept private; these exist only in PEDANTIC builds. */
    AltOp(const AltOp& oth)
        : RegExp(oth)
        , exp1(oth.exp1)
        , exp2(oth.exp2)
    {
    }

    AltOp& operator = (const AltOp& oth)
    {
        new(this) AltOp(oth);
        return *this;
    }
#endif
};
/*
 * Regular-expression node for a concatenation; displayed as exp1
 * immediately followed by exp2.
 */
class CatOp: public RegExp
{
private:
    RegExp *exp1;
    RegExp *exp2;

public:
    static const char *type;

public:
    CatOp(RegExp *e1, RegExp *e2) : exp1(e1), exp2(e2) {}

    const char *typeOf() { return type; }

    void split(CharSet&);
    void calcSize(Char*);
    uint fixedLength();
    void compile(Char*, Ins*);

    void display(std::ostream &out) const
    {
        out << exp1 << exp2;
    }

#ifdef PEDANTIC
private:
    /* Copying is kept private; these exist only in PEDANTIC builds. */
    CatOp(const CatOp& oth)
        : RegExp(oth)
        , exp1(oth.exp1)
        , exp2(oth.exp2)
    {
    }

    CatOp& operator = (const CatOp& oth)
    {
        new(this) CatOp(oth);
        return *this;
    }
#endif
};
/* Regular-expression node wrapping one expression; displayed as "exp+". */
class CloseOp: public RegExp
{
private:
    RegExp *exp;

public:
    static const char *type;

public:
    CloseOp(RegExp *e) : exp(e) {}

    const char *typeOf() { return type; }

    void split(CharSet&);
    void calcSize(Char*);
    void compile(Char*, Ins*);

    void display(std::ostream &out) const
    {
        out << exp << "+";
    }

#ifdef PEDANTIC
private:
    /* Copying is kept private; these exist only in PEDANTIC builds. */
    CloseOp(const CloseOp& oth)
        : RegExp(oth)
        , exp(oth.exp)
    {
    }

    CloseOp& operator = (const CloseOp& oth)
    {
        new(this) CloseOp(oth);
        return *this;
    }
#endif
};
/*
 * Regular-expression node wrapping one expression together with a lower
 * bound `min` and an upper bound `max`.
 */
class CloseVOp: public RegExp
{
private:
    RegExp *exp;
    int min;
    int max;

public:
    static const char *type;

public:
    CloseVOp(RegExp *e, int lb, int ub)
        : exp(e)
        , min(lb)
        , max(ub)
    {
    }

    const char *typeOf()
    {
        return type;
    }

    void split(CharSet&);
    void calcSize(Char*);
    void compile(Char*, Ins*);

    /*
     * Printed as "exp{min,max}" so the node can be told apart from a plain
     * CloseOp (which prints "exp+") and the bounds are visible in dumps.
     * The bounds are written exactly as stored.
     * NOTE(review): a negative max presumably means "no upper bound";
     * confirm against the parser before special-casing it here.
     */
    void display(std::ostream &o) const
    {
        o << exp << "{" << min << "," << max << "}";
    }

#ifdef PEDANTIC
private:
    /* Copying is kept private; these exist only in PEDANTIC builds. */
    CloseVOp(const CloseVOp& oth)
        : RegExp(oth)
        , exp(oth.exp)
        , min(oth.min)
        , max(oth.max)
    {
    }

    CloseVOp& operator = (const CloseVOp& oth)
    {
        new(this) CloseVOp(oth);
        return *this;
    }
#endif
};
// Functions declared here but defined outside this header.
// genCode comes in two overloads; both take the output stream and a RegExp.
extern void genCode(std::ostream&, RegExp*);
extern void genCode(std::ostream&, uint, RegExp*);
extern void genGetState(std::ostream&, uint&, uint);
// Combinators that take two expressions and return one.
extern RegExp *mkDiff(RegExp*, RegExp*);
// mkAlt is also declared earlier in this header, ahead of AltOp (its friend).
extern RegExp *mkAlt(RegExp*, RegExp*);
} // end namespace re2c
#endif

View file

@ -1,597 +0,0 @@
.\"
.\" $Id: re2c.1.in 663 2007-04-01 11:22:15Z helly $
.\"
.TH RE2C 1 "22 April 2005" "Version 0.12.3"
.ds re \fBre2c\fP
.ds le \fBlex\fP
.ds rx regular expression
.ds lx \fIl\fP-expression
.SH NAME
re2c \- convert regular expressions to C/C++
.SH SYNOPSIS
\*(re [\fB-bdefghisuvVw1\fP] [\fB-o output\fP] file
.SH DESCRIPTION
\*(re is a preprocessor that generates C-based recognizers from regular
expressions.
The input to \*(re consists of C/C++ source interleaved with
comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
scanner specifications.
In the output these comments are replaced with code that, when
executed, will find the next input token and then execute
some user-supplied token-specific code.
For example, given the following code
.in +3
.nf
char *scan(char *p)
{
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
re2c:yyfill:enable = 0;
re2c:yych:conversion = 1;
re2c:indent:top = 1;
[0-9]+ {return p;}
[\000-\377] {return (char*)0;}
*/
}
.fi
.in -3
\*(re -is will generate
.in +3
.nf
/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
char *scan(char *p)
{
{
unsigned char yych;
yych = (unsigned char)*p;
if(yych <= '/') goto yy4;
if(yych >= ':') goto yy4;
++p;
yych = (unsigned char)*p;
goto yy7;
yy3:
{return p;}
yy4:
++p;
yych = (unsigned char)*p;
{return (char*)0;}
yy6:
++p;
yych = (unsigned char)*p;
yy7:
if(yych <= '/') goto yy3;
if(yych <= '9') goto yy6;
goto yy3;
}
}
.fi
.in -3
You can place one \fC/*!max:re2c */\fP comment that will output a "#define
\fCYYMAXFILL\fP <n>" line that holds the maximum number of characters
required to parse the input. That is the maximum value \fCYYFILL\fP(n)
will receive. If -1 is in effect then YYMAXFILL can only be triggered once
after the last \fC/*!re2c */\fP.
You can also use \fC/*!ignore:re2c */\fP blocks, which allow you to document the
scanner code and are not part of the output.
.SH OPTIONS
\*(re provides the following options:
.TP
\fB-?\fP
\fB-h\fP
Invoke a short help.
.TP
\fB-b\fP
Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better
code out of the compiler. Most useful for specifications with more than a
few keywords (e.g. for most programming languages).
.TP
\fB-d\fP
Creates a parser that dumps information about the current position and in
which state the parser is while parsing the input. This is useful to debug
parser issues and states. If you use this switch you need to define a macro
\fIYYDEBUG\fP that is called like a function with two parameters:
\fIvoid YYDEBUG(int state, char current)\fP. The first parameter receives the
state or -1 and the second parameter receives the input at the current cursor.
.TP
\fB-e\fP
Cross-compile from an ASCII platform to an EBCDIC one.
.TP
\fB-f\fP
Generate a scanner with support for storable state.
For details see below at \fBSCANNER WITH STORABLE STATES\fP.
.TP
\fB-g\fP
Generate a scanner that utilizes GCC's computed goto feature. That is \*(re
generates jump tables whenever a decision is of a certain complexity (e.g. a
lot of if conditions are otherwise necessary). This is only useable with GCC
and produces output that cannot be compiled with any other compiler. Note that
this implies -b and that the complexity threshold can be configured using the
inplace configuration "cgoto:threshold".
.TP
\fB-i\fP
Do not output #line information. This is useful when you want to use a CMS tool
with the \*(re output which you might want if you do not require your users to
have \*(re themselves when building from your source.
.TP
\fB-o output\fP
Specify the output file.
.TP
\fB-s\fP
Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
assist to generate better code.
.TP
\fB-u\fP
Generate a parser that supports Unicode chars (UTF-32). This means the
generated code can deal with any valid Unicode character up to 0x10FFFF. When
UTF-8 or UTF-16 needs to be supported you need to convert the incoming stream
to UTF-32 upon input yourself.
.TP
\fB-v\fP
Show version information.
.TP
\fB-V\fP
Show the version as a number XXYYZZ.
.TP
\fB-w\fP
Create a parser that supports wide chars (UCS-2). This implies \fB-s\fP and
cannot be used together with \fB-e\fP switch.
.TP
\fB-1\fP
Force single pass generation, this cannot be combined with -f and disables
YYMAXFILL generation prior to last \*(re block.
.TP
\fB--no-generation-date\fP
Suppress date output in the generated output so that it only shows the re2c
version.
.SH "INTERFACE CODE"
Unlike other scanner generators, \*(re does not generate complete scanners:
the user must supply some interface code.
In particular, the user must define the following macros or use the
corresponding inplace configurations:
.TP
\fCYYCTYPE\fP
Type used to hold an input symbol.
Usually \fCchar\fP or \fCunsigned char\fP.
.TP
\fCYYCURSOR\fP
\*(lx of type \fC*YYCTYPE\fP that points to the current input symbol.
The generated code advances \fCYYCURSOR\fP as symbols are matched.
On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
current token. On exit, \fCYYCURSOR\fP will point to the first character of
the following token.
.TP
\fCYYLIMIT\fP
Expression of type \fC*YYCTYPE\fP that marks the end of the buffer
(\fCYYLIMIT[-1]\fP is the last character in the buffer).
The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP
to determine when the buffer needs (re)filling.
.TP
\fCYYMARKER\fP
\*(lx of type \fC*YYCTYPE\fP.
The generated code saves backtracking information in \fCYYMARKER\fP. Some easy
scanners might not use this.
.TP
\fCYYCTXMARKER\fP
\*(lx of type \fC*YYCTYPE\fP.
The generated code saves trailing context backtracking information in \fCYYCTXMARKER\fP.
The user only needs to define this macro if a scanner specification uses trailing
context in one or more of its regular expressions.
.TP
\fCYYFILL\fP(\fIn\fP\fC\fP)
The generated code "calls" \fCYYFILL\fP(n) when the buffer needs
(re)filling: at least \fIn\fP additional characters should
be provided. \fCYYFILL\fP(n) should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP,
\fCYYMARKER\fP and \fCYYCTXMARKER\fP as needed. Note that for typical
programming languages \fIn\fP will be the length of the longest keyword plus one.
The user can place a comment of the form \fC/*!max:re2c */\fP once to insert
a \fCYYMAXFILL\fP(n) definition that is set to the maximum length value. If -1
switch is used then \fCYYMAXFILL\fP can be triggered only once after the
last \fC/*!re2c */\fP
block.
.TP
\fCYYGETSTATE\fP()
The user only needs to define this macro if the \fB-f\fP flag was specified.
In that case, the generated code "calls" \fCYYGETSTATE\fP() at the very beginning
of the scanner in order to obtain the saved state. \fCYYGETSTATE\fP() must return a signed
integer. The value must be either -1, indicating that the scanner is entered for the
first time, or a value previously saved by \fCYYSETSTATE\fP(s). In the second case, the
scanner will resume operations right after where the last \fCYYFILL\fP(n) was called.
.TP
\fCYYSETSTATE(\fP\fIs\fP\fC)\fP
The user only needs to define this macro if the \fB-f\fP flag was specified.
In that case, the generated code "calls" \fCYYSETSTATE\fP just before calling
\fCYYFILL\fP(n). The parameter to \fCYYSETSTATE\fP is a signed integer that uniquely
identifies the specific instance of \fCYYFILL\fP(n) that is about to be called.
Should the user wish to save the state of the scanner and have \fCYYFILL\fP(n) return
to the caller, all he has to do is store that unique identifier in a variable.
Later, when the scanner is called again, it will call \fCYYGETSTATE()\fP and
resume execution right where it left off. The generated code will contain
both \fCYYSETSTATE\fP(s) and \fCYYGETSTATE\fP even if \fCYYFILL\fP(n) is being
disabled.
.TP
\fCYYDEBUG(\fP\fIstate\fP,\fIcurrent\fC)\fP
This is only needed if the \fB-d\fP flag was specified. It allows to easily debug
the generated parser by calling a user defined function for every state. The function
should have the following signature: \fIvoid YYDEBUG(int state, char current)\fP.
The first parameter receives the state or -1 and the second parameter receives the
input at the current cursor.
.TP
\fCYYMAXFILL\fP
This will be automatically defined by \fC/*!max:re2c */\fP blocks as explained above.
.SH "SCANNER WITH STORABLE STATES"
When the \fB-f\fP flag is specified, \*(re generates a scanner that
can store its current state, return to the caller, and later resume
operations exactly where it left off.
The default operation of \*(re is a "pull" model, where the scanner asks
for extra input whenever it needs it. However, this mode of operation
assumes that the scanner is the "owner" of the parsing loop, and that may
not always be convenient.
Typically, if there is a preprocessor ahead of the scanner in the stream,
or for that matter any other procedural source of data, the scanner cannot
"ask" for more data unless both scanner and source live in separate threads.
The \fB-f\fP flag is useful for just this situation : it lets users design
scanners that work in a "push" model, i.e. where data is fed to the scanner
chunk by chunk. When the scanner runs out of data to consume, it just stores
its state, and returns to the caller. When more input data is fed to the scanner,
it resumes operations exactly where it left off.
When using the -f option \*(re does not accept stdin because it has to do the
full generation process twice which means it has to read the input twice. That
means \*(re would fail in case it cannot open the input twice or reading the
input for the first time influences the second read attempt.
Changes needed compared to the "pull" model.
1. User has to supply macros YYSETSTATE(state) and YYGETSTATE()
2. The \fB-f\fP option inhibits declaration of \fIyych\fP and
\fIyyaccept\fP. So the user has to declare these. Also the user has
to save and restore these. In the example \fIexamples/push.re\fP these
are declared as fields of the (C++) class of which the scanner is a
method, so they do not need to be saved/restored explicitly. For C
they could e.g. be made macros that select fields from a structure
passed in as parameter. Alternatively, they could be declared as local
variables, saved with YYFILL(n) when it decides to return and restored
at entry to the function. Also, it could be more efficient to save the
state from YYFILL(n) because YYSETSTATE(state) is called
unconditionally. YYFILL(n) however does not get \fIstate\fP as
parameter, so we would have to store state in a local variable by
YYSETSTATE(state).
3. Modify YYFILL(n) to return (from the function calling it) if more
input is needed.
4. Modify caller to recognise "more input is needed" and respond
appropriately.
5. The generated code will contain a switch block that is used to restore
the last state by jumping behind the corresponding YYFILL(n) call. This code is
automatically generated in the epilog of the first "\fC/*!re2c */\fP" block.
It is possible to trigger generation of the YYGETSTATE() block earlier by
placing a "\fC/*!getstate:re2c */\fP" comment. This is especially useful when
the scanner code should be wrapped inside a loop.
Please see examples/push.re for push-model scanner. The generated code can be
tweaked using inplace configurations "\fBstate:abort\fP" and "\fBstate:nextlabel\fP".
.SH "SCANNER SPECIFICATIONS"
Each scanner specification consists of a set of \fIrules\fP, \fInamed
definitions\fP and \fIconfigurations\fP.
.LP
\fIRules\fP consist of a regular expression along with a block of C/C++ code that
is to be executed when the associated \fIregular expression\fP is matched.
.P
.RS
\fIregular expression\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
.RE
.LP
Named definitions are of the form:
.P
.RS
\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP
.RE
.LP
Configurations look like named definitions whose names start
with "\fBre2c:\fP":
.P
.RS
\fCre2c:\fP\fIname\fP \fC=\fP \fIvalue\fP\fC;\fP
.RE
.RS
\fCre2c:\fP\fIname\fP \fC=\fP \fB"\fP\fIvalue\fP\fB"\fP\fC;\fP
.RE
.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS"
.TP
\fC"foo"\fP
the literal string \fCfoo\fP.
ANSI-C escape sequences can be used.
.TP
\fC'foo'\fP
the literal string \fCfoo\fP (characters [a-zA-Z] treated case-insensitive).
ANSI-C escape sequences can be used.
.TP
\fC[xyz]\fP
a "character class"; in this case,
the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
.TP
\fC[abj-oZ]\fP
a "character class" with a range in it;
matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
or a '\fCZ\fP'.
.TP
\fC[^\fIclass\fP\fC]\fP
an inverted "character class".
.TP
\fIr\fP\fC\e\fP\fIs\fP
match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
which can be expressed as character classes.
.TP
\fIr\fP\fC*\fP
zero or more \fIr\fP's, where \fIr\fP is any regular expression
.TP
\fC\fIr\fP\fC+\fP
one or more \fIr\fP's
.TP
\fC\fIr\fP\fC?\fP
zero or one \fIr\fP's (that is, "an optional \fIr\fP")
.TP
name
the expansion of the "named definition" (see above)
.TP
\fC(\fP\fIr\fP\fC)\fP
an \fIr\fP; parentheses are used to override precedence
(see below)
.TP
\fIrs\fP
an \fIr\fP followed by an \fIs\fP ("concatenation")
.TP
\fIr\fP\fC|\fP\fIs\fP
either an \fIr\fP or an \fIs\fP
.TP
\fIr\fP\fC/\fP\fIs\fP
an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of
the matched text. This type of \*(rx is called "trailing context". A trailing
context can only be the end of a rule and not part of a named definition.
.TP
\fIr\fP\fC{\fP\fIn\fP\fC}\fP
matches \fIr\fP exactly \fIn\fP times.
.TP
\fIr\fP\fC{\fP\fIn\fP\fC,}\fP
matches \fIr\fP at least \fIn\fP times.
.TP
\fIr\fP\fC{\fP\fIn\fP\fC,\fP\fIm\fP\fC}\fP
matches \fIr\fP at least \fIn\fP but not more than \fIm\fP times.
.TP
\fC.\fP
match any character except newline (\\n).
.TP
\fIdef\fP
matches named definition as specified by \fIdef\fP.
.LP
Character classes and string literals may contain octal or hexadecimal
character definitions and the following set of escape sequences (\fB\\n\fP,
\fB\\t\fP, \fB\\v\fP, \fB\\b\fP, \fB\\r\fP, \fB\\f\fP, \fB\\a\fP, \fB\\\\\fP).
An octal character is defined by a backslash followed by its three octal digits
and a hexadecimal character is defined by backslash, a lower cased '\fBx\fP'
and its two hexadecimal digits or a backslash, an upper cased \fBX\fP and its
four hexadecimal digits.
.LP
\*(re further more supports the c/c++ unicode notation. That is a backslash followed
by either a lowercased \fBu\fP and its four hexadecimal digits or an uppercased
\fBU\fP and its eight hexadecimal digits. However only in \fB-u\fP mode the
generated code can deal with any valid Unicode character up to 0x10FFFF.
.LP
Since characters greater than \fB\\X00FF\fP are not allowed in non-Unicode mode, the
only portable "\fBany\fP" rules are \fB(.|"\\n")\fP and \fB[^]\fP.
.LP
The regular expressions listed above are grouped according to
precedence, from highest precedence at the top to lowest at the bottom.
Those grouped together have equal precedence.
.SH "INPLACE CONFIGURATION"
.LP
It is possible to configure code generation inside \*(re blocks. The following
lists the available configurations:
.TP
\fIre2c:indent:top\fP \fB=\fP 0 \fB;\fP
Specifies the minimum level of indentation to use. Requires a numeric value
greater than or equal to zero.
.TP
\fIre2c:indent:string\fP \fB=\fP "\\t" \fB;\fP
Specifies the string to use for indentation. Requires a string that should
contain only whitespace unless you need this for external tools. The easiest
way to specify spaces is to enclose them in single or double quotes. If you do
not want any indentation at all you can simply set this to \fB""\fP.
.TP
\fIre2c:yybm:hex\fP \fB=\fP 0 \fB;\fP
If set to zero then a decimal table is being used else a hexadecimal table
will be generated.
.TP
\fIre2c:yyfill:enable\fP \fB=\fP 1 \fB;\fP
Set this to zero to suppress generation of YYFILL(n). When using this be sure
to verify that the generated scanner does not read behind input. Allowing
this behavior might introduce severe security issues to your programs.
.TP
\fIre2c:yyfill:parameter\fP \fB=\fP 1 \fB;\fP
Allows to suppress parameter passing to \fBYYFILL\fP calls. If set to zero
then no parameter is passed to \fBYYFILL\fP. If set to a non zero value then
\fBYYFILL\fP usage will be followed by the number of requested characters in
braces.
.TP
\fIre2c:startlabel\fP \fB=\fP 0 \fB;\fP
If set to a non zero integer then the start label of the next scanner blocks
will be generated even if not used by the scanner itself. Otherwise the normal
\fByy0\fP like start label is only being generated if needed. If set to a text
value then a label with that text will be generated regardless of whether the
normal start label is being used or not. This setting is being reset to \fB0\fP
after a start label has been generated.
.TP
\fIre2c:labelprefix\fP \fB=\fP yy \fB;\fP
Allows to change the prefix of numbered labels. The default is \fByy\fP and
can be set to any string that is a valid label.
.TP
\fIre2c:state:abort\fP \fB=\fP 0 \fB;\fP
When not zero and switch -f is active then the \fCYYGETSTATE\fP block will
contain a default case that aborts and a -1 case is used for initialization.
.TP
\fIre2c:state:nextlabel\fP \fB=\fP 0 \fB;\fP
Used when -f is active to control whether the \fCYYGETSTATE\fP block is
followed by a \fCyyNext:\fP label line. Instead of using \fCyyNext\fP you can
usually also use configuration \fIstartlabel\fP to force a specific start label
or default to \fCyy0\fP as start label. Instead of using a dedicated label it
is often better to separate the YYGETSTATE code from the actual scanner code by
placing a "\fC/*!getstate:re2c */\fP" comment.
.TP
\fIre2c:cgoto:threshold\fP \fB=\fP 9 \fB;\fP
When -g is active this value specifies the complexity threshold that triggers
generation of jump tables rather than using nested if's and decision bitfields.
The threshold is compared against a calculated estimation of if-s needed where
every used bitmap divides the threshold by 2.
.TP
\fIre2c:yych:conversion\fP \fB=\fP 0 \fB;\fP
When the input uses signed characters and \fB-s\fP or \fB-b\fP switches are
in effect re2c allows to automatically convert to the unsigned character type
that is then necessary for its internal single character. When this setting
is zero or an empty string the conversion is disabled. Using a non zero number
the conversion is taken from \fBYYCTYPE\fP. If that is given by an inplace
configuration that value is being used. Otherwise it will be \fB(YYCTYPE)\fP
and changes to that configuration are no longer possible. When this setting is
a string the braces must be specified. Now assuming your input is a \fBchar*\fP
buffer and you are using above mentioned switches you can set \fBYYCTYPE\fP to
\fBunsigned char\fP and this setting to either \fB1\fP or \fB"(unsigned char)"\fP.
.TP
\fIre2c:define:YYCTXMARKER\fP \fB=\fP YYCTXMARKER \fB;\fP
Allows to overwrite the define YYCTXMARKER and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYCTYPE\fP \fB=\fP YYCTYPE \fB;\fP
Allows to overwrite the define YYCTYPE and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYCURSOR\fP \fB=\fP YYCURSOR \fB;\fP
Allows to overwrite the define YYCURSOR and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYDEBUG\fP \fB=\fP YYDEBUG \fB;\fP
Allows to overwrite the define YYDEBUG and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYFILL\fP \fB=\fP YYFILL \fB;\fP
Allows to overwrite the define YYFILL and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYGETSTATE\fP \fB=\fP YYGETSTATE \fB;\fP
Allows to overwrite the define YYGETSTATE and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYLIMIT\fP \fB=\fP YYLIMIT \fB;\fP
Allows to overwrite the define YYLIMIT and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYMARKER\fP \fB=\fP YYMARKER \fB;\fP
Allows to overwrite the define YYMARKER and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:define:YYSETSTATE\fP \fB=\fP YYSETSTATE \fB;\fP
Allows to overwrite the define YYSETSTATE and thus avoiding it by setting the
value to the actual code needed.
.TP
\fIre2c:label:yyFillLabel\fP \fB=\fP yyFillLabel \fB;\fP
Allows to overwrite the name of the label yyFillLabel.
.TP
\fIre2c:label:yyNext\fP \fB=\fP yyNext \fB;\fP
Allows to overwrite the name of the label yyNext.
.TP
\fIre2c:variable:yyaccept\fP \fB=\fP yyaccept \fB;\fP
Allows to overwrite the name of the variable yyaccept.
.TP
\fIre2c:variable:yybm\fP \fB=\fP yybm \fB;\fP
Allows to overwrite the name of the variable yybm.
.TP
\fIre2c:variable:yych\fP \fB=\fP yych \fB;\fP
Allows to overwrite the name of the variable yych.
.TP
\fIre2c:variable:yytarget\fP \fB=\fP yytarget \fB;\fP
Allows to overwrite the name of the variable yytarget.
.SH "UNDERSTANDING RE2C"
.LP
The subdirectory lessons of the \*(re distribution contains a few step by step
lessons to get you started with \*(re. All examples in the lessons subdirectory
can be compiled and actually work.
.SH FEATURES
.LP
\*(re does not provide a default action:
the generated code assumes that the input
will consist of a sequence of tokens.
Typically this can be dealt with by adding a rule such as the one for
unexpected characters in the example above.
.LP
The user must arrange for a sentinel token to appear at the end of input
(and provide a rule for matching it):
\*(re does not provide an \fC<<EOF>>\fP expression.
If the source is from a null-byte terminated string, a
rule matching a null character will suffice. If the source is from a
file then you could pad the input with a newline (or some other character that
cannot appear within another token); upon recognizing such a character check
to see if it is the sentinel and act accordingly. And you can also use YYFILL(n)
to end the scanner in case not enough characters are available, which is nothing
else than a detection of end of data/file.
.LP
\*(re does not provide start conditions: use a separate scanner
specification for each start condition (as illustrated in the above example).
.SH BUGS
.LP
Difference only works for character sets.
.LP
The \*(re internal algorithms need documentation.
.SH "SEE ALSO"
.LP
flex(1), lex(1).
.P
More information on \*(re can be found here:
.PD 0
.P
.B http://re2c.org/
.PD 1
.SH AUTHORS
.PD 0
.P
Peter Bumbulis <peter@csg.uwaterloo.ca>
.P
Brian Young <bayoung@acm.org>
.P
Dan Nuffer <nuffer@users.sourceforge.net>
.P
Marcus Boerger <helly@users.sourceforge.net>
.P
Hartmut Kaiser <hkaiser@users.sourceforge.net>
.P
Emmanuel Mogenet <mgix@mgix.com> added storable state
.P
.PD 1
.SH VERSION INFORMATION
This manpage describes \*(re, version 0.12.3.
.fi

View file

@ -1,549 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="re2c"
ProjectGUID="{667D2EE7-C357-49E2-9BAB-0A4A45F0F76E}"
RootNamespace="re2c"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="."
IntermediateDirectory="Build"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="1"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
ExceptionHandling="1"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="0"
CallingConvention="0"
DisableSpecificWarnings="4996"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/re2c.exe"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
OptimizeForWindows98="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="1"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
ExceptionHandling="1"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="0"
CallingConvention="0"
DisableSpecificWarnings="4996;4244"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/re2c.exe"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
OptimizeForWindows98="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="."
IntermediateDirectory="Build"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="1"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
ExceptionHandling="1"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="0"
DisableSpecificWarnings="4996"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/re2c.exe"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
OptimizeForWindows98="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="1"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
ExceptionHandling="1"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="false"
DebugInformationFormat="0"
CallingConvention="0"
DisableSpecificWarnings="4996;4244"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(OutDir)/re2c.exe"
LinkIncremental="1"
GenerateDebugInformation="false"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
OptimizeForWindows98="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath=".\actions.cc"
>
</File>
<File
RelativePath=".\code.cc"
>
</File>
<File
RelativePath=".\dfa.cc"
>
</File>
<File
RelativePath=".\main.cc"
>
</File>
<File
RelativePath=".\mbo_getopt.cc"
>
</File>
<File
RelativePath=".\parser.cc"
>
</File>
<File
RelativePath=".\parser.y"
>
<FileConfiguration
Name="Debug|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
ObjectFile="$(IntDir)\$(InputName)1.obj"
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
ObjectFile="$(IntDir)\$(InputName)1.obj"
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
ObjectFile="$(IntDir)\$(InputName)1.obj"
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
ObjectFile="$(IntDir)\$(InputName)1.obj"
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\scanner.cc"
>
</File>
<File
RelativePath=".\scanner.re"
>
<FileConfiguration
Name="Debug|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCustomBuildTool"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\substr.cc"
>
</File>
<File
RelativePath=".\translate.cc"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
<File
RelativePath=".\basics.h"
>
</File>
<File
RelativePath=".\code.h"
>
</File>
<File
RelativePath=".\code_names.h"
>
</File>
<File
RelativePath=".\config_w32.h"
>
</File>
<File
RelativePath=".\dfa.h"
>
</File>
<File
RelativePath=".\globals.h"
>
</File>
<File
RelativePath=".\ins.h"
>
</File>
<File
RelativePath=".\mbo_getopt.h"
>
</File>
<File
RelativePath=".\parser.h"
>
</File>
<File
RelativePath=".\re.h"
>
</File>
<File
RelativePath=".\scanner.h"
>
</File>
<File
RelativePath=".\stream_lc.h"
>
</File>
<File
RelativePath=".\substr.h"
>
</File>
<File
RelativePath=".\token.h"
>
</File>
<File
RelativePath=".\y.tab.h"
>
</File>
</Filter>
<File
RelativePath=".\CMakeLists.txt"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

File diff suppressed because it is too large Load diff

View file

@ -1,76 +0,0 @@
/* $Id: scanner.h,v 1.17 2006/02/25 12:57:50 helly Exp $ */
#ifndef _scanner_h
#define _scanner_h
#include <iosfwd>
#include <string>
#include "token.h"
#include "re.h"
#include "globals.h"
namespace re2c
{
// Lexer for re2c source files. echo() copies ordinary program text to the
// output stream until a re2c block starts; scan() then tokenizes the block
// for the parser. Input is read through a growable buffer managed by fill().
class Scanner:
public line_number
{
private:
// Stream the buffer is refilled from (see fill()).
std::istream& in;
// Stream that echo() writes passthrough text to.
std::ostream& out;
// Buffer bookkeeping: bot/top delimit the allocation, lim is the end of the
// data read so far, tok the start of the current token, cur the position
// scanning resumes from, ptr the backtracking marker (YYMARKER), pos the
// start of the current line and eof the position just past the terminating
// NUL once the input is exhausted (NULL before that).
char *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
// tchar/tline: column offset and line of the current token (used by fatal());
// cline: current line; iscfg: 0 = normal, 1 = expecting '=' after a
// configuration name, 2 = expecting the configuration value.
uint tchar, tline, cline, iscfg;
// Name of the input file, used in MSVC-style error messages.
const char *filename;
private:
// Refills the buffer and returns the (possibly relocated) cursor.
char *fill(char*);
Scanner(const Scanner&); //unimplemented
Scanner& operator=(const Scanner&); //unimplemented
public:
Scanner(const char*, std::istream&, std::ostream&);
~Scanner();
// Returns 1 when a re2c block starts, 0 at end of input.
int echo();
// Returns the next token code of the current re2c block (0 at its end).
int scan();
// Both overloads print an error message and terminate the process.
void fatal(const char*) const;
void fatal(uint, const char*) const;
void config(const Str&, int);
void config(const Str&, const Str&);
// Text of the current token: [tok, cur).
SubStr token() const;
virtual uint get_line() const;
uint xlat(uint c) const;
uint unescape(SubStr &s) const;
std::string& unescape(SubStr& str_in, std::string& str_out) const;
// Helpers that build regular-expression nodes from token text
// (defined elsewhere; not visible in this header).
Range * getRange(SubStr &s) const;
RegExp * matchChar(uint c) const;
RegExp * strToName(SubStr s) const;
RegExp * strToRE(SubStr s) const;
RegExp * strToCaseInsensitiveRE(SubStr s) const;
RegExp * ranToRE(SubStr s) const;
RegExp * invToRE(SubStr s) const;
RegExp * mkDot() const;
};
inline void Scanner::fatal(const char *msg) const
{
fatal(0, msg);
}
// Returns the text of the current token, i.e. the bytes in [tok, cur).
inline SubStr Scanner::token() const
{
    char * const start = tok;
    return SubStr(start, cur - start);
}
// Translates a character code: passed through unchanged when wFlag is set,
// otherwise mapped through the global re2c::xlat table (indexed by the low
// eight bits of the code).
inline uint Scanner::xlat(uint c) const
{
    if (re2c::wFlag)
    {
        return c;
    }
    return re2c::xlat[c & 0xFF];
}
} // end namespace re2c
#endif

View file

@ -1,381 +0,0 @@
/* $Id: scanner.re 663 2007-04-01 11:22:15Z helly $ */
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <sstream>
#include "scanner.h"
#include "parser.h"
#include "y.tab.h"
#include "globals.h"
#include "dfa.h"
extern YYSTYPE yylval;
// Helper macros for the scanner. The YY* names are the hooks the re2c-generated
// code in echo()/scan() expands; they bind the generated lexer to the Scanner
// buffer pointers and to the local 'cursor' variable of each scanning function.
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif
// Number of bytes requested from the input stream per refill (see fill()).
#define BSIZE 8192
#define YYCTYPE unsigned char
#define YYCURSOR cursor
#define YYLIMIT lim
#define YYMARKER ptr
// fill() may move the buffer, so the local cursor is replaced by its return value.
#define YYFILL(n) {cursor = fill(cursor);}
// Saves the scan position into 'cur' before returning token code i.
#define RETURN(i) {cur = cursor; return i;}
namespace re2c
{
// Binds the scanner to its input/output streams and file name. The buffer
// starts out unallocated (all pointers NULL); the first fill() creates it.
// Line counting starts at 1.
Scanner::Scanner(const char *fn, std::istream& i, std::ostream& o)
    : in(i)
    , out(o)
    , bot(NULL)
    , tok(NULL)
    , ptr(NULL)
    , cur(NULL)
    , pos(NULL)
    , lim(NULL)
    , top(NULL)
    , eof(NULL)
    , tchar(0)
    , tline(0)
    , cline(1)
    , iscfg(0)
    , filename(fn)
{
}
/*
 * Refills the input buffer and returns the adjusted cursor.
 *
 * Does nothing once end of input has been seen (eof != NULL). Otherwise:
 *  1. discards the already consumed prefix [bot, tok) by shifting the live
 *     data down to the start of the buffer and rebasing all pointers;
 *  2. grows the buffer when fewer than BSIZE bytes are free behind 'lim';
 *  3. reads up to BSIZE bytes; a short read marks end of input: a NUL
 *     sentinel is stored after the data and 'eof' points just past it.
 *
 * 'cursor' is the caller's local scan position; it is rebased together with
 * the member pointers, which is why callers must use the returned value.
 */
char *Scanner::fill(char *cursor)
{
    if(!eof)
    {
        uint cnt = tok - bot;
        if(cnt)
        {
            // Source and destination lie in the same buffer and overlap
            // whenever the live region is longer than the discarded prefix,
            // so memmove (not memcpy) is required here.
            memmove(bot, tok, lim - tok);
            tok = bot;
            ptr -= cnt;
            cursor -= cnt;
            pos -= cnt;
            lim -= cnt;
        }
        if((top - lim) < BSIZE)
        {
            // Not enough room for a full read: move the data to a larger
            // allocation. After the compaction above tok == bot, so all
            // pointers can be rebased by their offset from 'bot'.
            char *buf = new char[(lim - bot) + BSIZE];
            memcpy(buf, tok, lim - tok);
            tok = buf;
            ptr = &buf[ptr - bot];
            cursor = &buf[cursor - bot];
            pos = &buf[pos - bot];
            lim = &buf[lim - bot];
            top = &lim[BSIZE];
            delete [] bot;
            bot = buf;
        }
        in.read(lim, BSIZE);
        if ((cnt = in.gcount()) != BSIZE )
        {
            // Short read: terminate the data with a NUL sentinel and remember
            // the position just past it as the end-of-input marker.
            eof = &lim[cnt]; *eof++ = '\0';
        }
        lim += cnt;
    }
    return cursor;
}
// Named regular-expression definitions shared by the re2c rule blocks in
// echo() and scan() below.
/*!re2c
zero = "\000";
any = [\000-\377];
dot = any \ [\n];
esc = dot \ [\\];
istring = "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ;
cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ;
dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\"";
sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ;
letter = [a-zA-Z];
digit = [0-9];
number = "0" | ("-"? [1-9] digit*);
name = (letter|"_") (letter|digit|"_")*;
cname = ":" name;
space = [ \t];
eol = ("\r\n" | "\n");
config = "re2c" cname+;
value = [^\r\n; \t]* | dstring | sstring;
*/
// Copies program text from the input to 'out' until the start of a re2c block.
//
// Returns 1 when a "/*!re2c" block opener has been consumed (the text before
// it has been written out) and 0 when the end of input is reached.
// The special comments "/*!max:re2c", "/*!getstate:re2c" and "/*!ignore:re2c"
// are handled here: their text is not echoed (ignore_eoc stays set until the
// closing "*/"), and the first two emit the YYMAXFILL define / the state
// dispatch code instead. ignore_cnt counts newlines swallowed inside such a
// comment so that line information (sourceFileInfo) can be re-emitted after it.
int Scanner::echo()
{
char *cursor = cur;
bool ignore_eoc = false;
int ignore_cnt = 0;
if (eof && cursor == eof) // Catch EOF
{
return 0;
}
tok = cursor;
echo:
/*!re2c
"/*!re2c" {
if (bUsedYYMaxFill && bSinglePass) {
fatal("found scanner block after YYMAXFILL declaration");
}
out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok));
tok = cursor;
RETURN(1);
}
"/*!max:re2c" {
if (bUsedYYMaxFill) {
fatal("cannot generate YYMAXFILL twice");
}
out << "#define YYMAXFILL " << maxFill << std::endl;
tok = pos = cursor;
ignore_eoc = true;
bUsedYYMaxFill = true;
goto echo;
}
"/*!getstate:re2c" {
tok = pos = cursor;
genGetState(out, topIndent, 0);
ignore_eoc = true;
goto echo;
}
"/*!ignore:re2c" {
tok = pos = cursor;
ignore_eoc = true;
goto echo;
}
"*" "/" "\r"? "\n" {
cline++;
if (ignore_eoc) {
if (ignore_cnt) {
out << sourceFileInfo;
}
ignore_eoc = false;
ignore_cnt = 0;
} else {
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok));
}
tok = pos = cursor;
goto echo;
}
"*" "/" {
if (ignore_eoc) {
if (ignore_cnt) {
out << "\n" << sourceFileInfo;
}
ignore_eoc = false;
ignore_cnt = 0;
} else {
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok));
}
tok = pos = cursor;
goto echo;
}
"\n" {
if (ignore_eoc) {
ignore_cnt++;
} else {
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok));
}
tok = pos = cursor; cline++;
goto echo;
}
zero {
if (!ignore_eoc) {
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok) - 1); // -1 so we don't write out the \0
}
if(cursor == eof) {
RETURN(0);
}
}
any {
goto echo;
}
*/
}
// Returns the next token of the current re2c block and stores its semantic
// value in the global 'yylval'.
//
// Token codes returned: STRING, RANGE, CLOSE, CLOSESIZE, CONFIG, ID, CODE,
// NUMBER, VALUE, the character itself for single-character operators, and 0
// when the block's closing "*/" or the end of input is reached.
// 'iscfg' carries state across calls: after a CONFIG token the next call
// resumes at the 'config' rules (expects '='), and after '=' at the 'value'
// rules. 'depth' tracks brace nesting in code blocks and comment nesting.
int Scanner::scan()
{
char *cursor = cur;
uint depth;
scan:
// Record where the token starts (column offset, line, buffer position).
tchar = cursor - pos;
tline = cline;
tok = cursor;
if (iscfg == 1)
{
goto config;
}
else if (iscfg == 2)
{
goto value;
}
/*!re2c
"{" { depth = 1;
goto code;
}
"/*" { depth = 1;
goto comment; }
"*/" { tok = cursor;
RETURN(0); }
dstring { cur = cursor;
yylval.regexp = strToRE(token());
return STRING; }
sstring { cur = cursor;
yylval.regexp = strToCaseInsensitiveRE(token());
return STRING; }
"\"" { fatal("unterminated string constant (missing \")"); }
"'" { fatal("unterminated string constant (missing ')"); }
istring { cur = cursor;
yylval.regexp = invToRE(token());
return RANGE; }
cstring { cur = cursor;
yylval.regexp = ranToRE(token());
return RANGE; }
"[" { fatal("unterminated range (missing ])"); }
[()|=;/\\] { RETURN(*tok); }
[*+?] { yylval.op = *tok;
RETURN(CLOSE); }
"{0,}" { yylval.op = '*';
RETURN(CLOSE); }
"{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)tok+1);
yylval.extop.maxsize = atoi((char *)tok+1);
RETURN(CLOSESIZE); }
"{" [0-9]+ "," [0-9]+ "}" { yylval.extop.minsize = atoi((char *)tok+1);
yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)tok, ',')+1));
RETURN(CLOSESIZE); }
"{" [0-9]+ ",}" { yylval.extop.minsize = atoi((char *)tok+1);
yylval.extop.maxsize = -1;
RETURN(CLOSESIZE); }
"{" [0-9]* "," { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); }
config { cur = cursor;
tok+= 5; /* skip "re2c:" */
iscfg = 1;
yylval.str = new Str(token());
return CONFIG;
}
name { cur = cursor;
yylval.symbol = Symbol::find(token());
return ID; }
"." { cur = cursor;
yylval.regexp = mkDot();
return RANGE;
}
space+ { goto scan; }
eol { if(cursor == eof) RETURN(0);
pos = cursor; cline++;
goto scan;
}
any { std::ostringstream msg;
msg << "unexpected character: ";
prtChOrHex(msg, *tok);
fatal(msg.str().c_str());
goto scan;
}
*/
// Inside a "{ ... }" code block: consume text until the matching closing brace.
code:
/*!re2c
"}" { if(--depth == 0){
cur = cursor;
yylval.token = new Token(token(), tline);
return CODE;
}
goto code; }
"{" { ++depth;
goto code; }
"\n" { if(cursor == eof) fatal("missing '}'");
pos = cursor; cline++;
goto code;
}
zero { if(cursor == eof) {
if (depth) fatal("missing '}'");
RETURN(0);
}
goto code;
}
dstring | sstring | any { goto code; }
*/
// Inside a comment: skip text until the comment closes.
comment:
/*!re2c
"*/" { if(--depth == 0)
goto scan;
else
goto comment; }
"/*" { ++depth;
fatal("ambiguous /* found");
goto comment; }
"\n" { if(cursor == eof) RETURN(0);
tok = pos = cursor; cline++;
goto comment;
}
any { if(cursor == eof) RETURN(0);
goto comment; }
*/
// After a configuration name: expect '=' (optionally surrounded by blanks).
config:
/*!re2c
space+ { goto config; }
"=" space* { iscfg = 2;
cur = cursor;
RETURN('=');
}
any { fatal("missing '='"); }
*/
// After '=': read the configuration value (a number or a string-like value).
value:
/*!re2c
number { cur = cursor;
yylval.number = atoi(token().to_string().c_str());
iscfg = 0;
return NUMBER;
}
value { cur = cursor;
yylval.str = new Str(token());
iscfg = 0;
return VALUE;
}
*/
}
// Prints an error message for the current token to std::cerr and terminates
// the process with exit code 1. 'ofs' is added to the token's column offset;
// reported columns are 1-based. Pending output is flushed first. The message
// format follows the MSVC convention when built with that compiler.
void Scanner::fatal(uint ofs, const char *msg) const
{
    const uint column = tchar + ofs + 1;

    out.flush();
#ifdef _MSC_VER
    std::cerr << filename << "(" << tline << "): error : ";
    std::cerr << "column " << column << ": ";
#else
    std::cerr << "re2c: error: ";
    std::cerr << "line " << tline << ", column " << column << ": ";
#endif
    std::cerr << msg << std::endl;
    exit(1);
}
// Releases the input buffer (if one was ever allocated by fill()).
Scanner::~Scanner()
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete [] bot;
}
} // end namespace re2c

View file

@ -0,0 +1,168 @@
#include <algorithm> // min
#include <string.h> // memset
#include "src/codegen/bitmap.h"
#include "src/codegen/go.h"
#include "src/codegen/output.h"
#include "src/conf/opt.h"
#include "src/globals.h"
namespace re2c
{
// Head of the global singly linked list of bitmaps; every newly constructed
// BitMap is pushed to the front (see the constructor).
BitMap *BitMap::first = NULL;
// Creates a bitmap for the transitions of 'g' that lead to state 'x' and
// pushes it onto the front of the global list: 'next' captures the previous
// head before 'first' is redirected to the new object. The table offset 'i'
// and bit mask 'm' stay zero until BitMap::gen() assigns them.
BitMap::BitMap(const Go *g, const State *x)
    : go(g), on(x), next(first), i(0), m(0)
{
    first = this;
}
// Destroys this bitmap and, recursively through 'next', the rest of the list.
// Deleting the list head therefore frees every bitmap.
BitMap::~BitMap()
{
delete next;
}
// Returns an existing bitmap whose spans leading to its state are equivalent
// (per matches()) to the spans of 'g' leading to 'x'; if none exists, a new
// bitmap is created, which also registers it in the global list.
const BitMap *BitMap::find(const Go *g, const State *x)
{
    const BitMap *candidate = first;
    while (candidate)
    {
        const Go *other = candidate->go;
        if (matches(other->span, other->nSpans, candidate->on, g->span, g->nSpans, x))
        {
            return candidate;
        }
        candidate = candidate->next;
    }
    return new BitMap(g, x);
}
// Looks up the bitmap registered for state 'x'.
// Returns NULL when no bitmap in the global list targets that state.
const BitMap *BitMap::find(const State *x)
{
    const BitMap *candidate = first;
    while (candidate && candidate->on != x)
    {
        candidate = candidate->next;
    }
    return candidate ? candidate : NULL;
}
static void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m)
{
Span *b = g->span, *e = &b[g->nSpans];
uint32_t lb = 0;
for (; b < e; ++b)
{
if (b->to == s)
{
for (; lb < b->ub && lb < 256; ++lb)
{
bm[lb-f] |= m;
}
}
lb = b->ub;
}
}
// Emits the 'yybm' lookup table for all registered bitmaps.
//
// Nothing is written unless at least one bitmap exists and bUsedYYBitmap is
// set. Bitmaps are packed eight per table (one bit each, masks 0x80 .. 0x01);
// every table has (ub - lb) entries, and each bitmap is told the offset of
// its table ('i') and its mask ('m'). When more than eight bitmaps exist a
// comment naming the covered range precedes each table. Entries are printed
// eight per line, in hex or as 3-wide decimals depending on yybmHexTable.
void BitMap::gen(OutputFile & o, uint32_t ind, uint32_t lb, uint32_t ub)
{
    if (!first || !bUsedYYBitmap)
    {
        return;
    }

    o.wind(ind).ws("static const unsigned char ").wstring(opts->yybm).ws("[] = {");

    const uint32_t width = ub - lb;

    // total number of bitmaps in the list
    uint32_t total = 0;
    for (const BitMap *p = first; p != NULL; p = p->next)
    {
        ++total;
    }

    BitMap *cur = first;
    uint32_t *bits = new uint32_t[width];
    uint32_t offset = 0;   // index of the current table's first entry
    uint32_t tableNo = 1;  // 1-based number of the first bitmap in the table
    while (cur)
    {
        memset(bits, 0, width * sizeof(uint32_t));
        for (uint32_t mask = 0x80; cur && mask; mask >>= 1)
        {
            cur->i = offset;
            cur->m = mask;
            doGen(cur->go, cur->on, bits, lb, mask);
            cur = const_cast<BitMap*>(cur->next);
        }

        if (total > 8)
        {
            o.ws("\n").wind(ind+1).ws("/* table ").wu32(tableNo).ws(" .. ").wu32(std::min(total, tableNo+7)).ws(": ").wu32(offset).ws(" */");
        }

        for (uint32_t j = 0; j < width; ++j)
        {
            if (j % 8 == 0)
            {
                o.ws("\n").wind(ind+1);
            }
            if (opts->yybmHexTable)
            {
                o.wu32_hex(bits[j]);
            }
            else
            {
                o.wu32_width(bits[j], 3);
            }
            o.ws(", ");
        }

        offset += width;
        tableNo += 8;
    }
    o.ws("\n").wind(ind).ws("};\n");
    delete[] bits;
}
// All spans in b1 that lead to s1 are pairwise equal to that in b2 leading to s2
bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2)
{
const Span * e1 = &b1[n1];
uint32_t lb1 = 0;
const Span * e2 = &b2[n2];
uint32_t lb2 = 0;
for (;;)
{
for (; b1 < e1 && b1->to != s1; ++b1)
{
lb1 = b1->ub;
}
for (; b2 < e2 && b2->to != s2; ++b2)
{
lb2 = b2->ub;
}
if (b1 == e1)
{
return b2 == e2;
}
if (b2 == e2)
{
return false;
}
if (lb1 != lb2 || b1->ub != b2->ub)
{
return false;
}
++b1;
++b2;
}
}
} // end namespace re2c

View file

@ -0,0 +1,45 @@
#ifndef _RE2C_CODEGEN_BITMAP_
#define _RE2C_CODEGEN_BITMAP_
#include "src/util/c99_stdint.h"
#include "src/util/forbid_copy.h"
namespace re2c
{
struct Go;
struct Span;
class State;
class OutputFile;
// One entry of the generated 'yybm' bit table: marks the characters on which
// the transition table 'go' leads to state 'on'. All bitmaps form a global
// singly linked list headed by 'first'.
class BitMap
{
public:
// Head of the global bitmap list (most recently created bitmap).
static BitMap *first;
// Transition table this bitmap was built for.
const Go *go;
// Target state whose incoming characters are marked.
const State *on;
// Next (older) bitmap in the global list; owned and deleted by this object.
const BitMap *next;
// Offset of this bitmap's table inside 'yybm' (assigned by gen()).
uint32_t i;
// Bit mask of this bitmap inside its table (assigned by gen()).
uint32_t m;
public:
// Finds a bitmap equivalent to (go, state) or creates and registers one.
static const BitMap *find(const Go*, const State*);
// Finds the bitmap targeting the given state, or returns NULL.
static const BitMap *find(const State*);
// Emits the 'yybm' table for the character range given by the last two arguments.
static void gen(OutputFile &, uint32_t ind, uint32_t, uint32_t);
BitMap(const Go*, const State*);
~BitMap();
FORBID_COPY (BitMap);
};
bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2);
#ifdef _MSC_VER
# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */
#endif
} // end namespace re2c
#endif // _RE2C_CODEGEN_BITMAP_

View file

@ -0,0 +1,43 @@
#ifndef _RE2C_CODEGEN_EMIT_
#define _RE2C_CODEGEN_EMIT_
#include "src/codegen/output.h"
#include "src/ir/adfa/adfa.h"
namespace re2c {
typedef std::vector<std::string> RegExpIndices;
// Emits the code for the action attached to state 's' into 'o' at indentation
// level 'ind'. 'readCh' is an in/out flag shared with the transition emitters
// (NOTE(review): appears to track whether the next character still has to be
// read — confirm against the definitions). 'condName' is the current
// condition, 'used_labels' the set of labels that are jump targets, and
// 'save_yyaccept' tells whether SAVE actions must store the accept index.
void emit_action
( const Action & action
, OutputFile & o
, uint32_t ind
, bool & readCh
, const State * const s
, const std::string & condName
, const Skeleton * skeleton
, const std::set<label_t> & used_labels
, bool save_yyaccept
);
// helpers
// Emits a jump from state 'from' to state 'to'.
void genGoTo (OutputFile & o, uint32_t ind, const State * from, const State * to, bool & readCh);
/*
 * Returns a copy of 'str' in which every occurrence of 'param' has been
 * replaced by the textual form of 'value' (as produced by operator<<).
 * An empty 'param' leaves the string untouched.
 *
 * The search resumes right after each inserted replacement, so the inserted
 * text is never scanned again. This matters when the value itself contains
 * 'param': restarting the search from the beginning (as the previous
 * implementation did) would find the marker inside the replacement over and
 * over and never terminate.
 */
template<typename T> std::string replaceParam (std::string str, const std::string & param, const T & value)
{
    if (!param.empty ())
    {
        std::ostringstream strValue;
        strValue << value;
        // format the value once instead of on every iteration
        const std::string replacement = strValue.str();

        std::string::size_type pos = str.find(param);
        while (pos != std::string::npos)
        {
            str.replace(pos, param.length(), replacement);
            // continue behind the text that was just inserted
            pos = str.find(param, pos + replacement.length());
        }
    }
    return str;
}
} // namespace re2c
#endif // _RE2C_CODEGEN_EMIT_

View file

@ -0,0 +1,388 @@
#include "src/util/c99_stdint.h"
#include <stddef.h>
#include <set>
#include <string>
#include "src/codegen/emit.h"
#include "src/codegen/input_api.h"
#include "src/codegen/output.h"
#include "src/conf/opt.h"
#include "src/globals.h"
#include "src/ir/adfa/action.h"
#include "src/ir/adfa/adfa.h"
#include "src/ir/regexp/regexp.h"
#include "src/ir/regexp/regexp_rule.h"
#include "src/ir/skeleton/skeleton.h"
#include "src/parse/code.h"
#include "src/parse/loc.h"
namespace re2c
{
class label_t;
static void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker);
static void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s);
static void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & init, const std::set<label_t> & used_labels);
static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept);
static void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept, size_t l, size_t r);
static void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept);
static void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton);
static void genYYFill (OutputFile & o, size_t need);
static void genSetCondition (OutputFile & o, uint32_t ind, const std::string & newcond);
static void genSetState (OutputFile & o, uint32_t ind, uint32_t fillIndex);
// Dispatches on the action type of state 's' and emits the matching code.
// MOVE actions emit nothing. Afterwards, for states flagged 'isPreCtxt',
// the statement that backs up the context position is appended (never for
// the DOT target).
void emit_action
    ( const Action & action
    , OutputFile & o
    , uint32_t ind
    , bool & readCh
    , const State * const s
    , const std::string & condName
    , const Skeleton * skeleton
    , const std::set<label_t> & used_labels
    , bool save_yyaccept
    )
{
    if (action.type == Action::MATCH)
    {
        emit_match (o, ind, readCh, s);
    }
    else if (action.type == Action::INITIAL)
    {
        emit_initial (o, ind, readCh, s, * action.info.initial, used_labels);
    }
    else if (action.type == Action::SAVE)
    {
        emit_save (o, ind, readCh, s, action.info.save, save_yyaccept);
    }
    else if (action.type == Action::ACCEPT)
    {
        emit_accept (o, ind, readCh, s, * action.info.accepts);
    }
    else if (action.type == Action::RULE)
    {
        emit_rule (o, ind, s, action.info.rule, condName, skeleton);
    }
    // Action::MOVE: nothing to emit

    if (!s->isPreCtxt)
    {
        return;
    }
    if (opts->target != opt_t::DOT)
    {
        o.wstring(opts->input_api.stmt_backupctx (ind));
    }
}
// Emits the code for a MATCH action (nothing for the DOT target).
//  - If the state needs a buffer refill (fill != 0): skip, then the refill
//    check via need().
//  - Otherwise, if there is a successor state whose action is not a rule:
//    skip and peek in one statement; the character is now read.
//  - Otherwise: skip only, and record in 'readCh' that the next character
//    has not been read yet.
void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    if (s->fill != 0)
    {
        o.wstring(opts->input_api.stmt_skip (ind));
        need(o, ind, readCh, s->fill, false);
        return;
    }

    const State * const succ = s->next;
    if (succ && succ->action.type != Action::RULE)
    {
        o.wstring(opts->input_api.stmt_skip_peek (ind));
        readCh = false;
    }
    else
    {
        /* do not read next char if match */
        o.wstring(opts->input_api.stmt_skip (ind));
        readCh = true;
    }
}
// Emits the code for the INITIAL action of the DFA's start state (nothing
// for the DOT target):
//  1. if the state's own label is a jump target, the code that advances the
//     input (with a peek unless a refill follows);
//  2. the initial label, if it is used;
//  3. the debug call when debugging output is enabled;
//  4. either the refill check (which also sets the marker if requested) or,
//     without refill, the marker backup; 'readCh' is cleared in that case.
void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & initial, const std::set<label_t> & used_labels)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    const bool needs_fill = s->fill != 0;

    if (used_labels.count(s->label))
    {
        if (needs_fill)
        {
            o.wstring(opts->input_api.stmt_skip (ind));
        }
        else
        {
            o.wstring(opts->input_api.stmt_skip_peek (ind));
        }
    }

    if (used_labels.count(initial.label))
    {
        o.wstring(opts->labelPrefix).wlabel(initial.label).ws(":\n");
    }

    if (opts->dFlag)
    {
        o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(initial.label).ws(", *").wstring(opts->yycursor).ws(");\n");
    }

    if (needs_fill)
    {
        need(o, ind, readCh, s->fill, initial.setMarker);
        return;
    }

    if (initial.setMarker)
    {
        o.wstring(opts->input_api.stmt_backup (ind));
    }
    readCh = false;
}
// Emits the code for a SAVE action (nothing for the DOT target): optionally
// stores the accept index 'save' in yyaccept, then advances the input and
// backs up the position — followed by the refill check when the state needs
// one, or combined with a peek (clearing 'readCh') when it does not.
void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    if (save_yyaccept)
    {
        o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu32(save).ws(";\n");
    }

    if (s->fill == 0)
    {
        o.wstring(opts->input_api.stmt_skip_backup_peek (ind));
        readCh = false;
        return;
    }

    o.wstring(opts->input_api.stmt_skip_backup (ind));
    need(o, ind, readCh, s->fill, false);
}
// Emits a binary decision tree of nested 'if' statements over yyaccept for
// the accept indices l..r (inclusive). A single remaining index becomes a
// plain jump; otherwise the range is split at its midpoint, using '==' when
// only two indices are left and '<=' otherwise.
void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts, size_t l, size_t r)
{
    if (!(l < r))
    {
        genGoTo(o, ind, s, accepts[l], readCh);
        return;
    }

    const size_t mid = (l + r) / 2;
    const uint32_t inner = ind + 1;

    o.wind(ind).ws("if (").wstring(opts->yyaccept).ws(r == l+1 ? " == " : " <= ").wu64(mid).ws(") {\n");
    emit_accept_binary (o, inner, readCh, s, accepts, l, mid);
    o.wind(ind).ws("} else {\n");
    emit_accept_binary (o, inner, readCh, s, accepts, mid + 1, r);
    o.wind(ind).ws("}\n");
}
// Emits the code for an ACCEPT action: restores the backed-up input position
// and jumps to the state selected by yyaccept. Nothing is emitted when there
// are no accept states. With a single accept state an unconditional jump is
// generated; otherwise the dispatch is, in order of preference, a computed
// goto table (gFlag, at or above the threshold), nested ifs (sFlag, or
// exactly two states for non-DOT targets), DOT edges, or a switch statement
// whose last entry is the 'default' branch.
void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts)
{
    const size_t count = accepts.size ();
    if (count == 0)
    {
        return;
    }

    const bool dot = opts->target == opt_t::DOT;
    if (!dot)
    {
        o.wstring(opts->input_api.stmt_restore (ind));
    }

    if (readCh) // shouldn't be necessary, but might become at some point
    {
        o.wstring(opts->input_api.stmt_peek (ind));
        readCh = false;
    }

    if (count == 1)
    {
        // no need to write if statement here since there is only case 0.
        genGoTo(o, ind, s, accepts[0], readCh);
        return;
    }

    if (opts->gFlag && count >= opts->cGotoThreshold)
    {
        o.wind(ind).ws("{\n");
        o.wind(ind + 1).ws("static void *").wstring(opts->yytarget).ws("[").wu64(count).ws("] = {\n");
        for (uint32_t i = 0; i < count; ++i)
        {
            o.wind(ind + 2).ws("&&").wstring(opts->labelPrefix).wlabel(accepts[i]->label).ws(",\n");
        }
        o.wind(ind + 1).ws("};\n");
        o.wind(ind + 1).ws("goto *").wstring(opts->yytarget).ws("[").wstring(opts->yyaccept).ws("];\n");
        o.wind(ind).ws("}\n");
    }
    else if (opts->sFlag || (count == 2 && !dot))
    {
        emit_accept_binary (o, ind, readCh, s, accepts, 0, count - 1);
    }
    else if (dot)
    {
        for (uint32_t i = 0; i < count; ++i)
        {
            o.wlabel(s->label).ws(" -> ").wlabel(accepts[i]->label);
            o.ws(" [label=\"yyaccept=").wu32(i).ws("\"]\n");
        }
    }
    else
    {
        const size_t last = count - 1;
        o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n");
        for (uint32_t i = 0; i < last; ++i)
        {
            o.wind(ind).ws("case ").wu32(i).ws(": \t");
            genGoTo(o, 0, s, accepts[i], readCh);
        }
        o.wind(ind).ws("default:\t");
        genGoTo(o, 0, s, accepts[last], readCh);
        o.wind(ind).ws("}\n");
    }
}
// Emits the code for a RULE action.
// For the DOT target only a node line is written (labelled with the source
// location of the rule's code, if any). Otherwise: the context position is
// restored when the rule has a fixed-length trailing context, then either the
// skeleton action is emitted (SKELETON target) or, in order, the condition
// switch (if the rule changes condition), the optional rule setup code and
// the user code with line information. A rule without code that switches
// condition emits the configured condition 'goto' instead.
void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton)
{
    if (opts->target == opt_t::DOT)
    {
        o.wlabel(s->label);
        if (rule->code)
        {
            o.ws(" [label=\"").wstring(rule->code->loc.filename).ws(":").wu32(rule->code->loc.line).ws("\"]");
        }
        o.ws("\n");
        return;
    }

    const uint32_t back = rule->ctx->fixedLength();
    if (back != 0u && opts->target != opt_t::DOT)
    {
        o.wstring(opts->input_api.stmt_restorectx (ind));
    }

    if (opts->target == opt_t::SKELETON)
    {
        skeleton->emit_action (o, ind, rule->rank);
        return;
    }

    const bool switches_cond = !rule->newcond.empty ();
    if (switches_cond && condName != rule->newcond)
    {
        genSetCondition(o, ind, rule->newcond);
    }

    if (rule->code)
    {
        if (!yySetupRule.empty ())
        {
            o.wind(ind).wstring(yySetupRule).ws("\n");
        }
        o.wline_info(rule->code->loc.line, rule->code->loc.filename.c_str ())
            .wind(ind).wstring(rule->code->text).ws("\n")
            .wdelay_line_info ();
    }
    else if (switches_cond)
    {
        o.wind(ind).wstring(replaceParam(opts->condGoto, opts->condGotoParam, opts->condPrefix + rule->newcond)).ws("\n");
    }
}
// Emits the buffer refill check for a state that needs 'n' more characters
// (nothing for the DOT target):
//  - with fFlag, the state-saving statement before the check and the
//    matching fill label after it (each call consumes one fill index);
//  - when fill is enabled and n > 0, the YYFILL invocation, guarded by an
//    'if' on the remaining input when fill checks are enabled;
//  - when n > 0, the statement that peeks the next character (also backing
//    up the marker if 'bSetMarker'); 'readCh' is cleared.
void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    const uint32_t fillIndex = last_fill_index;
    if (opts->fFlag)
    {
        last_fill_index++;
        genSetState (o, ind, fillIndex);
    }

    if (opts->fill_use && n > 0)
    {
        o.wind(ind);
        if (opts->fill_check)
        {
            // a dedicated expression exists for the single-character case
            o.ws("if (");
            if (n == 1)
            {
                o.wstring(opts->input_api.expr_lessthan_one ());
            }
            else
            {
                o.wstring(opts->input_api.expr_lessthan (n));
            }
            o.ws(") ");
        }
        genYYFill(o, n);
    }

    if (opts->fFlag)
    {
        o.wstring(opts->yyfilllabel).wu32(fillIndex).ws(":\n");
    }

    if (n == 0)
    {
        return;
    }
    if (bSetMarker)
    {
        o.wstring(opts->input_api.stmt_backup_peek (ind));
    }
    else
    {
        o.wstring(opts->input_api.stmt_peek (ind));
    }
    readCh = false;
}
// Writes the YYFILL invocation for 'need' characters: the configured fill
// text with its argument placeholder substituted, followed — unless the
// "naked" form is configured — by an optional "(need)" argument list and a
// semicolon, then a newline.
void genYYFill (OutputFile & o, size_t need)
{
    const std::string call = replaceParam (opts->fill, opts->fill_arg, need);
    o.wstring(call);

    if (opts->fill_naked)
    {
        o.ws("\n");
        return;
    }

    if (opts->fill_arg_use)
    {
        o.ws("(").wu64(need).ws(")");
    }
    o.ws(";");
    o.ws("\n");
}
// Writes the statement that switches the lexer to condition 'newcond': the
// configured setter text with its placeholder replaced by the prefixed
// condition name, followed by "(<prefixed name>);" unless the "naked" form
// is configured.
void genSetCondition(OutputFile & o, uint32_t ind, const std::string& newcond)
{
    const std::string setter = replaceParam (opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + newcond);
    o.wind(ind).wstring(setter);

    const bool with_call = !opts->cond_set_naked;
    if (with_call)
    {
        o.ws("(").wstring(opts->condEnumPrefix).wstring(newcond).ws(");");
    }
    o.ws("\n");
}
// Writes the statement that stores 'fillIndex' as the current lexer state:
// the configured setter text with its placeholder replaced by the index,
// followed by "(<index>);" unless the "naked" form is configured.
void genSetState(OutputFile & o, uint32_t ind, uint32_t fillIndex)
{
    const std::string setter = replaceParam (opts->state_set, opts->state_set_arg, fillIndex);
    o.wind(ind).wstring(setter);

    const bool with_call = !opts->state_set_naked;
    if (with_call)
    {
        o.ws("(").wu32(fillIndex).ws(");");
    }
    o.ws("\n");
}
} // namespace re2c

View file

@ -0,0 +1,348 @@
#include "src/util/c99_stdint.h"
#include <stddef.h>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "src/codegen/bitmap.h"
#include "src/codegen/emit.h"
#include "src/codegen/go.h"
#include "src/codegen/input_api.h"
#include "src/codegen/label.h"
#include "src/codegen/output.h"
#include "src/conf/opt.h"
#include "src/globals.h"
#include "src/ir/adfa/action.h"
#include "src/ir/adfa/adfa.h"
#include "src/ir/skeleton/skeleton.h"
#include "src/util/counter.h"
namespace re2c
{
static std::string genGetCondition ();
static void genCondGotoSub (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax);
static void genCondTable (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
static void genCondGoto (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
static void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label);
// Returns the expression that reads the current condition: the configured
// getter text as is in "naked" mode, otherwise with "()" appended.
std::string genGetCondition()
{
    if (opts->cond_get_naked)
    {
        return opts->cond_get;
    }
    return opts->cond_get + "()";
}
// Emits a transition from state 'from' to state 'to'.
// For the DOT target this is a graph edge. Otherwise it is a 'goto' to the
// target label; if the next character has not been read yet and the target
// is not the state that directly follows 'from', a peek statement is emitted
// first and 'readCh' is cleared.
void genGoTo(OutputFile & o, uint32_t ind, const State *from, const State *to, bool & readCh)
{
    if (opts->target == opt_t::DOT)
    {
        o.wlabel(from->label);
        o.ws(" -> ");
        o.wlabel(to->label);
        o.ws("\n");
        return;
    }

    if (readCh && from->next != to)
    {
        o.wstring(opts->input_api.stmt_peek (ind));
        readCh = false;
    }

    o.wind(ind);
    o.ws("goto ").wstring(opts->labelPrefix).wlabel(to->label).ws(";\n");
}
// Emits the preamble of a state (nothing for the DOT target): its label when
// the label is a jump target, and the debug call when debugging output is
// enabled — except for the initial state, whose action emits its own.
void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    if (used_label)
    {
        o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n");
    }

    if (!opts->dFlag)
    {
        return;
    }
    if (s->action.type != Action::INITIAL)
    {
        o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(opts->input_api.expr_peek ()).ws(");\n");
    }
}
void DFA::count_used_labels (std::set<label_t> & used, label_t start, label_t initial, bool force_start) const
{
// In '-f' mode, default state is always state 0
if (opts->fFlag)
{
used.insert (label_t::first ());
}
if (force_start)
{
used.insert (start);
}
for (State * s = head; s; s = s->next)
{
s->go.used_labels (used);
}
for (uint32_t i = 0; i < accepts.size (); ++i)
{
used.insert (accepts[i]->label);
}
// must go last: it needs the set of used labels
if (used.count (head->label))
{
used.insert (initial);
}
}
// Emits the code of all states in list order: for each state its preamble,
// its action and its outgoing transitions. yyaccept only needs to be saved
// when there is more than one accept state.
void DFA::emit_body (OutputFile & o, uint32_t& ind, const std::set<label_t> & used_labels, label_t initial) const
{
    // If DFA has transitions to initial state, then initial state
    // has a piece of code that advances input position. We must
    // skip it when entering DFA.
    const bool head_is_target = used_labels.count(head->label) != 0;
    if (head_is_target)
    {
        o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(initial).ws(";\n");
    }

    const bool save_yyaccept = accepts.size () > 1;

    State * state = head;
    while (state)
    {
        bool readCh = false;
        const bool labelled = used_labels.count (state->label) != 0;
        emit_state (o, ind, state, labelled);
        emit_action (state->action, o, ind, readCh, state, cond, skeleton, used_labels, save_yyaccept);
        state->go.emit(o, ind, readCh);
        state = state->next;
    }
}
// Emits the complete code for this DFA into output.source.
//
// Steps: allocate the start/initial labels and one label per state, compute
// the set of used labels, issue the skeleton warnings, then either emit the
// self-contained skeleton program (SKELETON target, once per DFA name) or the
// regular lexer code: prolog (opening brace, yych/yyaccept declarations),
// bitmap tables, condition table/dispatch, the condition label, the state
// bodies and the closing braces. Finally the global BitMap list is freed.
//
// 'ind' is the current indentation level (updated in place), 'isLastCond'
// tells whether this is the last condition of the block and 'bPrologBrace'
// records across calls whether the prolog opened a brace that the epilog of
// the last condition must close.
void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBrace)
{
OutputFile & o = output.source;
// The prolog is written once per block: always without '-c', and only for
// the first condition (before the condition check was written) with '-c'.
bool bProlog = (!opts->cFlag || !bWroteCondCheck);
// start_label points to the beginning of current re2c block
// (prior to condition dispatch in '-c' mode)
// it can be forced by configuration 're2c:startlabel = <integer>;'
label_t start_label = o.label_counter.next ();
// initial_label points to the beginning of DFA
// in '-c' mode this is NOT equal to start_label
label_t initial_label = bProlog && opts->cFlag
? o.label_counter.next ()
: start_label;
for (State * s = head; s; s = s->next)
{
s->label = o.label_counter.next ();
}
std::set<label_t> used_labels;
count_used_labels (used_labels, start_label, initial_label, o.get_force_start_label ());
head->action.set_initial (initial_label, head->action.type == Action::SAVE);
skeleton->warn_undefined_control_flow ();
skeleton->warn_unreachable_rules ();
skeleton->warn_match_empty ();
if (opts->target == opt_t::SKELETON)
{
// Each distinctly named DFA is emitted only once (insert() reports whether
// the name was new).
if (output.skeletons.insert (name).second)
{
skeleton->emit_data (o.file_name);
skeleton->emit_start (o, max_fill, need_backup, need_backupctx, need_accept);
uint32_t i = 2;
emit_body (o, i, used_labels, initial_label);
skeleton->emit_end (o, need_backup, need_backupctx);
}
}
else
{
// Generate prolog
if (bProlog)
{
o.ws("\n").wdelay_line_info ();
if (opts->target == opt_t::DOT)
{
bPrologBrace = true;
o.ws("digraph re2c {\n");
}
else if ((!opts->fFlag && o.get_used_yyaccept ())
|| (!opts->fFlag && opts->bEmitYYCh)
|| (opts->bFlag && !opts->cFlag && BitMap::first)
|| (opts->cFlag && !bWroteCondCheck && opts->gFlag)
|| (opts->fFlag && !bWroteGetState && opts->gFlag)
)
{
// Local declarations or tables follow, so the block gets its own scope.
bPrologBrace = true;
o.wind(ind++).ws("{\n");
}
else if (ind == 0)
{
ind = 1;
}
if (!opts->fFlag && opts->target != opt_t::DOT)
{
if (opts->bEmitYYCh)
{
o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n");
}
o.wdelay_yyaccept_init (ind);
}
else
{
o.ws("\n");
}
}
// Without conditions the bitmap table is emitted once, right after the prolog.
if (opts->bFlag && !opts->cFlag && BitMap::first)
{
BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256);
}
if (bProlog)
{
if (opts->cFlag && !bWroteCondCheck && opts->gFlag)
{
genCondTable(o, ind, output.types);
}
o.wdelay_state_goto (ind);
if (opts->cFlag && opts->target != opt_t::DOT)
{
if (used_labels.count(start_label))
{
o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n");
}
}
o.wuser_start_label ();
if (opts->cFlag && !bWroteCondCheck)
{
genCondGoto(o, ind, output.types);
}
}
if (opts->cFlag && !cond.empty())
{
if (opts->condDivider.length())
{
o.wstring(replaceParam(opts->condDivider, opts->condDividerParam, cond)).ws("\n");
}
if (opts->target == opt_t::DOT)
{
o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n");
}
else
{
o.wstring(opts->condPrefix).wstring(cond).ws(":\n");
}
}
// With conditions each condition gets its own scope holding its bitmap table.
if (opts->cFlag && opts->bFlag && BitMap::first)
{
o.wind(ind++).ws("{\n");
BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256);
}
// Generate code
emit_body (o, ind, used_labels, initial_label);
if (opts->cFlag && opts->bFlag && BitMap::first)
{
o.wind(--ind).ws("}\n");
}
// Generate epilog
if ((!opts->cFlag || isLastCond) && bPrologBrace)
{
o.wind(--ind).ws("}\n");
}
}
// Cleanup
if (BitMap::first)
{
delete BitMap::first;
BitMap::first = NULL;
}
}
// Emits the static table of condition label addresses ('yyctable') used by
// the computed-goto condition dispatch: one "&&<prefix><condition>" entry
// per condition, in the order of 'condnames'.
void genCondTable(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
{
    const size_t conds = condnames.size ();
    const uint32_t entry_ind = ind + 1;

    o.wind(ind).ws("static void *").wstring(opts->yyctable).ws("[").wu64(conds).ws("] = {\n");
    for (std::vector<std::string>::const_iterator it = condnames.begin (); it != condnames.end (); ++it)
    {
        o.wind(entry_ind).ws("&&").wstring(opts->condPrefix).wstring(*it).ws(",\n");
    }
    o.wind(ind).ws("};\n");
}
// Emits a binary decision tree of nested 'if' statements that dispatches on
// the condition number for conditions cMin..cMax (inclusive). A single
// remaining condition becomes a plain 'goto'; otherwise the range is split
// at its upper midpoint with a '<' comparison.
void genCondGotoSub(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax)
{
    if (cMin == cMax)
    {
        o.wind(ind).ws("goto ").wstring(opts->condPrefix).wstring(condnames[cMin]).ws(";\n");
        return;
    }

    const uint32_t half = (cMax - cMin + 1) / 2;
    const uint32_t cMid = cMin + half;
    const uint32_t inner = ind + 1;

    o.wind(ind).ws("if (").wstring(genGetCondition()).ws(" < ").wu32(cMid).ws(") {\n");
    genCondGotoSub(o, inner, condnames, cMin, cMid - 1);
    o.wind(ind).ws("} else {\n");
    genCondGotoSub(o, inner, condnames, cMid, cMax);
    o.wind(ind).ws("}\n");
}
/*
* note [condition order]
*
* In theory re2c makes no guarantee about the order of conditions in
* the generated lexer. Users should define condition type 'YYCONDTYPE'
* and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'.
* This way code is independent of internal re2c condition numbering.
*
* However, it is possible to manually hardcode condition numbers and make
* re2c generate condition dispatch without explicit use of condition names
* (nested 'if' statements with '-b' or computed 'goto' table with '-g').
* This code is syntactically valid (compiles), but unsafe:
* - change of re2c options may break compilation
* - change of internal re2c condition numbering may break runtime
*
* re2c has to preserve the existing numbering scheme.
*
* re2c warns about implicit assumptions about condition order, unless:
* - condition type is defined with 'types:re2c' or '-t, --type-header'
* - dispatch is independent of condition order: either it uses
* explicit condition names or there's only one condition and
* dispatch shrinks to unconditional jump
*/
// Emits the dispatch on the current condition and marks the condition check
// as written. Depending on the options this is a set of DOT edges, a
// computed goto through the condition table, nested 'if' statements or a
// 'switch' over the condition names. The condition-order warning is
// suppressed whenever the generated dispatch does not depend on the internal
// numbering of conditions (see note [condition order]).
void genCondGoto(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
{
    typedef std::vector<std::string>::const_iterator cond_iter;
    const size_t conds = condnames.size ();

    if (opts->target == opt_t::DOT)
    {
        o.warn_condition_order = false; // see note [condition order]
        for (cond_iter it = condnames.begin (); it != condnames.end (); ++it)
        {
            o.ws("0 -> ").wstring(*it).ws(" [label=\"state=").wstring(*it).ws("\"]\n");
        }
    }
    else if (opts->gFlag)
    {
        o.wind(ind).ws("goto *").wstring(opts->yyctable).ws("[").wstring(genGetCondition()).ws("];\n");
    }
    else if (opts->sFlag)
    {
        if (conds == 1)
        {
            o.warn_condition_order = false; // see note [condition order]
        }
        const uint32_t last = static_cast<uint32_t> (conds) - 1;
        genCondGotoSub(o, ind, condnames, 0, last);
    }
    else
    {
        o.warn_condition_order = false; // see note [condition order]
        o.wind(ind).ws("switch (").wstring(genGetCondition()).ws(") {\n");
        for (cond_iter it = condnames.begin (); it != condnames.end (); ++it)
        {
            o.wind(ind).ws("case ").wstring(opts->condEnumPrefix).wstring(*it).ws(": goto ").wstring(opts->condPrefix).wstring(*it).ws(";\n");
        }
        o.wind(ind).ws("}\n");
    }

    o.wdelay_warn_condition_order ();
    bWroteCondCheck = true;
}
} // end namespace re2c

216
tools/re2c/src/codegen/go.h Normal file
View file

@ -0,0 +1,216 @@
#ifndef _RE2C_CODEGEN_GO_
#define _RE2C_CODEGEN_GO_
#include <iostream>
#include <set>
#include <vector>
#include "src/codegen/output.h"
#include "src/util/c99_stdint.h"
#include "src/util/forbid_copy.h"
namespace re2c
{
class BitMap;
class State;
struct If;
// One entry of a state's transition table: a range of character codes and
// the state it leads to. Only the upper bound is stored; code that walks a
// span array takes the previous span's 'ub' (initially 0) as the lower bound.
struct Span
{
// Upper bound of the range (treated as exclusive by the walking code).
uint32_t ub;
// Target state of the transition.
State * to;
FORBID_COPY (Span);
};
// A set of character ranges that all lead to the same target state
// (presumably one group of 'case' labels of a generated switch — the
// definition of emit() is not visible here).
struct Case
{
// Character ranges as (first, second) pairs; NOTE(review): confirm whether
// the second bound is inclusive or exclusive in the definition.
std::vector<std::pair<uint32_t, uint32_t> > ranges;
// Target state shared by all ranges; NULL until assigned.
const State * to;
void emit (OutputFile & o, uint32_t ind);
inline Case ()
: ranges ()
, to (NULL)
{}
FORBID_COPY (Case);
};
// Collection of Case entries built from a span array, plus a default target
// (presumably the representation of a generated 'switch' statement — confirm
// against the definitions, which are not visible here).
struct Cases
{
// Default target state.
const State * def;
// Array of cases and the number of entries in use.
Case * cases;
uint32_t cases_size;
// Adds the range lb..ub leading to state 'to'.
void add (uint32_t lb, uint32_t ub, State * to);
// Builds the cases from 'n' spans starting at 's'.
Cases (const Span * s, uint32_t n);
~Cases ();
void emit (OutputFile & o, uint32_t ind, bool & readCh);
// Adds the labels of all referenced states to 'used'.
void used_labels (std::set<label_t> & used);
FORBID_COPY (Cases);
};
// A comparison used as the condition of a generated 'if': a comparison
// operator in textual form and the value to compare against.
struct Cond
{
// Textual comparison operator.
std::string compare;
// Right-hand side of the comparison.
uint32_t value;
Cond (const std::string & cmp, uint32_t val);
};
// Transition dispatch in the form of a binary decision: a condition with a
// 'then' and an 'else' sub-dispatch.
struct Binary
{
// Condition tested at this node.
Cond * cond;
// Sub-dispatch taken when the condition holds / does not hold.
If * thn;
If * els;
// Builds the dispatch from 'n' spans starting at 's'; 'next' is the state
// that follows the current one in the output.
Binary (const Span * s, uint32_t n, const State * next);
~Binary ();
void emit (OutputFile & o, uint32_t ind, bool & readCh);
// Adds the labels of all referenced states to 'used'.
void used_labels (std::set<label_t> & used);
FORBID_COPY (Binary);
};
// Linear dispatch: a sequence of `if (cond) goto target;` statements.
struct Linear
{
// (condition, target) pairs in emission order. The conditions are owned and
// deleted by ~Linear; a NULL condition stands for an unconditional goto.
std::vector<std::pair<const Cond *, const State *> > branches;
Linear (const Span * s, uint32_t n, const State * next);
~Linear ();
void emit (OutputFile & o, uint32_t ind, bool & readCh);
// Adds the label of every branch target to `used`.
void used_labels (std::set<label_t> & used);
};
// Tagged union over the two `if`-based dispatch strategies.
struct If
{
enum type_t
{
BINARY,
LINEAR
} type; // selects the active member of `info`
union
{
Binary * binary; // owned; valid when type == BINARY
Linear * linear; // owned; valid when type == LINEAR
} info;
// Constructs the strategy selected by `t` for the `nsp` spans at `sp`.
If (type_t t, const Span * sp, uint32_t nsp, const State * next);
~If ();
void emit (OutputFile & o, uint32_t ind, bool & readCh);
void used_labels (std::set<label_t> & used);
};
// Dispatch that is printed either as a `switch` or as a tree/chain of `if`s;
// the constructor picks the form from the options and the span layout.
struct SwitchIf
{
enum
{
SWITCH,
IF
} type; // selects the active member of `info`
union
{
Cases * cases; // owned; valid when type == SWITCH
If * ifs; // owned; valid when type == IF
} info;
SwitchIf (const Span * sp, uint32_t nsp, const State * next);
~SwitchIf ();
void emit (OutputFile & o, uint32_t ind, bool & readCh);
void used_labels (std::set<label_t> & used);
};
// Dispatch through a bitmap table test, with fallbacks for everything the
// bitmap does not decide.
struct GoBitmap
{
const BitMap * bitmap; // not owned; the bitmap that is tested
const State * bitmap_state; // target taken when the bitmap test succeeds
SwitchIf * hgo; // owned, may be NULL; handles the high spans (printed under `yych & ~0xFF`)
SwitchIf * lgo; // owned, may be NULL; handles the spans left after removing `bitmap_state`
GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next);
~GoBitmap ();
void emit (OutputFile & o, uint32_t ind, bool & readCh);
void used_labels (std::set<label_t> & used);
FORBID_COPY (GoBitmap);
};
// Jump table for computed gotos: one target state per code unit below TABLE_SIZE.
struct CpgotoTable
{
static const uint32_t TABLE_SIZE; // number of table entries (defined as 0x100)
const State ** table; // owned array of TABLE_SIZE targets, indexed by code unit
CpgotoTable (const Span * span, uint32_t nSpans);
~CpgotoTable ();
// Prints the table as a static array of label addresses.
void emit (OutputFile & o, uint32_t ind);
void used_labels (std::set<label_t> & used);
private:
// Greatest label among all table targets; used to align the printed columns.
label_t max_label () const;
FORBID_COPY (CpgotoTable);
};
// Computed-goto dispatch: a jump table plus an optional fallback for high spans.
struct Cpgoto
{
SwitchIf * hgo; // owned, NULL when there are no high spans
CpgotoTable * table; // owned jump table
Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next);
~Cpgoto ();
void emit (OutputFile & o, uint32_t ind, bool & readCh);
void used_labels (std::set<label_t> & used);
FORBID_COPY (Cpgoto);
};
// Transitions of one state prepared for output as graph edges (DOT target).
struct Dot
{
const State * from; // source state of all edges
Cases * cases; // owned; outgoing edges grouped by target state
Dot (const Span * sp, uint32_t nsp, const State * from);
~Dot ();
// Prints one "from -> to" edge per target, labelled with its ranges.
void emit (OutputFile & o);
FORBID_COPY (Dot);
};
// Outgoing transitions of a state together with the code-generation strategy
// chosen for them by init().
struct Go
{
uint32_t nSpans; // number of spans
Span * span; // the spans themselves; not released by ~Go
enum
{
EMPTY,
SWITCH_IF,
BITMAP,
CPGOTO,
DOT
} type; // selects the active member of `info`; EMPTY until init() picks a strategy
union
{
SwitchIf * switchif;
GoBitmap * bitmap;
Cpgoto * cpgoto;
Dot * dot;
} info; // the active member is deleted by ~Go
Go ();
~Go ();
// Chooses and constructs the strategy for the current spans; `from` is the owning state.
void init (const State * from);
void emit (OutputFile & o, uint32_t ind, bool & readCh);
void used_labels (std::set<label_t> & used);
// Copying is shallow: the copy shares `span` and the `info` pointer with the original.
// NOTE(review): ~Go deletes the active `info` member, so two initialized copies
// would delete the same object twice — presumably copies are only made while
// type == EMPTY; confirm against the callers.
Go (const Go & g)
: nSpans (g.nSpans)
, span (g.span)
, type (g.type)
, info (g.info)
{}
Go & operator = (const Go & g)
{
nSpans = g.nSpans;
span = g.span;
type = g.type;
info = g.info;
return * this;
}
};
} // namespace re2c
#endif // _RE2C_CODEGEN_GO_

View file

@ -0,0 +1,284 @@
#include <stddef.h>
#include "src/util/c99_stdint.h"
#include <string>
#include <utility>
#include <vector>
#include "src/codegen/bitmap.h"
#include "src/codegen/go.h"
#include "src/conf/opt.h"
#include "src/globals.h"
#include "src/ir/adfa/adfa.h"
#include "src/util/allocate.h"
namespace re2c
{
static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x);
// Groups `span_size` consecutive spans by target state. The lower bound of
// each span is the upper bound of its predecessor (0 for the first span); the
// target of the last span becomes the default target.
Cases::Cases (const Span * span, uint32_t span_size)
    : def (span_size == 0 ? NULL : span[span_size - 1].to)
    , cases (new Case[span_size])
    , cases_size (0)
{
    uint32_t lower = 0;
    for (uint32_t k = 0; k != span_size; ++k)
    {
        const Span & current = span[k];
        add (lower, current.ub, current.to);
        lower = current.ub;
    }
}
// Records range [lb, ub) for target `to`: the range is appended to the
// existing case with that target, or to a newly claimed slot if there is none.
void Cases::add (uint32_t lb, uint32_t ub, State * to)
{
    // find the slot that already targets `to`; stop at the first free slot otherwise
    uint32_t slot = 0;
    while (slot < cases_size && cases[slot].to != to)
    {
        ++slot;
    }
    if (slot == cases_size)
    {
        cases[slot].to = to;
        ++cases_size;
    }
    cases[slot].ranges.push_back (std::make_pair (lb, ub));
}
// Stores the operator text `cmp` and the constant `val` it compares against.
Cond::Cond (const std::string & cmp, uint32_t val)
    : compare (cmp), value (val)
{
}
// Splits the `n` spans at `s` into a lower and an upper half. The condition
// tests whether the character lies in the lower half; each half is handled by
// a nested binary split while it has more than 4 spans, linearly otherwise.
Binary::Binary (const Span * s, uint32_t n, const State * next)
    : cond (NULL), thn (NULL), els (NULL)
{
    const uint32_t lower_count = n / 2;
    const uint32_t upper_count = n - lower_count;
    const Span * const lower = s;
    const Span * const upper = s + lower_count;

    // last value of the lower half: its final (exclusive) upper bound minus one
    cond = new Cond ("<=", lower[lower_count - 1].ub - 1);

    const If::type_t lower_kind = lower_count > 4 ? If::BINARY : If::LINEAR;
    thn = new If (lower_kind, lower, lower_count, next);

    const If::type_t upper_kind = upper_count > 4 ? If::BINARY : If::LINEAR;
    els = new If (upper_kind, upper, upper_count, next);
}
// Translates the `n` spans at `s` into a chain of conditional jumps.
// `next` is the state whose code follows immediately: a jump to it can be
// left out where falling through has the same effect (NULL disables this).
// The loop consumes spans from the front by advancing `s` and decreasing `n`.
Linear::Linear (const Span * s, uint32_t n, const State * next)
: branches ()
{
for (;;)
{
// target of the current front span ("background" of the pattern below)
const State *bg = s[0].to;
// Pattern: span 1 covers exactly one value (s[0].ub) and is surrounded by
// two spans that both lead to `bg` — test that single value by equality.
while (n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1)
{
if (s[1].to == next && n == 3)
{
// the single value falls through to `next`; everything else goes to `bg`
branches.push_back (std::make_pair (new Cond ("!=", s[0].ub), bg));
return ;
}
else
{
branches.push_back (std::make_pair (new Cond ("==", s[0].ub), s[1].to));
}
// drop the front span and the single-value span; the new front leads to `bg` again
n -= 2;
s += 2;
}
if (n == 1)
{
// last span: unconditional jump, omitted when it would go to `next`
if (next == NULL || s[0].to != next)
{
branches.push_back (std::make_pair (static_cast<const Cond *> (NULL), s[0].to));
}
return;
}
else if (n == 2 && bg == next)
{
// two spans left and the lower one falls through: only test for the upper one
branches.push_back (std::make_pair (new Cond (">=", s[0].ub), s[1].to));
return;
}
else
{
// general case: peel off the front span with an upper-bound test
branches.push_back (std::make_pair (new Cond ("<=", s[0].ub - 1), bg));
n -= 1;
s += 1;
}
}
}
// Builds the dispatch strategy selected by `t` over the `nsp` spans at `sp`
// and stores it in the matching union member.
If::If (type_t t, const Span * sp, uint32_t nsp, const State * next)
    : type (t)
    , info ()
{
    if (type == BINARY)
    {
        info.binary = new Binary (sp, nsp, next);
    }
    else if (type == LINEAR)
    {
        info.linear = new Linear (sp, nsp, next);
    }
}
// Chooses between a `switch` and `if`-based dispatch for the `nsp` spans at `sp`.
// A `switch` is used when nested ifs are not requested (sFlag off) and there
// are more than 2 spans, or when there are more than 8 spans whose bounds lie
// close together. Otherwise more than 5 spans get a binary split and fewer
// get a linear chain.
SwitchIf::SwitchIf (const Span * sp, uint32_t nsp, const State * next)
    : type (IF)
    , info ()
{
    const bool switch_preferred = !opts->sFlag && nsp > 2;
    const bool dense = nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2));

    if (switch_preferred || dense)
    {
        type = SWITCH;
        info.cases = new Cases (sp, nsp);
        return;
    }

    const If::type_t kind = nsp > 5 ? If::BINARY : If::LINEAR;
    info.ifs = new If (kind, sp, nsp, next);
}
// Prepares a bitmap-based dispatch. The spans that do not lead to `bm_state`
// are extracted into a temporary array and handled by `lgo`; the high spans
// are handled by `hgo`. Either part stays NULL when it has no spans.
GoBitmap::GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next)
    : bitmap (bm)
    , bitmap_state (bm_state)
    , hgo (NULL)
    , lgo (NULL)
{
    // scratch copy of the spans with `bm_state` removed; released before returning
    Span * const rest = allocate<Span> (nSpans);
    const uint32_t rest_count = unmap (rest, span, nSpans, bm_state);

    if (rest_count != 0)
    {
        lgo = new SwitchIf (rest, rest_count, next);
    }

    // if there are any low spans, then next state for high spans
    // must be NULL to trigger explicit goto generation in linear 'if'
    if (hSpans != 0)
    {
        hgo = new SwitchIf (hspan, hSpans, lgo ? NULL : next);
    }

    operator delete (rest);
}
// The jump table has one entry per code unit in [0, 0x100).
const uint32_t CpgotoTable::TABLE_SIZE = 0x100;

// Expands the spans into a flat table: every code unit below a span's upper
// bound (and below TABLE_SIZE) that is not yet filled gets that span's target.
CpgotoTable::CpgotoTable (const Span * span, uint32_t nSpans)
    : table (new const State * [TABLE_SIZE])
{
    uint32_t filled = 0;
    for (uint32_t i = 0; i < nSpans; ++i)
    {
        const Span & current = span[i];
        while (filled < current.ub && filled < TABLE_SIZE)
        {
            table[filled] = current.to;
            ++filled;
        }
    }
}
// Builds the jump table from all spans and, when there are high spans, a
// SwitchIf fallback for them.
Cpgoto::Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next)
    : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next)),
      table (new CpgotoTable (span, nSpans))
{
}
// Remembers the source state `s` and groups its `nsp` spans by target state.
Dot::Dot (const Span * sp, uint32_t nsp, const State * s)
    : from (s),
      cases (new Cases (sp, nsp))
{
}
// Creates an empty transition set: no spans and no strategy chosen yet.
Go::Go ()
    : nSpans (0), span (NULL), type (EMPTY), info ()
{
}
void Go::init (const State * from)
{
if (nSpans == 0)
{
return;
}
// initialize high (wide) spans
uint32_t hSpans = 0;
const Span * hspan = NULL;
for (uint32_t i = 0; i < nSpans; ++i)
{
if (span[i].ub > 0x100)
{
hspan = &span[i];
hSpans = nSpans - i;
break;
}
}
// initialize bitmaps
uint32_t nBitmaps = 0;
const BitMap * bitmap = NULL;
const State * bitmap_state = NULL;
for (uint32_t i = 0; i < nSpans; ++i)
{
if (span[i].to->isBase)
{
const BitMap *b = BitMap::find (span[i].to);
if (b && matches(b->go->span, b->go->nSpans, b->on, span, nSpans, span[i].to))
{
if (bitmap == NULL)
{
bitmap = b;
bitmap_state = span[i].to;
}
nBitmaps++;
}
}
}
const uint32_t dSpans = nSpans - hSpans - nBitmaps;
if (opts->target == opt_t::DOT)
{
type = DOT;
info.dot = new Dot (span, nSpans, from);
}
else if (opts->gFlag && (dSpans >= opts->cGotoThreshold))
{
type = CPGOTO;
info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from->next);
}
else if (opts->bFlag && (nBitmaps > 0))
{
type = BITMAP;
info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bitmap, bitmap_state, from->next);
bUsedYYBitmap = true;
}
else
{
type = SWITCH_IF;
info.switchif = new SwitchIf (span, nSpans, from->next);
}
}
/*
* Find all spans, that map to the given state. For each of them,
* find upper adjacent span, that maps to another state (if such
* span exists, otherwise try lower one).
* If input contains single span that maps to the given state,
* then output contains 0 spans.
*/
// Copies into `new_span` every span of `old_span` that does not lead to `x`,
// merging neighbouring results that share a target. The last result is
// stretched up to the upper bound of the last input span. Returns the number
// of spans written; `new_span` must have room for `old_nspans` entries.
uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x)
{
    uint32_t count = 0;
    for (uint32_t i = 0; i < old_nspans; ++i)
    {
        const Span & source = old_span[i];
        if (source.to == x)
        {
            continue;
        }
        // open a new output span unless the previous one has the same target
        if (count == 0 || new_span[count - 1].to != source.to)
        {
            new_span[count].to = source.to;
            ++count;
        }
        new_span[count - 1].ub = source.ub;
    }
    if (count != 0)
    {
        new_span[count - 1].ub = old_span[old_nspans - 1].ub;
    }
    return count;
}
} // namespace re2c

View file

@ -0,0 +1,99 @@
#include "src/util/c99_stdint.h"
#include <utility>
#include <vector>
#include "src/codegen/go.h"
namespace re2c
{

// Releases the array of cases.
Cases::~Cases ()
{
    delete [] cases;
}

// Releases the condition and both halves.
Binary::~Binary ()
{
    delete cond;
    delete thn;
    delete els;
}

// Releases the condition of every branch (the target states are not owned).
Linear::~Linear ()
{
    typedef std::vector<std::pair<const Cond *, const State *> >::const_iterator iter_t;
    for (iter_t it = branches.begin (); it != branches.end (); ++it)
    {
        delete it->first;
    }
}

// Releases whichever strategy is active.
If::~If ()
{
    if (type == BINARY)
    {
        delete info.binary;
    }
    else if (type == LINEAR)
    {
        delete info.linear;
    }
}

// Releases whichever form (switch or ifs) is active.
SwitchIf::~SwitchIf ()
{
    if (type == SWITCH)
    {
        delete info.cases;
    }
    else if (type == IF)
    {
        delete info.ifs;
    }
}

// Releases both fallbacks; the bitmap itself is not owned.
GoBitmap::~GoBitmap ()
{
    delete hgo;
    delete lgo;
}

// Releases the array of targets.
CpgotoTable::~CpgotoTable ()
{
    delete [] table;
}

// Releases the high-span fallback and the jump table.
Cpgoto::~Cpgoto ()
{
    delete hgo;
    delete table;
}

// Releases the grouped cases.
Dot::~Dot ()
{
    delete cases;
}

// Releases the strategy object selected by `type`; EMPTY owns nothing.
Go::~Go ()
{
    if (type == SWITCH_IF)
    {
        delete info.switchif;
    }
    else if (type == BITMAP)
    {
        delete info.bitmap;
    }
    else if (type == CPGOTO)
    {
        delete info.cpgoto;
    }
    else if (type == DOT)
    {
        delete info.dot;
    }
}

} // namespace re2c

View file

@ -0,0 +1,271 @@
#include <stddef.h>
#include "src/util/c99_stdint.h"
#include <string>
#include <utility>
#include <vector>
#include "src/codegen/bitmap.h"
#include "src/codegen/go.h"
#include "src/codegen/input_api.h"
#include "src/codegen/label.h"
#include "src/codegen/output.h"
#include "src/codegen/print.h"
#include "src/conf/opt.h"
#include "src/globals.h"
#include "src/ir/adfa/adfa.h"
#include "src/ir/regexp/encoding/enc.h"
namespace re2c
{
static void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value);
static void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to);
static std::string output_yych (bool & readCh);
static std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo);
// Returns the expression that denotes the current character. If the character
// still has to be read (`readCh`), the result is the parenthesized
// "peek and save" expression and `readCh` is cleared; otherwise it is the
// plain name of the character variable.
std::string output_yych (bool & readCh)
{
    if (!readCh)
    {
        return opts->yych;
    }
    readCh = false;
    return "(" + opts->input_api.expr_peek_save () + ")";
}
// Prints "if (<yych> <compare> <value>) " at indentation `ind`; the statement
// controlled by the `if` is left for the caller to print.
void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value)
{
    const std::string subject = output_yych (readCh);
    o.wind(ind).ws("if (").wstring(subject).ws(" ").wstring(compare).ws(" ");
    o.wc_hex (value).ws(") ");
}
// Prints "goto <label>;" at indentation `ind`. If the character has not been
// read yet, a peek statement is printed first and `readCh` is cleared.
void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to)
{
    const bool peek_first = readCh;
    if (peek_first)
    {
        o.wstring(opts->input_api.stmt_peek (ind));
        readCh = false;
    }
    o.wind(ind).ws("goto ");
    o.wstring(opts->labelPrefix).wlabel(to).ws(";\n");
}
// Prints the guard for high (wide) characters, if there is one, and returns
// the expression to use for the character afterwards. Without `hgo` only the
// indentation is printed. With `hgo` the output is
// "if (<yych> & ~0xFF) { ...hgo... } else " and the plain character variable
// is returned, because the guard has already read the character.
std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo)
{
    const std::string yych = output_yych (readCh);
    if (hgo == NULL)
    {
        o.wind(ind);
        return yych;
    }
    o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n");
    hgo->emit (o, ind + 1, readCh);
    o.wind(ind).ws("} else ");
    return opts->yych;
}
// Prints one "case <value>:" label per value of every range. With the dFlag
// option and EBCDIC encoding, printable characters are added as a comment.
// Every label ends with a newline except the very last one, so that the
// caller can continue on the same line.
void Case::emit (OutputFile & o, uint32_t ind)
{
    const size_t range_count = ranges.size ();
    for (uint32_t r = 0; r < range_count; ++r)
    {
        const uint32_t lower = ranges[r].first;
        const uint32_t upper = ranges[r].second;
        for (uint32_t ch = lower; ch < upper; ++ch)
        {
            o.wind(ind).ws("case ").wc_hex (ch).ws(":");
            if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC)
            {
                const uint32_t decoded = opts->encoding.decodeUnsafe (ch);
                if (is_print (decoded))
                {
                    o.ws(" /* ").wc(static_cast<char> (decoded)).ws(" */");
                }
            }
            const bool final_label = (r == range_count - 1) && (ch == upper - 1);
            if (!final_label)
            {
                o.ws("\n");
            }
        }
    }
}
// Prints a complete `switch` over the current character: the labels of every
// case that does not lead to the default target, each followed by its goto,
// and finally the `default:` branch.
void Cases::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const std::string subject = output_yych (readCh);
    o.wind(ind).ws("switch (").wstring(subject).ws(") {\n");

    for (uint32_t i = 0; i < cases_size; ++i)
    {
        Case & current = cases[i];
        if (current.to == def)
        {
            continue; // covered by `default:`
        }
        current.emit (o, ind);
        output_goto (o, 1, readCh, current.to->label);
    }

    o.wind(ind).ws("default:");
    output_goto (o, 1, readCh, def->label);
    o.wind(ind).ws("}\n");
}
// Prints "if (<cond>) { <lower half> } else { <upper half> }" with both
// halves indented one level deeper.
void Binary::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const uint32_t inner = ind + 1;

    output_if (o, ind, readCh, cond->compare, cond->value);
    o.ws("{\n");
    thn->emit (o, inner, readCh);

    o.wind(ind).ws("} else {\n");
    els->emit (o, inner, readCh);

    o.wind(ind).ws("}\n");
}
// Prints the branches in order. A conditional branch becomes
// "if (...) goto L;" on one line (the goto gets no indentation of its own);
// an unconditional branch becomes an indented "goto L;".
void Linear::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    for (uint32_t i = 0; i < branches.size (); ++i)
    {
        const Cond * const condition = branches[i].first;
        const State * const target = branches[i].second;

        uint32_t goto_indent = ind;
        if (condition != NULL)
        {
            output_if (o, ind, readCh, condition->compare, condition->value);
            goto_indent = 0;
        }
        output_goto (o, goto_indent, readCh, target->label);
    }
}
// Forwards to the active strategy.
void If::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    if (type == BINARY)
    {
        info.binary->emit (o, ind, readCh);
    }
    else if (type == LINEAR)
    {
        info.linear->emit (o, ind, readCh);
    }
}
// Forwards to the active form (switch or ifs).
void SwitchIf::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    if (type == SWITCH)
    {
        info.cases->emit (o, ind, readCh);
    }
    else if (type == IF)
    {
        info.ifs->emit (o, ind, readCh);
    }
}
// Prints the optional high-character guard, then the bitmap test
// "if (<yybm>[<offset>+<yych>] & <mask>) { goto <bitmap state>; }" and finally
// the dispatch for the remaining spans, if there are any. The mask is printed
// in hexadecimal when the yybmHexTable option is set.
void GoBitmap::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const std::string yych = output_hgo (o, ind, readCh, hgo);

    o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(yych).ws("] & ");
    if (opts->yybmHexTable)
    {
        o.wu32_hex(bitmap->m);
    }
    else
    {
        o.wu32(bitmap->m);
    }
    o.ws(") {\n");

    output_goto (o, ind + 1, readCh, bitmap_state->label);
    o.wind(ind).ws("}\n");

    if (lgo == NULL)
    {
        return;
    }
    lgo->emit (o, ind, readCh);
}
// Returns the greatest label among all table targets (at least the first label).
label_t CpgotoTable::max_label () const
{
    label_t greatest = label_t::first ();
    for (uint32_t i = 0; i < TABLE_SIZE; ++i)
    {
        const State * const target = table[i];
        if (greatest < target->label)
        {
            greatest = target->label;
        }
    }
    return greatest;
}
// Prints the jump table as "static void *<yytarget>[256] = { ... };" with
// eight label addresses per row. Entries within a row are padded with spaces
// according to the label widths so that the columns line up.
void CpgotoTable::emit (OutputFile & o, uint32_t ind)
{
    const uint32_t row_indent = ind + 1;

    o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n");
    o.wind(row_indent);

    const uint32_t max_digits = max_label ().width ();
    for (uint32_t i = 0; i < TABLE_SIZE; ++i)
    {
        const State * const target = table[i];
        o.ws("&&").wstring(opts->labelPrefix).wlabel(target->label);

        if (i == TABLE_SIZE - 1)
        {
            // last entry: no separator
            o.ws("\n");
        }
        else if (i % 8 == 7)
        {
            // end of a row
            o.ws(",\n").wind(row_indent);
        }
        else
        {
            const uint32_t padding = max_digits - target->label.width () + 1;
            o.ws(",").wstring(std::string (padding, ' '));
        }
    }

    o.wind(ind).ws("};\n");
}
// Prints the optional high-character guard followed by a block that contains
// the jump table and the computed goto "goto *<yytarget>[<yych>];".
void Cpgoto::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const std::string yych = output_hgo (o, ind, readCh, hgo);
    const uint32_t inner = ind + 1;

    o.ws("{\n");
    table->emit (o, inner);
    o.wind(inner).ws("goto *").wstring(opts->yytarget).ws("[").wstring(yych).ws("];\n");
    o.wind(ind).ws("}\n");
}
// Prints the outgoing edges of `from`. A single edge is printed without a
// label; otherwise every edge is labelled with the ranges that lead to its target.
void Dot::emit (OutputFile & o)
{
    const uint32_t edge_count = cases->cases_size;
    if (edge_count == 1)
    {
        o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n");
        return;
    }

    for (uint32_t i = 0; i < edge_count; ++i)
    {
        const Case & edge = cases->cases[i];
        o.wlabel(from->label).ws(" -> ").wlabel(edge.to->label).ws(" [label=\"");
        for (uint32_t j = 0; j < edge.ranges.size (); ++j)
        {
            o.wrange(edge.ranges[j].first, edge.ranges[j].second);
        }
        o.ws("\"]\n");
    }
}
// Forwards to the strategy selected by init(); prints nothing when empty.
// The DOT strategy takes neither indentation nor the read flag.
void Go::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    if (type == SWITCH_IF)
    {
        info.switchif->emit (o, ind, readCh);
    }
    else if (type == BITMAP)
    {
        info.bitmap->emit (o, ind, readCh);
    }
    else if (type == CPGOTO)
    {
        info.cpgoto->emit (o, ind, readCh);
    }
    else if (type == DOT)
    {
        info.dot->emit (o);
    }
}
} // namespace re2c

View file

@ -0,0 +1,111 @@
#include <stddef.h>
#include "src/util/c99_stdint.h"
#include <set>
#include <utility>
#include <vector>
#include "src/codegen/go.h"
#include "src/codegen/label.h"
#include "src/ir/adfa/adfa.h"
namespace re2c
{
void Cases::used_labels (std::set<label_t> & used)
{
for (uint32_t i = 0; i < cases_size; ++i)
{
used.insert (cases[i].to->label);
}
}
void Binary::used_labels (std::set<label_t> & used)
{
thn->used_labels (used);
els->used_labels (used);
}
void Linear::used_labels (std::set<label_t> & used)
{
for (uint32_t i = 0; i < branches.size (); ++i)
{
used.insert (branches[i].second->label);
}
}
void If::used_labels (std::set<label_t> & used)
{
switch (type)
{
case BINARY:
info.binary->used_labels (used);
break;
case LINEAR:
info.linear->used_labels (used);
break;
}
}
void SwitchIf::used_labels (std::set<label_t> & used)
{
switch (type)
{
case SWITCH:
info.cases->used_labels (used);
break;
case IF:
info.ifs->used_labels (used);
break;
}
}
void GoBitmap::used_labels (std::set<label_t> & used)
{
if (hgo != NULL)
{
hgo->used_labels (used);
}
used.insert (bitmap_state->label);
if (lgo != NULL)
{
lgo->used_labels (used);
}
}
void CpgotoTable::used_labels (std::set<label_t> & used)
{
for (uint32_t i = 0; i < TABLE_SIZE; ++i)
{
used.insert (table[i]->label);
}
}
void Cpgoto::used_labels (std::set<label_t> & used)
{
if (hgo != NULL)
{
hgo->used_labels (used);
}
table->used_labels (used);
}
void Go::used_labels (std::set<label_t> & used)
{
switch (type)
{
case EMPTY:
case DOT:
break;
case SWITCH_IF:
info.switchif->used_labels (used);
break;
case BITMAP:
info.bitmap->used_labels (used);
break;
case CPGOTO:
info.cpgoto->used_labels (used);
break;
}
}
} // namespace re2c

View file

@ -0,0 +1,24 @@
#ifndef _RE2C_CODEGEN_INDENT_
#define _RE2C_CODEGEN_INDENT_
#include <string>
#include "src/globals.h"
namespace re2c
{
// Returns the indentation string repeated `ind` times. For the DOT target the
// result is always empty.
inline std::string indent (uint32_t ind)
{
    std::string str;
    if (opts->target != opt_t::DOT)
    {
        for (; ind > 0; --ind)
        {
            str += opts->indString;
        }
    }
    return str;
}
} // end namespace re2c
#endif // _RE2C_CODEGEN_INDENT_

View file

@ -0,0 +1,175 @@
#include <sstream>
#include "src/codegen/input_api.h"
#include "src/codegen/indent.h"
#include "src/conf/opt.h"
#include "src/globals.h"
namespace re2c
{

// A new API description starts out with the DEFAULT flavour.
InputAPI::InputAPI ()
    : type_ (DEFAULT)
{
}

// Returns the selected flavour.
InputAPI::type_t InputAPI::type () const
{
    return type_;
}

// Selects the flavour.
void InputAPI::set (type_t t)
{
    type_ = t;
}

// Expression that reads the current character.
std::string InputAPI::expr_peek () const
{
    if (type_ == DEFAULT)
    {
        return "*" + opts->yycursor;
    }
    if (type_ == CUSTOM)
    {
        return opts->yypeek + " ()";
    }
    return std::string ();
}

// Expression that reads the current character and stores it in the character variable.
std::string InputAPI::expr_peek_save () const
{
    const std::string peek = expr_peek ();
    return opts->yych + " = " + opts.yychConversion () + peek;
}

// Statement form of expr_peek_save().
std::string InputAPI::stmt_peek (uint32_t ind) const
{
    const std::string pad = indent (ind);
    return pad + expr_peek_save () + ";\n";
}

// Statement that advances to the next character.
std::string InputAPI::stmt_skip (uint32_t ind) const
{
    std::string expr;
    if (type_ == DEFAULT)
    {
        expr = "++" + opts->yycursor;
    }
    else if (type_ == CUSTOM)
    {
        expr = opts->yyskip + " ()";
    }
    return indent (ind) + expr + ";\n";
}

// Statement that saves the current position in the marker.
std::string InputAPI::stmt_backup (uint32_t ind) const
{
    std::string expr;
    if (type_ == DEFAULT)
    {
        expr = opts->yymarker + " = " + opts->yycursor;
    }
    else if (type_ == CUSTOM)
    {
        expr = opts->yybackup + " ()";
    }
    return indent (ind) + expr + ";\n";
}

// Statement that saves the current position in the context marker.
std::string InputAPI::stmt_backupctx (uint32_t ind) const
{
    std::string expr;
    if (type_ == DEFAULT)
    {
        expr = opts->yyctxmarker + " = " + opts->yycursor;
    }
    else if (type_ == CUSTOM)
    {
        expr = opts->yybackupctx + " ()";
    }
    return indent (ind) + expr + ";\n";
}

// Statement that restores the position saved in the marker.
std::string InputAPI::stmt_restore (uint32_t ind) const
{
    std::string expr;
    if (type_ == DEFAULT)
    {
        expr = opts->yycursor + " = " + opts->yymarker;
    }
    else if (type_ == CUSTOM)
    {
        expr = opts->yyrestore + " ()";
    }
    return indent (ind) + expr + ";\n";
}

// Statement that restores the position saved in the context marker.
std::string InputAPI::stmt_restorectx (uint32_t ind) const
{
    if (type_ == DEFAULT)
    {
        return indent (ind) + opts->yycursor + " = " + opts->yyctxmarker + ";\n";
    }
    if (type_ == CUSTOM)
    {
        return indent (ind) + opts->yyrestorectx + " ();\n";
    }
    return std::string ();
}

// Skip followed by peek; the DEFAULT flavour fuses both into one statement.
std::string InputAPI::stmt_skip_peek (uint32_t ind) const
{
    if (type_ == DEFAULT)
    {
        return indent (ind) + opts->yych + " = " + opts.yychConversion () + "*++" + opts->yycursor + ";\n";
    }
    return stmt_skip (ind) + stmt_peek (ind);
}

// Skip followed by backup; the DEFAULT flavour fuses both into one statement.
std::string InputAPI::stmt_skip_backup (uint32_t ind) const
{
    if (type_ == DEFAULT)
    {
        return indent (ind) + opts->yymarker + " = ++" + opts->yycursor + ";\n";
    }
    return stmt_skip (ind) + stmt_backup (ind);
}

// Backup followed by peek; the DEFAULT flavour fuses both into one statement.
std::string InputAPI::stmt_backup_peek (uint32_t ind) const
{
    if (type_ == DEFAULT)
    {
        return indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = " + opts->yycursor + ");\n";
    }
    return stmt_backup (ind) + stmt_peek (ind);
}

// Skip, backup and peek; the DEFAULT flavour fuses all three into one statement.
std::string InputAPI::stmt_skip_backup_peek (uint32_t ind) const
{
    if (type_ == DEFAULT)
    {
        return indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = ++" + opts->yycursor + ");\n";
    }
    return stmt_skip (ind) + stmt_backup (ind) + stmt_peek (ind);
}

// Condition "less than one character is left"; the DEFAULT flavour compares
// the limit with the cursor directly.
std::string InputAPI::expr_lessthan_one () const
{
    if (type_ == DEFAULT)
    {
        return opts->yylimit + " <= " + opts->yycursor;
    }
    return expr_lessthan (1);
}

// Condition "less than `n` characters are left".
std::string InputAPI::expr_lessthan (size_t n) const
{
    std::ostringstream expr;
    if (type_ == DEFAULT)
    {
        expr << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n;
    }
    else if (type_ == CUSTOM)
    {
        expr << opts->yylessthan << " (" << n << ")";
    }
    return expr.str ();
}

} // end namespace re2c

View file

@ -0,0 +1,43 @@
#ifndef _RE2C_CODEGEN_INPUT_API_
#define _RE2C_CODEGEN_INPUT_API_
#include "src/util/c99_stdint.h"
#include <string>
namespace re2c
{
// Produces the text of the input primitives (peek, skip, backup, restore,
// "less than") that are placed into generated code, for the selected flavour.
class InputAPI
{
public:
// DEFAULT prints direct operations on the cursor/marker/limit names taken
// from the options; CUSTOM prints calls "<name> ()" to user-supplied primitives.
enum type_t
{ DEFAULT
, CUSTOM
};
private:
type_t type_;
public:
InputAPI (); // starts as DEFAULT
type_t type () const;
void set (type_t t);
// expr_* return a bare expression; stmt_* return a complete statement that is
// indented by `ind` levels and terminated with ";\n".
std::string expr_peek () const;
std::string expr_peek_save () const;
std::string stmt_peek (uint32_t ind) const;
std::string stmt_skip (uint32_t ind) const;
std::string stmt_backup (uint32_t ind) const;
std::string stmt_backupctx (uint32_t ind) const;
std::string stmt_restore (uint32_t ind) const;
std::string stmt_restorectx (uint32_t ind) const;
// Combined operations: a single fused statement for DEFAULT, the sequence of
// the individual statements for CUSTOM.
std::string stmt_skip_peek (uint32_t ind) const;
std::string stmt_skip_backup (uint32_t ind) const;
std::string stmt_backup_peek (uint32_t ind) const;
std::string stmt_skip_backup_peek (uint32_t ind) const;
std::string expr_lessthan_one () const;
std::string expr_lessthan (size_t n) const;
};
} // end namespace re2c
#endif // _RE2C_CODEGEN_INPUT_API_

View file

@ -0,0 +1,42 @@
#include <ostream>
#include "src/codegen/label.h"
namespace re2c {

// Numeric value of the first label.
const uint32_t label_t::FIRST = 0;

// Creates the first label.
label_t::label_t ()
    : value (FIRST)
{
}

// Advances to the next label.
void label_t::inc ()
{
    value += 1;
}

// Returns the first label.
label_t label_t::first ()
{
    label_t initial;
    return initial;
}

// Orders labels by their numeric value.
bool label_t::operator < (const label_t & l) const
{
    return value < l.value;
}

// Returns the number of decimal digits of the label minus one
// (0 for values below 10, 1 for values below 100, and so on).
uint32_t label_t::width () const
{
    uint32_t n = 0;
    for (uint32_t rest = value / 10; rest != 0; rest /= 10)
    {
        ++n;
    }
    return n;
}

// Prints the numeric value of the label.
std::ostream & operator << (std::ostream & o, label_t l)
{
    return o << l.value;
}

} // namespace re2c

View file

@ -0,0 +1,39 @@
#ifndef _RE2C_CODEGEN_LABEL_
#define _RE2C_CODEGEN_LABEL_
#include <iosfwd> // ostream
#include "src/util/c99_stdint.h"
namespace re2c {
template <typename num_t> class counter_t;
// label public API:
// - get first label
// - compare labels
// - get label width
// - output label to std::ostream
//
// label private API (for label counter):
// - get initial label
// - get next label
// Opaque numeric label. Construction and increment are private and reserved
// for the befriended label counter.
class label_t
{
static const uint32_t FIRST; // value of the first label (defined as 0)
uint32_t value;
label_t (); // creates the first label
void inc (); // advances to the next label
public:
static label_t first ();
bool operator < (const label_t & l) const;
// number of decimal digits of the label minus one (see the definition)
uint32_t width () const;
friend std::ostream & operator << (std::ostream & o, label_t l);
friend class counter_t<label_t>;
};
} // namespace re2c
#endif // _RE2C_CODEGEN_LABEL_

View file

@ -0,0 +1,465 @@
#include <stdio.h>
#include <time.h>
#include <iomanip>
#include "src/codegen/indent.h"
#include "src/codegen/output.h"
#include "src/codegen/print.h"
#include "src/conf/opt.h"
#include "src/conf/warn.h"
#include "src/globals.h"
#include "src/ir/rule_rank.h"
namespace re2c
{
// Creates an empty fragment of kind `t` with indentation `i`.
OutputFragment::OutputFragment (type_t t, uint32_t i)
    : type (t), stream (), indent (i)
{
}
// Returns the number of newline characters in the fragment's text.
uint32_t OutputFragment::count_lines ()
{
    const std::string text = stream.str ();
    uint32_t newlines = 0;
    for (std::string::const_iterator it = text.begin (); it != text.end (); ++it)
    {
        if (*it == '\n')
        {
            ++newlines;
        }
    }
    return newlines;
}
// Creates a block with default settings that already contains one empty code fragment.
OutputBlock::OutputBlock ()
    : fragments ()
    , used_yyaccept (false)
    , force_start_label (false)
    , user_start_label ()
    , line (0)
{
    OutputFragment * const initial = new OutputFragment (OutputFragment::CODE, 0);
    fragments.push_back (initial);
}
// Releases all fragments of the block.
OutputBlock::~OutputBlock ()
{
    typedef std::vector<OutputFragment *>::iterator iter_t;
    for (iter_t it = fragments.begin (); it != fragments.end (); ++it)
    {
        delete *it;
    }
}
// Prepares output to the file named `fn` (the file is not opened here, see
// open()) and starts the first block.
OutputFile::OutputFile (const char * fn)
    : file_name (fn),
      file (NULL),
      blocks (),
      label_counter (),
      warn_condition_order (!opts->tFlag) // see note [condition order]
{
    new_block ();
}
// Opens the output: the named file for binary writing, or standard output
// (reported under the name "<stdout>") when no name was given. Returns
// whether a usable file handle is available.
bool OutputFile::open ()
{
    if (file_name != NULL)
    {
        file = fopen (file_name, "wb");
    }
    else
    {
        file_name = "<stdout>";
        file = stdout;
    }
    return file != NULL;
}
// Closes the file (standard output is left open) and releases all blocks.
OutputFile::~OutputFile ()
{
    const bool owns_file = file != NULL && file != stdout;
    if (owns_file)
    {
        fclose (file);
    }
    typedef std::vector<OutputBlock *>::iterator iter_t;
    for (iter_t it = blocks.begin (); it != blocks.end (); ++it)
    {
        delete *it;
    }
}
std::ostream & OutputFile::stream ()
{
return blocks.back ()->fragments.back ()->stream;
}
// Writes `n` raw characters starting at `s`.
OutputFile & OutputFile::wraw (const char * s, size_t n)
{
    const std::streamsize count = static_cast<std::streamsize> (n);
    stream ().write (s, count);
    return *this;
}
// Writes `n` using prtHex.
OutputFile & OutputFile::wu32_hex (uint32_t n)
{
    std::ostream & out = stream ();
    prtHex (out, n);
    return *this;
}
// Writes `n` using prtChOrHex.
OutputFile & OutputFile::wc_hex (uint32_t n)
{
    std::ostream & out = stream ();
    prtChOrHex (out, n);
    return *this;
}
// Writes the range with bounds `l` and `u` using printSpan.
OutputFile & OutputFile::wrange (uint32_t l, uint32_t u)
{
    std::ostream & out = stream ();
    printSpan (out, l, u);
    return *this;
}
// Writes `n` in decimal with a minimum field width of `w`.
OutputFile & OutputFile::wu32_width (uint32_t n, int w)
{
    stream () << std::setw (w) << n;
    return *this;
}
// Writes a line directive for line `l` of file `fn` (see output_line_info).
OutputFile & OutputFile::wline_info (uint32_t l, const char * fn)
{
    std::ostream & out = stream ();
    output_line_info (out, l, fn);
    return *this;
}
// Writes the "Generated by re2c" banner (see output_version_time).
OutputFile & OutputFile::wversion_time ()
{
    std::ostream & out = stream ();
    output_version_time (out);
    return *this;
}
// Writes "<label>:" for the user-defined start label of the current block;
// writes nothing when no such label is set.
OutputFile & OutputFile::wuser_start_label ()
{
    const std::string & label = blocks.back ()->user_start_label;
    if (label.empty ())
    {
        return *this;
    }
    wstring(label).ws(":\n");
    return *this;
}
// Writes the single character `c`.
OutputFile & OutputFile::wc (char c)
{
    std::ostream & out = stream ();
    out << c;
    return *this;
}
// Writes `n` in decimal.
OutputFile & OutputFile::wu32 (uint32_t n)
{
    std::ostream & out = stream ();
    out << n;
    return *this;
}
// Writes the 64-bit value `n` in decimal.
OutputFile & OutputFile::wu64 (uint64_t n)
{
    std::ostream & out = stream ();
    out << n;
    return *this;
}
// Writes the string `s`.
OutputFile & OutputFile::wstring (const std::string & s)
{
    std::ostream & out = stream ();
    out << s;
    return *this;
}
// Writes the NUL-terminated string `s`.
OutputFile & OutputFile::ws (const char * s)
{
    std::ostream & out = stream ();
    out << s;
    return *this;
}
// Writes the label `l`.
OutputFile & OutputFile::wlabel (label_t l)
{
    std::ostream & out = stream ();
    out << l;
    return *this;
}
// Writes the rule rank `r`.
OutputFile & OutputFile::wrank (rule_rank_t r)
{
    std::ostream & out = stream ();
    out << r;
    return *this;
}
// Writes `ind` levels of indentation.
OutputFile & OutputFile::wind (uint32_t ind)
{
    const std::string pad = indent(ind);
    stream () << pad;
    return *this;
}
void OutputFile::insert_code ()
{
blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::CODE, 0));
}
// Reserves a place for a line directive that is rendered in emit(), then
// continues with a fresh code fragment.
OutputFile & OutputFile::wdelay_line_info ()
{
    OutputFragment * const delayed = new OutputFragment (OutputFragment::LINE_INFO, 0);
    blocks.back ()->fragments.push_back (delayed);
    insert_code ();
    return *this;
}
// Reserves a place for the state dispatch that is rendered in emit(). This
// happens only with the fFlag option and only once (tracked by bWroteGetState).
OutputFile & OutputFile::wdelay_state_goto (uint32_t ind)
{
    if (!opts->fFlag || bWroteGetState)
    {
        return *this;
    }
    OutputFragment * const delayed = new OutputFragment (OutputFragment::STATE_GOTO, ind);
    blocks.back ()->fragments.push_back (delayed);
    insert_code ();
    bWroteGetState = true;
    return *this;
}
// Reserves a place for the condition type definition that is rendered in
// emit(), and switches the condition order warning off.
OutputFile & OutputFile::wdelay_types ()
{
    warn_condition_order = false; // see note [condition order]
    OutputFragment * const delayed = new OutputFragment (OutputFragment::TYPES, 0);
    blocks.back ()->fragments.push_back (delayed);
    insert_code ();
    return *this;
}
// Marks the place at which emit() decides whether to issue the condition
// order warning for this block.
OutputFile & OutputFile::wdelay_warn_condition_order ()
{
    OutputFragment * const delayed = new OutputFragment (OutputFragment::WARN_CONDITION_ORDER, 0);
    blocks.back ()->fragments.push_back (delayed);
    insert_code ();
    return *this;
}
// Reserves a place for the declaration of the accept variable, which emit()
// renders only if the block turns out to use it.
OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind)
{
    OutputFragment * const delayed = new OutputFragment (OutputFragment::YYACCEPT_INIT, ind);
    blocks.back ()->fragments.push_back (delayed);
    insert_code ();
    return *this;
}
// Reserves a place for the YYMAXFILL definition that is rendered in emit().
OutputFile & OutputFile::wdelay_yymaxfill ()
{
    OutputFragment * const delayed = new OutputFragment (OutputFragment::YYMAXFILL, 0);
    blocks.back ()->fragments.push_back (delayed);
    insert_code ();
    return *this;
}
void OutputFile::set_used_yyaccept ()
{
blocks.back ()->used_yyaccept = true;
}
bool OutputFile::get_used_yyaccept () const
{
return blocks.back ()->used_yyaccept;
}
void OutputFile::set_force_start_label (bool force)
{
blocks.back ()->force_start_label = force;
}
void OutputFile::set_user_start_label (const std::string & label)
{
blocks.back ()->user_start_label = label;
}
bool OutputFile::get_force_start_label () const
{
return blocks.back ()->force_start_label;
}
void OutputFile::set_block_line (uint32_t l)
{
blocks.back ()->line = l;
}
uint32_t OutputFile::get_block_line () const
{
return blocks.back ()->line;
}
void OutputFile::new_block ()
{
blocks.push_back (new OutputBlock ());
insert_code ();
}
void OutputFile::emit
( const std::vector<std::string> & types
, size_t max_fill
)
{
if (file != NULL)
{
unsigned int line_count = 1;
for (unsigned int j = 0; j < blocks.size (); ++j)
{
OutputBlock & b = * blocks[j];
for (unsigned int i = 0; i < b.fragments.size (); ++i)
{
OutputFragment & f = * b.fragments[i];
switch (f.type)
{
case OutputFragment::CODE:
break;
case OutputFragment::LINE_INFO:
output_line_info (f.stream, line_count + 1, file_name);
break;
case OutputFragment::STATE_GOTO:
output_state_goto (f.stream, f.indent, 0);
break;
case OutputFragment::TYPES:
output_types (f.stream, f.indent, types);
break;
case OutputFragment::WARN_CONDITION_ORDER:
if (warn_condition_order) // see note [condition order]
{
warn.condition_order (b.line);
}
break;
case OutputFragment::YYACCEPT_INIT:
output_yyaccept_init (f.stream, f.indent, b.used_yyaccept);
break;
case OutputFragment::YYMAXFILL:
output_yymaxfill (f.stream, max_fill);
break;
}
std::string content = f.stream.str ();
fwrite (content.c_str (), 1, content.size (), file);
line_count += f.count_lines ();
}
}
}
}
// Prepares the header output; the file is opened separately by open().
HeaderFile::HeaderFile (const char * fn)
    : stream ()
    // header is always generated, but not always dumped to file
    // NULL filename crashes 'operator <<' on some platforms
    // TODO: generate header only if necessary
    , file_name (fn != NULL ? fn : "<stdout>.h")
    , file (NULL)
{
}
// Opens the header file for binary writing; returns whether that succeeded.
bool HeaderFile::open ()
{
    FILE * const handle = fopen (file_name, "wb");
    file = handle;
    return handle != NULL;
}
// Renders the header into the internal stream: banner, line directive, an
// empty line and the condition type definition. The text reaches the file
// only when the object is destroyed.
void HeaderFile::emit (const std::vector<std::string> & types)
{
    std::ostream & out = stream;
    output_version_time (out);
    output_line_info (out, 3, file_name);
    out << "\n";
    output_types (out, 0, types);
}
// Writes the rendered header to the file and closes it, provided the file was opened.
HeaderFile::~HeaderFile ()
{
    if (file == NULL)
    {
        return;
    }
    const std::string text = stream.str ();
    fwrite (text.c_str (), 1, text.size (), file);
    fclose (file);
}
// Sets up the source and header outputs; the maximal fill starts at 1.
Output::Output (const char * source_name, const char * header_name)
    : source (source_name),
      header (header_name),
      types (),
      skeletons (),
      max_fill (1)
{
}
// Emits the source and the header on destruction, unless warn.error() reports an error.
Output::~Output ()
{
    if (warn.error ())
    {
        return;
    }
    source.emit (types, max_fill);
    header.emit (types);
}
// Prints the `switch` that dispatches on the saved lexer state: the default
// case (and case -1 when an abort is requested for unknown states) jumps to
// the start label, every fill index jumps to its fill label. With the
// bUseStateNext option the "next" label is printed after the switch.
void output_state_goto (std::ostream & o, uint32_t ind, uint32_t start_label)
{
    const std::string pad = indent(ind);

    o << pad << "switch (" << output_get_state() << ") {\n";
    if (opts->bUseStateAbort)
    {
        o << pad << "default: abort();\n";
        o << pad << "case -1: goto " << opts->labelPrefix << start_label << ";\n";
    }
    else
    {
        o << pad << "default: goto " << opts->labelPrefix << start_label << ";\n";
    }
    for (uint32_t fill = 0; fill < last_fill_index; ++fill)
    {
        o << pad << "case " << fill << ": goto " << opts->yyfilllabel << fill << ";\n";
    }
    o << pad << "}\n";

    if (opts->bUseStateNext)
    {
        o << opts->yynext << ":\n";
    }
}
// Prints the declaration of the accept variable, but only if it is used.
void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept)
{
    if (!used_yyaccept)
    {
        return;
    }
    o << indent (ind) << "unsigned int " << opts->yyaccept << " = 0;\n";
}
// Prints the definition of YYMAXFILL with the value `max_fill`.
void output_yymaxfill (std::ostream & o, size_t max_fill)
{
    o << "#define YYMAXFILL ";
    o << max_fill << "\n";
}
// Prints a "#line <number> "<file>"" directive unless the iFlag option is set.
void output_line_info (std::ostream & o, uint32_t line_number, const char * file_name)
{
    if (opts->iFlag)
    {
        return;
    }
    o << "#line " << line_number << " \"" << file_name << "\"\n";
}
// Prints the enum of condition types: one enumerator per entry of `types`,
// each prefixed with the condition enum prefix and indented one level deeper
// than the enum itself.
void output_types (std::ostream & o, uint32_t ind, const std::vector<std::string> & types)
{
    const std::string outer = indent (ind);
    const std::string inner = indent (ind + 1);

    o << outer << "enum " << opts->yycondtype << " {\n";
    for (unsigned int i = 0; i < types.size (); ++i)
    {
        o << inner << opts->condEnumPrefix << types[i] << ",\n";
    }
    o << outer << "};\n";
}
// Prints the banner comment "/* Generated by re2c [<version>] [on <date>] */".
// The version is printed if opts->version is set, the date unless
// opts->bNoGenerationDate is set.
void output_version_time (std::ostream & o)
{
    o << "/* Generated by re2c";
    if (opts->version)
    {
        o << " " << PACKAGE_VERSION;
    }
    if (!opts->bNoGenerationDate)
    {
        const time_t now = time (NULL);
        // ctime() returns "Www Mmm dd hh:mm:ss yyyy\n", or NULL if the time
        // cannot be converted; the date is left out in that case instead of
        // passing a null pointer to write()
        const char * const stamp = ctime (&now);
        if (stamp != NULL)
        {
            o << " on ";
            // the first 24 characters are the date without the trailing newline
            o.write (stamp, 24);
        }
    }
    o << " */" << "\n";
}
// Returns the expression that yields the saved lexer state: the configured
// text as is when it is "naked", otherwise with "()" appended.
std::string output_get_state ()
{
    if (opts->state_get_naked)
    {
        return opts->state_get;
    }
    return opts->state_get + "()";
}
} // namespace re2c

View file

@ -0,0 +1,158 @@
#ifndef _RE2C_CODEGEN_OUTPUT_
#define _RE2C_CODEGEN_OUTPUT_
#include "src/util/c99_stdint.h"
#include <stddef.h>
#include <stdio.h>
#include <fstream>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "src/codegen/label.h"
#include "src/util/counter.h"
#include "src/util/forbid_copy.h"
namespace re2c
{
class rule_rank_t;
// One piece of an output block: a typed text stream plus an indentation level.
// Apart from CODE, the fragment kinds mirror the wdelay_* ("delayed output")
// methods declared on OutputFile.
struct OutputFragment
{
enum type_t
{ CODE
// , CONFIG
, LINE_INFO
, STATE_GOTO
, TYPES
, WARN_CONDITION_ORDER
, YYACCEPT_INIT
, YYMAXFILL
};
// kind of this fragment
type_t type;
// text accumulated for this fragment
std::ostringstream stream;
// indentation level associated with this fragment
uint32_t indent;
OutputFragment (type_t t, uint32_t i);
uint32_t count_lines ();
};
// A sequence of output fragments together with per-block settings
// (yyaccept usage, start-label handling, line number).
struct OutputBlock
{
// fragments are held by pointer; NOTE(review): presumably owned and released
// by ~OutputBlock -- confirm in the implementation
std::vector<OutputFragment *> fragments;
bool used_yyaccept;
bool force_start_label;
std::string user_start_label;
uint32_t line;
OutputBlock ();
~OutputBlock ();
};
// Writer for the generated source file. Output is organised in blocks
// (see new_block ()); the w* methods return *this so calls can be chained.
// Copying is forbidden (FORBID_COPY).
struct OutputFile
{
public:
const char * file_name;
private:
FILE * file;
std::vector<OutputBlock *> blocks;
public:
counter_t<label_t> label_counter;
bool warn_condition_order;
private:
std::ostream & stream ();
void insert_code ();
public:
OutputFile (const char * fn);
~OutputFile ();
bool open ();
void new_block ();
// immediate output
OutputFile & wraw (const char * s, size_t n);
OutputFile & wc (char c);
OutputFile & wc_hex (uint32_t n);
OutputFile & wu32 (uint32_t n);
OutputFile & wu32_hex (uint32_t n);
OutputFile & wu32_width (uint32_t n, int w);
OutputFile & wu64 (uint64_t n);
OutputFile & wstring (const std::string & s);
OutputFile & ws (const char * s);
OutputFile & wlabel (label_t l);
OutputFile & wrank (rule_rank_t l);
OutputFile & wrange (uint32_t u, uint32_t l);
OutputFile & wline_info (uint32_t l, const char * fn);
OutputFile & wversion_time ();
OutputFile & wuser_start_label ();
OutputFile & wind (uint32_t ind);
// delayed output
OutputFile & wdelay_line_info ();
OutputFile & wdelay_state_goto (uint32_t ind);
OutputFile & wdelay_types ();
OutputFile & wdelay_warn_condition_order ();
OutputFile & wdelay_yyaccept_init (uint32_t ind);
OutputFile & wdelay_yymaxfill ();
// accessors for per-block settings (the matching fields are declared in OutputBlock)
void set_used_yyaccept ();
bool get_used_yyaccept () const;
void set_force_start_label (bool force);
void set_user_start_label (const std::string & label);
bool get_force_start_label () const;
void set_block_line (uint32_t l);
uint32_t get_block_line () const;
// final emission; called from Output::~Output with the collected types and max_fill
void emit (const std::vector<std::string> & types, size_t max_fill);
FORBID_COPY (OutputFile);
};
// Writer for the type header file. Copying is forbidden (FORBID_COPY).
struct HeaderFile
{
HeaderFile (const char * fn);
~HeaderFile ();
bool open ();
// called from Output::~Output with the collected condition type names
void emit (const std::vector<std::string> & types);
private:
std::ostringstream stream;
const char * file_name;
FILE * file;
FORBID_COPY (HeaderFile);
};
// Aggregate of everything re2c produces: the source file, the header file
// and the data collected for them. The destructor emits both files unless
// an error was reported through `warn`.
struct Output
{
OutputFile source;
HeaderFile header;
// names passed to OutputFile::emit / HeaderFile::emit
std::vector<std::string> types;
std::set<std::string> skeletons;
// value passed to OutputFile::emit; initialised to 1 by the constructor
size_t max_fill;
Output (const char * source_name, const char * header_name);
~Output ();
};
// Free functions that render individual output pieces to a stream.
// emits `#line <n> "<file>"` unless the iFlag option is set
void output_line_info (std::ostream &, uint32_t, const char *);
// emits the state-dispatch switch (indentation level, start label number)
void output_state_goto (std::ostream &, uint32_t, uint32_t);
// emits the condition enum (indentation level, condition names)
void output_types (std::ostream &, uint32_t, const std::vector<std::string> &);
// emits the "Generated by re2c ..." banner comment
void output_version_time (std::ostream &);
// emits the yyaccept variable declaration when the flag is true
void output_yyaccept_init (std::ostream &, uint32_t, bool);
// emits `#define YYMAXFILL <n>`
void output_yymaxfill (std::ostream &, size_t);
// helpers
// returns the state getter expression, with "()" appended unless state_get_naked is set
std::string output_get_state ();
} // namespace re2c
#endif // _RE2C_CODEGEN_OUTPUT_

View file

@ -0,0 +1,156 @@
#include <iostream>
#include "src/codegen/print.h"
#include "src/conf/opt.h"
#include "src/globals.h"
#include "src/ir/regexp/encoding/enc.h"
namespace re2c
{
// True for code points in the range 0x20..0x7E inclusive.
bool is_print (uint32_t c)
{
    const bool at_least_space = c >= 0x20;
    const bool below_del = c < 0x7F;
    return at_least_space && below_del;
}
// True for tab, form feed, vertical tab, newline, carriage return and space.
bool is_space (uint32_t c)
{
    return c == '\t'
        || c == '\f'
        || c == '\v'
        || c == '\n'
        || c == '\r'
        || c == ' ';
}
// Returns the upper-case hexadecimal digit for the low four bits of `c`.
char hexCh(uint32_t c)
{
    static const char digits[] = "0123456789ABCDEF";
    const uint32_t nibble = c & 0x0F;
    return digits[nibble];
}
// Prints `c` as a single-quoted character literal when the encoding is not
// EBCDIC and `c` is printable or whitespace; otherwise prints it in hex.
void prtChOrHex(std::ostream& o, uint32_t c)
{
    const bool as_char = opts->encoding.type () != Enc::EBCDIC
        && (is_print (c) || is_space (c));
    if (!as_char)
    {
        prtHex(o, c);
        return;
    }
    o << '\'';
    prtCh(o, c);
    o << '\'';
}
// Prints `c` as "0x" followed by hex digits, most significant first.
// The digit count follows the code unit size: 8 digits for size >= 4,
// 4 digits for size >= 2, otherwise 2 digits.
void prtHex(std::ostream& o, uint32_t c)
{
    o << "0x";
    const uint32_t cunit_size = opts->encoding.szCodeUnit ();

    uint32_t digits = 2;
    if (cunit_size >= 4)
    {
        digits = 8;
    }
    else if (cunit_size >= 2)
    {
        digits = 4;
    }

    // walk the nibbles from the highest one down to bit 0
    for (uint32_t shift = digits * 4; shift != 0; )
    {
        shift -= 4;
        o << hexCh (c >> shift);
    }
}
// Prints the character `c`, escaping quotes, backslash and the common
// control characters. The escape text depends on whether the target is DOT
// or anything else; characters with no special form are written as-is.
void prtCh(std::ostream& o, uint32_t c)
{
    const char * escaped = NULL;

    if (opts->target == opt_t::DOT)
    {
        // escapes used for .dot output
        switch (c)
        {
            case '\'': escaped = "'";      break;
            case '"':  escaped = "\\\"";   break;
            case '\n': escaped = "\\\\n";  break;
            case '\t': escaped = "\\\\t";  break;
            case '\v': escaped = "\\\\v";  break;
            case '\b': escaped = "\\\\b";  break;
            case '\r': escaped = "\\\\r";  break;
            case '\f': escaped = "\\\\f";  break;
            case '\a': escaped = "\\\\a";  break;
            case '\\': escaped = "\\\\";   break; // both .dot and C/C++ code expect "\\"
            default:   break;
        }
    }
    else
    {
        // escapes used for every other target
        switch (c)
        {
            case '\'': escaped = "\\'";    break;
            case '"':  escaped = "\"";     break;
            case '\n': escaped = "\\n";    break;
            case '\t': escaped = "\\t";    break;
            case '\v': escaped = "\\v";    break;
            case '\b': escaped = "\\b";    break;
            case '\r': escaped = "\\r";    break;
            case '\f': escaped = "\\f";    break;
            case '\a': escaped = "\\a";    break;
            case '\\': escaped = "\\\\";   break; // both .dot and C/C++ code expect "\\"
            default:   break;
        }
    }

    if (escaped != NULL)
    {
        o << escaped;
    }
    else
    {
        o << static_cast<char> (c);
    }
}
// Variant of prtChOrHex for use inside a "[...]" span: no quotes are added,
// and only printable characters other than ']' are written as characters
// (and only when the encoding is not EBCDIC). Everything else goes out in hex.
void prtChOrHexForSpan(std::ostream& o, uint32_t c)
{
    const bool as_char = opts->encoding.type () != Enc::EBCDIC
        && is_print (c)
        && (c != ']');
    if (!as_char)
    {
        prtHex(o, c);
        return;
    }
    prtCh(o, c);
}
// Prints the half-open range [lb, ub) as "[x]" when it holds a single value
// and as "[x-y]" otherwise, where y is ub - 1.
void printSpan(std::ostream& o, uint32_t lb, uint32_t ub)
{
    const bool single = (ub - lb) == 1;

    o << "[";
    prtChOrHexForSpan(o, lb);
    if (!single)
    {
        o << "-";
        prtChOrHexForSpan(o, ub - 1);
    }
    o << "]";
}
} // end namespace re2c

View file

@ -0,0 +1,20 @@
#ifndef _RE2C_CODEGEN_PRINT_
#define _RE2C_CODEGEN_PRINT_
#include "src/util/c99_stdint.h"
#include <iosfwd>
namespace re2c
{
// true for code points 0x20..0x7E
bool is_print (uint32_t c);
// true for '\t', '\f', '\v', '\n', '\r' and ' '
bool is_space (uint32_t c);
// upper-case hex digit for the low four bits of the argument
char hexCh(uint32_t c);
// prints one character, escaping quotes, backslash and common control characters
void prtCh(std::ostream&, uint32_t);
// prints "0x" followed by 2, 4 or 8 hex digits depending on the code unit size
void prtHex(std::ostream&, uint32_t);
// prints a quoted character when it is printable or whitespace (non-EBCDIC), else hex
void prtChOrHex(std::ostream&, uint32_t);
// prints a half-open range [lb, ub) as "[x]" or "[x-y]"
void printSpan(std::ostream&, uint32_t, uint32_t);
} // end namespace re2c
#endif // _RE2C_CODEGEN_PRINT_

254
tools/re2c/src/conf/msg.cc Normal file
View file

@ -0,0 +1,254 @@
#include <stdarg.h>
#include <stdio.h>
#include <string>
#include "config.h"
#include "src/conf/msg.h"
namespace re2c {
// Prints "re2c: error: ", then the printf-style message, then a newline,
// all to stderr.
void error (const char * fmt, ...)
{
    va_list ap;

    fputs ("re2c: error: ", stderr);
    va_start (ap, fmt);
    vfprintf (stderr, fmt, ap);
    va_end (ap);
    fputc ('\n', stderr);
}
// Reports, via error (), that more than one of the encoding switches was given.
void error_encoding ()
{
error ("only one of switches -e, -w, -x, -u and -8 must be set");
}
// Reports, via error (), that `option` was given without its argument.
void error_arg (const char * option)
{
error ("expected argument to option %s", option);
}
// Prints the prefix of a diagnostic to stderr:
// "re2c: error: line N: " when `error` is true, "re2c: warning: line N: "
// otherwise.
void warning_start (uint32_t line, bool error)
{
    // Must not be `static`: a static local is initialised only on the first
    // call, which would freeze the label for every later call regardless of
    // that call's `error` argument.
    const char * msg = error ? "error" : "warning";
    fprintf (stderr, "re2c: %s: line %u: ", msg, line);
}
// Prints the suffix of a diagnostic to stderr: when `type` is given,
// " [-W<type>]" or " [-Werror-<type>]" depending on `error`; then a newline.
void warning_end (const char * type, bool error)
{
    if (type != NULL)
    {
        fprintf (stderr, " [-W%s%s]", error ? "error-" : "", type);
    }
    fputc ('\n', stderr);
}
// Prints a complete diagnostic to stderr: the prefix from warning_start (),
// the printf-style message, and the suffix from warning_end ().
void warning (const char * type, uint32_t line, bool error, const char * fmt, ...)
{
    va_list ap;

    warning_start (line, error);

    va_start (ap, fmt);
    vfprintf (stderr, fmt, ap);
    va_end (ap);

    warning_end (type, error);
}
// Prints the command-line help text to stderr.
// Every option entry ends with "\n" and is followed by a separate "\n"
// literal, so entries are separated by one blank line.
void usage ()
{
    fprintf (stderr,
    "usage: re2c [-bcdDefFghirsuvVwx18] [-o of] [-t th] file\n"
    "\n"
    "-? -h --help Display this info.\n"
    "\n"
    "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
    " coax better code out of the compiler. Most useful for\n"
    " specifications with more than a few keywords (e.g. for\n"
    " most programming languages).\n"
    "\n"
    "-c --conditions Require start conditions.\n"
    "\n"
    "-d --debug-output Creates a parser that dumps information\n"
    " about the current position and in which state the\n"
    " parser is.\n"
    "\n"
    "-D --emit-dot Emit a Graphviz dot view of the DFA graph\n"
    "\n"
    "-e --ecb Generate a parser that supports EBCDIC. The generated code\n"
    " can deal with any character up to 0xFF. In this mode re2c\n"
    " assumes that input character size is 1 byte. This switch is\n"
    " incompatible with -w, -u, -x and -8\n"
    "\n"
    "-f --storable-state Generate a scanner that supports storable states.\n"
    "\n"
    "-F --flex-syntax Partial support for flex syntax.\n"
    "\n"
    "-g --computed-gotos Implies -b. Generate computed goto code (only useable\n"
    " with gcc).\n"
    "\n"
    "-i --no-debug-info Do not generate '#line' info (useful for versioning).\n"
    "\n"
    "-o of --output=of Specify the output file (of) instead of stdout\n"
    "\n"
    "-r --reusable Allow reuse of scanner definitions.\n"
    "\n"
    "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
    " need this assist to generate better code.\n"
    "\n"
    "-t th --type-header=th Generate a type header file (th) with type definitions.\n"
    "\n"
    "-u --unicode Generate a parser that supports UTF-32. The generated code\n"
    " can deal with any valid Unicode character up to 0x10FFFF.\n"
    " In this mode re2c assumes that input character size is 4 bytes.\n"
    " This switch is incompatible with -e, -w, -x and -8. It implies -s.\n"
    "\n"
    "-v --version Show version information.\n"
    "\n"
    "-V --vernum Show version as one number.\n"
    "\n"
    "-w --wide-chars Generate a parser that supports UCS-2. The generated code can\n"
    " deal with any valid Unicode character up to 0xFFFF. In this mode\n"
    " re2c assumes that input character size is 2 bytes. This switch is\n"
    " incompatible with -e, -x, -u and -8. It implies -s.\n"
    "\n"
    "-x --utf-16 Generate a parser that supports UTF-16. The generated code can\n"
    " deal with any valid Unicode character up to 0x10FFFF. In this mode\n"
    " re2c assumes that input character size is 2 bytes. This switch is\n"
    " incompatible with -e, -w, -u and -8. It implies -s.\n"
    "\n"
    "-8 --utf-8 Generate a parser that supports UTF-8. The generated code can\n"
    " deal with any valid Unicode character up to 0x10FFFF. In this mode\n"
    " re2c assumes that input character size is 1 byte. This switch is\n"
    " incompatible with -e, -w, -x and -u.\n"
    "\n"
    "--no-generation-date Suppress date output in the generated file.\n"
    "\n"
    "--no-version Suppress version output in the generated file.\n"
    "\n"
    "--case-insensitive All strings are case insensitive, so all \"-expressions\n"
    " are treated in the same way '-expressions are.\n"
    "\n"
    "--case-inverted Invert the meaning of single and double quoted strings.\n"
    " With this switch single quotes are case sensitive and\n"
    " double quotes are case insensitive.\n"
    "\n"
    "--encoding-policy ep Specify what re2c should do when given bad code unit.\n"
    " ep can be one of the following: fail, substitute, ignore.\n"
    "\n"
    "--input i Specify re2c input API.\n"
    " i can be one of the following: default, custom.\n"
    "\n"
    "--skeleton Instead of embedding re2c-generated code into C/C++ source,\n"
    " generate a self-contained program for the same DFA.\n"
    " Most useful for correctness and performance testing.\n"
    "\n"
    "--empty-class policy What to do if user inputs empty character class. policy can be\n"
    " one of the following: 'match-empty' (match empty input, default),\n"
    " 'match-none' (fail to match on any input), 'error' (compilation\n"
    " error). Note that there are various ways to construct empty class,\n"
    " e.g: [], [^\\x00-\\xFF], [\\x00-\\xFF]\\[\\x00-\\xFF].\n"
    "\n"
    "--dfa-minimization <table | moore>\n"
    " Internal algorithm used by re2c to minimize DFA (defaults to\n"
    " 'moore'). Both table filling and Moore's algorithms should\n"
    " produce identical DFA (up to states relabelling). Table filling\n"
    " algorithm is much simpler and slower; it serves as a reference\n"
    " implementation.\n"
    "\n"
    "-1 --single-pass Deprecated and does nothing (single pass is by default now).\n"
    "\n"
    "-W Turn on all warnings.\n"
    "\n"
    "-Werror Turn warnings into errors. Note that this option alone doesn't\n"
    " turn on any warnings, it only affects those warnings that have\n"
    " been turned on so far or will be turned on later.\n"
    "\n"
    "-W<warning> Turn on individual warning.\n"
    "\n"
    "-Wno-<warning> Turn off individual warning.\n"
    "\n"
    "-Werror-<warning> Turn on individual warning and treat it as error (this implies\n"
    " '-W<warning>').\n"
    "\n"
    "-Wno-error-<warning> Don't treat this particular warning as error. This doesn't turn\n"
    " off the warning itself.\n"
    "\n"
    "Warnings:\n"
    "\n"
    "-Wcondition-order Warn if the generated program makes implicit assumptions about\n"
    " condition numbering. One should use either '-t, --type-header'\n"
    " option or '/*!types:re2c*/' directive to generate mapping of\n"
    " condition names to numbers and use autogenerated condition names.\n"
    "\n"
    "-Wempty-character-class Warn if regular expression contains empty character class. From\n"
    " the rational point of view trying to match empty character class\n"
    " makes no sense: it should always fail. However, for backwards\n"
    " compatibility reasons re2c allows empty character class and treats\n"
    " it as empty string. Use '--empty-class' option to change default\n"
    " behaviour.\n"
    "\n"
    "-Wmatch-empty-string Warn if regular expression in a rule is nullable (matches empty\n"
    " string). If DFA runs in a loop and empty match is unintentional\n"
    " (input position is not advanced manually), lexer may get stuck\n"
    " in eternal loop.\n"
    "\n"
    "-Wswapped-range Warn if range lower bound is greater than upper bound. Default\n"
    " re2c behaviour is to silently swap range bounds.\n"
    "\n"
    "-Wundefined-control-flow\n"
    " Warn if some input strings cause undefined control flow in lexer\n"
    " (the faulty patterns are reported). This is the most dangerous\n"
    " and common mistake. It can be easily fixed by adding default rule\n"
    " '*' (this rule has the lowest priority, matches any code unit\n"
    " and consumes exactly one code unit).\n"
    "\n"
    "-Wuseless-escape Warn if a symbol is escaped when it shouldn't be. By default re2c\n"
    " silently ignores escape, but this may as well indicate a typo\n"
    " or an error in escape sequence.\n"
    "\n"
    );
}
// Prints PACKAGE_VERSION as a single six-digit number on stdout, two digits
// per component with the dots removed. Tracing the code: "0.16" becomes
// "001600" and "0.15.3" becomes "001503".
void vernum ()
{
std::string vernum (PACKAGE_VERSION);
// single-digit first component: pad it to two digits
if (vernum[1] == '.')
{
vernum.insert(0, "0");
}
// drop the character at index 2 (the first dot once the component is two digits wide)
vernum.erase(2, 1);
// single-digit second component: pad it to two digits
if (vernum[3] == '.')
{
vernum.insert(2, "0");
}
// drop the character at index 4 (the second dot, if the version has one)
vernum.erase(4, 1);
// third component absent or single-digit: insert a leading zero for it
if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9')
{
vernum.insert(4, "0");
}
// force exactly six characters: pad with '0' or cut off any suffix
vernum.resize(6, '0');
printf ("%s\n", vernum.c_str ());
}
// Prints "re2c <PACKAGE_VERSION>" followed by a newline to stdout.
void version ()
{
printf ("re2c %s\n", PACKAGE_VERSION);
}
// Builds the message fragment "in condition '<cond>' " (note the trailing
// space). Returns an empty string when `cond` is empty.
std::string incond (const std::string & cond)
{
    if (cond.empty ())
    {
        return std::string ();
    }
    return "in condition '" + cond + "' ";
}
} // namespace re2c

24
tools/re2c/src/conf/msg.h Normal file
View file

@ -0,0 +1,24 @@
#ifndef _RE2C_CONF_MSG_
#define _RE2C_CONF_MSG_
#include <string>
#include "src/util/attribute.h"
#include "src/util/c99_stdint.h"
namespace re2c {
// prints "re2c: error: <formatted message>\n" to stderr
void error (const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 1, 2)));
// reports conflicting encoding switches
void error_encoding ();
// reports a missing argument for `option`
void error_arg (const char * option);
// prints the "re2c: <error|warning>: line N: " prefix to stderr
void warning_start (uint32_t line, bool error);
// prints the optional " [-W...]" suffix and a newline to stderr
void warning_end (const char * type, bool error);
// prints a complete diagnostic: prefix, formatted message, suffix
void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 4, 5)));
// prints the command-line help to stderr
void usage ();
// prints the version as one six-digit number to stdout
void vernum ();
// prints "re2c <version>" to stdout
void version ();
// returns "in condition '<cond>' ", or an empty string for an empty `cond`
std::string incond (const std::string & cond);
} // namespace re2c
#endif // _RE2C_CONF_MSG_

331
tools/re2c/src/conf/opt.cc Normal file
View file

@ -0,0 +1,331 @@
#include "src/conf/msg.h"
#include "src/conf/opt.h"
namespace re2c
{
// The global option set of the re2c namespace.
Opt opts;
// Default constructor: the RE2C_OPTS list is expanded into a member
// initializer list that sets every option to the default value from its entry.
// OPT1 opens the list with ':', every OPT entry continues it with ','.
opt_t::opt_t ()
#define OPT1(type, name, value) : name (value)
#define OPT(type, name, value) , name (value)
RE2C_OPTS
#undef OPT1
#undef OPT
{}
// Copy constructor: the RE2C_OPTS list is expanded into a member initializer
// list that copies every option from `opt`.
opt_t::opt_t (const opt_t & opt)
#define OPT1(type, name, value) : name (opt.name)
#define OPT(type, name, value) , name (opt.name)
RE2C_OPTS
#undef OPT1
#undef OPT
{}
// Copy assignment: the RE2C_OPTS list is expanded into one `name = opt.name;`
// statement per option (OPT1 is treated exactly like OPT here).
opt_t & opt_t::operator = (const opt_t & opt)
{
#define OPT1 OPT
#define OPT(type, name, value) name = opt.name;
RE2C_OPTS
#undef OPT1
#undef OPT
return *this;
}
// Normalises this option set in three passes, in this order:
//   1. options that make no sense for the current target are reset to the
//      defaults stored in Opt::baseopt;
//   2. options that depend on a switch that is off are reset to defaults
//      ("respect hierarchy");
//   3. options implied by the target, the encoding or other switches are
//      forced to fixed values ("force individual options").
// NOTE(review): pass 2 runs before pass 3, so e.g. with gFlag set and bFlag
// unset, yybm and yybmHexTable are reset even though bFlag is forced to true
// afterwards -- confirm this ordering is intended.
void opt_t::fix ()
{
// some options either make no sense or must have fixed value
// with current target: reset them to default
switch (target)
{
case DOT:
// default code generation options
sFlag = Opt::baseopt.sFlag;
bFlag = Opt::baseopt.bFlag;
gFlag = Opt::baseopt.gFlag;
cGotoThreshold = Opt::baseopt.cGotoThreshold;
// default environment-insensitive formatting
yybmHexTable = Opt::baseopt.yybmHexTable;
// fallthrough
case SKELETON:
// default line information
iFlag = Opt::baseopt.iFlag;
// default environment-sensitive formatting
topIndent = Opt::baseopt.topIndent;
indString = Opt::baseopt.indString;
condDivider = Opt::baseopt.condDivider;
condDividerParam = Opt::baseopt.condDividerParam;
// default environment bindings
tFlag = Opt::baseopt.tFlag;
header_file = Opt::baseopt.header_file;
yycondtype = Opt::baseopt.yycondtype;
cond_get = Opt::baseopt.cond_get;
cond_get_naked = Opt::baseopt.cond_get_naked;
cond_set = Opt::baseopt.cond_set;
cond_set_arg = Opt::baseopt.cond_set_arg;
cond_set_naked = Opt::baseopt.cond_set_naked;
yyctable = Opt::baseopt.yyctable;
condPrefix = Opt::baseopt.condPrefix;
condEnumPrefix = Opt::baseopt.condEnumPrefix;
condGoto = Opt::baseopt.condGoto;
condGotoParam = Opt::baseopt.condGotoParam;
fFlag = Opt::baseopt.fFlag;
state_get = Opt::baseopt.state_get;
state_get_naked = Opt::baseopt.state_get_naked;
state_set = Opt::baseopt.state_set;
state_set_arg = Opt::baseopt.state_set_arg;
state_set_naked = Opt::baseopt.state_set_naked;
yyfilllabel = Opt::baseopt.yyfilllabel;
yynext = Opt::baseopt.yynext;
yyaccept = Opt::baseopt.yyaccept;
bUseStateAbort = Opt::baseopt.bUseStateAbort;
bUseStateNext = Opt::baseopt.bUseStateNext;
yybm = Opt::baseopt.yybm;
yytarget = Opt::baseopt.yytarget;
input_api = Opt::baseopt.input_api;
yycursor = Opt::baseopt.yycursor;
yymarker = Opt::baseopt.yymarker;
yyctxmarker = Opt::baseopt.yyctxmarker;
yylimit = Opt::baseopt.yylimit;
yypeek = Opt::baseopt.yypeek;
yyskip = Opt::baseopt.yyskip;
yybackup = Opt::baseopt.yybackup;
yybackupctx = Opt::baseopt.yybackupctx;
yyrestore = Opt::baseopt.yyrestore;
yyrestorectx = Opt::baseopt.yyrestorectx;
yylessthan = Opt::baseopt.yylessthan;
dFlag = Opt::baseopt.dFlag;
yydebug = Opt::baseopt.yydebug;
yyctype = Opt::baseopt.yyctype;
yych = Opt::baseopt.yych;
bEmitYYCh = Opt::baseopt.bEmitYYCh;
yychConversion = Opt::baseopt.yychConversion;
fill = Opt::baseopt.fill;
fill_use = Opt::baseopt.fill_use;
fill_check = Opt::baseopt.fill_check;
fill_arg = Opt::baseopt.fill_arg;
fill_arg_use = Opt::baseopt.fill_arg_use;
fill_naked = Opt::baseopt.fill_naked;
labelPrefix = Opt::baseopt.labelPrefix;
break;
default:
break;
}
// case-insensitive mode overrides case inversion
if (bCaseInsensitive)
{
bCaseInverted = Opt::baseopt.bCaseInverted;
}
// respect hierarchy
// condition-related options only apply when conditions (cFlag) are on
if (!cFlag)
{
tFlag = Opt::baseopt.tFlag;
header_file = Opt::baseopt.header_file;
yycondtype = Opt::baseopt.yycondtype;
cond_get = Opt::baseopt.cond_get;
cond_get_naked = Opt::baseopt.cond_get_naked;
cond_set = Opt::baseopt.cond_set;
cond_set_arg = Opt::baseopt.cond_set_arg;
cond_set_naked = Opt::baseopt.cond_set_naked;
yyctable = Opt::baseopt.yyctable;
condPrefix = Opt::baseopt.condPrefix;
condEnumPrefix = Opt::baseopt.condEnumPrefix;
condDivider = Opt::baseopt.condDivider;
condDividerParam = Opt::baseopt.condDividerParam;
condGoto = Opt::baseopt.condGoto;
condGotoParam = Opt::baseopt.condGotoParam;
}
// state-related options only apply when fFlag is on
if (!fFlag)
{
state_get = Opt::baseopt.state_get;
state_get_naked = Opt::baseopt.state_get_naked;
state_set = Opt::baseopt.state_set;
state_set_arg = Opt::baseopt.state_set_arg;
state_set_naked = Opt::baseopt.state_set_naked;
yyfilllabel = Opt::baseopt.yyfilllabel;
yynext = Opt::baseopt.yynext;
yyaccept = Opt::baseopt.yyaccept;
bUseStateAbort = Opt::baseopt.bUseStateAbort;
bUseStateNext = Opt::baseopt.bUseStateNext;
}
// bitmap options only apply when bFlag is on
if (!bFlag)
{
yybmHexTable = Opt::baseopt.yybmHexTable;
yybm = Opt::baseopt.yybm;
}
// computed-goto options only apply when gFlag is on
if (!gFlag)
{
cGotoThreshold = Opt::baseopt.cGotoThreshold;
yytarget = Opt::baseopt.yytarget;
}
// these four names are kept only for the DEFAULT input API
if (input_api.type () != InputAPI::DEFAULT)
{
yycursor = Opt::baseopt.yycursor;
yymarker = Opt::baseopt.yymarker;
yyctxmarker = Opt::baseopt.yyctxmarker;
yylimit = Opt::baseopt.yylimit;
}
// these seven names are kept only for the CUSTOM input API
if (input_api.type () != InputAPI::CUSTOM)
{
yypeek = Opt::baseopt.yypeek;
yyskip = Opt::baseopt.yyskip;
yybackup = Opt::baseopt.yybackup;
yybackupctx = Opt::baseopt.yybackupctx;
yyrestore = Opt::baseopt.yyrestore;
yyrestorectx = Opt::baseopt.yyrestorectx;
yylessthan = Opt::baseopt.yylessthan;
}
if (!dFlag)
{
yydebug = Opt::baseopt.yydebug;
}
// YYFILL customisations only apply when fill_use is on
if (!fill_use)
{
fill = Opt::baseopt.fill;
fill_check = Opt::baseopt.fill_check;
fill_arg = Opt::baseopt.fill_arg;
fill_arg_use = Opt::baseopt.fill_arg_use;
fill_naked = Opt::baseopt.fill_naked;
}
// force individual options
switch (target)
{
case DOT:
iFlag = true;
break;
case SKELETON:
iFlag = true;
input_api.set (InputAPI::CUSTOM);
indString = " ";
topIndent = 2;
break;
default:
break;
}
// UCS2, UTF16 and UTF32 encodings force sFlag
switch (encoding.type ())
{
case Enc::UCS2:
case Enc::UTF16:
case Enc::UTF32:
sFlag = true;
break;
default:
break;
}
// bFlag forces sFlag
if (bFlag)
{
sFlag = true;
}
// gFlag forces bFlag and sFlag
if (gFlag)
{
bFlag = true;
sFlag = true;
}
// a header file name forces tFlag
if (header_file != NULL)
{
tFlag = true;
}
}
// Binds this view to the user options `opt`; the `real` copy starts out
// default-constructed and is refreshed by sync ().
realopt_t::realopt_t (useropt_t & opt) :
    real (),
    user (opt)
{
}
// Read-only access to the effective options; calls sync () first so the
// result reflects any pending user changes.
const opt_t * realopt_t::operator -> ()
{
sync ();
return &real;
}
// Refreshes `real` from the user options when they are flagged as diverged:
// copies them, normalises the copy with fix () and clears the flag.
void realopt_t::sync ()
{
    if (!user.diverge)
    {
        return;
    }
    real = user.opt;
    real.fix ();
    user.diverge = false;
}
// Starts with default options and the diverge flag set, so the first
// realopt_t::sync () copies them.
useropt_t::useropt_t () :
    opt (),
    diverge (true)
{
}
// Writable access to the user options. Every access sets the diverge flag,
// whether or not the caller actually modifies anything.
opt_t * useropt_t::operator -> ()
{
diverge = true;
return &opt;
}
// Default-constructed option set; source of the default values used by
// opt_t::fix () and Opt::reset_mapCodeName ().
const opt_t Opt::baseopt;
// Records `s` as the source file. Reports an error and returns false when
// a source file has already been set; returns true otherwise.
bool Opt::source (const char * s)
{
    if (source_file != NULL)
    {
        error ("multiple source files: %s, %s", source_file, s);
        return false;
    }
    source_file = s;
    return true;
}
// Records `s` as the output file. Reports an error and returns false when
// an output file has already been set; returns true otherwise.
bool Opt::output (const char * s)
{
    if (output_file != NULL)
    {
        error ("multiple output files: %s, %s", output_file, s);
        return false;
    }
    output_file = s;
    return true;
}
// Replaces the user-level encoding with `enc` (and, through the useropt
// accessor, marks the user options as diverged).
void Opt::reset_encoding (const Enc & enc)
{
useropt->encoding = enc;
}
void Opt::reset_mapCodeName ()
{
// historically arranged set of names
// no actual reason why these particular options should be reset
useropt->cond_get = Opt::baseopt.cond_get;
useropt->cond_set = Opt::baseopt.cond_set;
useropt->fill = Opt::baseopt.fill;
useropt->state_get = Opt::baseopt.state_get;
useropt->state_set = Opt::baseopt.state_set;
useropt->yybackup = Opt::baseopt.yybackup;
useropt->yybackupctx = Opt::baseopt.yybackupctx;
useropt->yycondtype = Opt::baseopt.yycondtype;
useropt->yyctxmarker = Opt::baseopt.yyctxmarker;
useropt->yyctype = Opt::baseopt.yyctype;
useropt->yycursor = Opt::baseopt.yycursor;
useropt->yydebug = Opt::baseopt.yydebug;
useropt->yylessthan = Opt::baseopt.yylessthan;
useropt->yylimit = Opt::baseopt.yylimit;
useropt->yymarker = Opt::baseopt.yymarker;
useropt->yypeek = Opt::baseopt.yypeek;
useropt->yyrestore = Opt::baseopt.yyrestore;
useropt->yyrestorectx = Opt::baseopt.yyrestorectx;
useropt->yyskip = Opt::baseopt.yyskip;
useropt->yyfilllabel = Opt::baseopt.yyfilllabel;
useropt->yynext = Opt::baseopt.yynext;
useropt->yyaccept = Opt::baseopt.yyaccept;
useropt->yybm = Opt::baseopt.yybm;
useropt->yych = Opt::baseopt.yych;
useropt->yyctable = Opt::baseopt.yyctable;
useropt->yytarget = Opt::baseopt.yytarget;
}
} // namespace re2c

218
tools/re2c/src/conf/opt.h Normal file
View file

@ -0,0 +1,218 @@
#ifndef _RE2C_CONF_OPT_
#define _RE2C_CONF_OPT_
#include "src/util/c99_stdint.h"
#include <stddef.h>
#include <string>
#include "src/codegen/input_api.h"
#include "src/ir/dfa/dfa.h"
#include "src/ir/regexp/empty_class_policy.h"
#include "src/ir/regexp/encoding/enc.h"
#include "src/util/forbid_copy.h"
namespace re2c
{
// X-macro listing every option as (type, name, default value).
// The first entry uses OPT1 and all others use OPT, so that users of the list
// can treat the first element differently (e.g. ':' versus ',' in a
// constructor initializer list). Users define OPT1/OPT, expand RE2C_OPTS,
// then #undef both.
#define RE2C_OPTS \
/* target */ \
OPT1 (opt_t::target_t, target, CODE) \
/* fingerprint */ \
OPT (bool, bNoGenerationDate, false) \
OPT (bool, version, true) \
/* regular expressions */ \
OPT (Enc, encoding, Enc ()) \
OPT (bool, bCaseInsensitive, false) \
OPT (bool, bCaseInverted, false) \
OPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \
/* conditions */ \
OPT (bool, cFlag, false) \
OPT (bool, tFlag, false) \
OPT (const char *, header_file, NULL) \
OPT (std::string, yycondtype, "YYCONDTYPE") \
OPT (std::string, cond_get, "YYGETCONDITION") \
OPT (bool, cond_get_naked, false) \
OPT (std::string, cond_set, "YYSETCONDITION" ) \
OPT (std::string, cond_set_arg, "@@" ) \
OPT (bool, cond_set_naked, false ) \
OPT (std::string, yyctable, "yyctable") \
OPT (std::string, condPrefix, "yyc_") \
OPT (std::string, condEnumPrefix, "yyc") \
OPT (std::string, condDivider, "/* *********************************** */") \
OPT (std::string, condDividerParam, "@@") \
OPT (std::string, condGoto, "goto @@;") \
OPT (std::string, condGotoParam, "@@") \
/* states */ \
OPT (bool, fFlag, false) \
OPT (std::string, state_get, "YYGETSTATE") \
OPT (bool, state_get_naked, false) \
OPT (std::string, state_set, "YYSETSTATE") \
OPT (std::string, state_set_arg, "@@") \
OPT (bool, state_set_naked, false) \
OPT (std::string, yyfilllabel, "yyFillLabel") \
OPT (std::string, yynext, "yyNext") \
OPT (std::string, yyaccept, "yyaccept") \
OPT (bool, bUseStateAbort, false) \
OPT (bool, bUseStateNext, false) \
/* reuse */ \
OPT (bool, rFlag, false) \
/* partial flex syntax support */ \
OPT (bool, FFlag, false) \
/* code generation */ \
OPT (bool, sFlag, false) \
OPT (bool, bFlag, false) \
OPT (std::string, yybm, "yybm") \
OPT (bool, yybmHexTable, false) \
OPT (bool, gFlag, false) \
OPT (std::string, yytarget, "yytarget") \
OPT (uint32_t, cGotoThreshold, 9) \
/* formatting */ \
OPT (uint32_t, topIndent, 0) \
OPT (std::string, indString, "\t") \
/* input API */ \
OPT (InputAPI, input_api, InputAPI ()) \
OPT (std::string, yycursor, "YYCURSOR") \
OPT (std::string, yymarker, "YYMARKER") \
OPT (std::string, yyctxmarker, "YYCTXMARKER") \
OPT (std::string, yylimit, "YYLIMIT") \
OPT (std::string, yypeek, "YYPEEK") \
OPT (std::string, yyskip, "YYSKIP") \
OPT (std::string, yybackup, "YYBACKUP") \
OPT (std::string, yybackupctx, "YYBACKUPCTX") \
OPT (std::string, yyrestore, "YYRESTORE") \
OPT (std::string, yyrestorectx, "YYRESTORECTX") \
OPT (std::string, yylessthan, "YYLESSTHAN") \
/* #line directives */ \
OPT (bool, iFlag, false) \
/* debug */ \
OPT (bool, dFlag, false) \
OPT (std::string, yydebug, "YYDEBUG") \
/* yych */ \
OPT (std::string, yyctype, "YYCTYPE") \
OPT (std::string, yych, "yych") \
OPT (bool, bEmitYYCh, true) \
OPT (bool, yychConversion, false) \
/* YYFILL */ \
OPT (std::string, fill, "YYFILL") \
OPT (bool, fill_use, true) \
OPT (bool, fill_check, true) \
OPT (std::string, fill_arg, "@@") \
OPT (bool, fill_arg_use, true) \
OPT (bool, fill_naked, false) \
/* labels */ \
OPT (std::string, labelPrefix, "yy") \
/* internals */ \
OPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE)
// Plain collection of all options. One data member is generated per
// RE2C_OPTS entry, in list order.
struct opt_t
{
// what re2c is asked to generate
enum target_t
{
CODE,
DOT,
SKELETON
};
// expand each (type, name, value) entry into the member declaration `type name;`
#define OPT1 OPT
#define OPT(type, name, value) type name;
RE2C_OPTS
#undef OPT1
#undef OPT
opt_t ();
opt_t (const opt_t & opt);
opt_t & operator = (const opt_t & opt);
// normalises the option set (resets dependent options, forces implied ones)
void fix ();
};
class useropt_t;
// Read-only, normalised view of the options. It keeps its own copy (`real`)
// and refreshes it from the referenced user options in sync ().
class realopt_t
{
opt_t real;
useropt_t & user;
public:
realopt_t (useropt_t & opt);
// syncs, then returns a pointer to the normalised copy
const opt_t * operator -> ();
// re-copies and re-normalises when the user options are flagged as diverged
void sync ();
};
// Writable options as set by the user, plus a flag recording that they may
// have changed since the last realopt_t::sync ().
class useropt_t
{
opt_t opt;
bool diverge;
public:
useropt_t ();
// writable access; sets the diverge flag on every call
opt_t * operator -> ();
// sync () reads `opt` and clears `diverge`, hence the friendship
friend void realopt_t::sync ();
};
// Front end for all option handling: source/output file names, a writable
// user option set and a normalised read-only view of it.
// Copying is forbidden (FORBID_COPY).
struct Opt
{
// default values for every option
static const opt_t baseopt;
const char * source_file;
const char * output_file;
private:
// declared before realopt because realopt is constructed from a reference to it
useropt_t useropt;
realopt_t realopt;
public:
Opt ()
: source_file (NULL)
, output_file (NULL)
, useropt ()
, realopt (useropt)
{}
// read-only access, forces options synchronization
const opt_t * operator -> ()
{
return realopt.operator -> ();
}
// each sets the corresponding file name once; a second call reports an error and returns false
bool source (const char * s);
bool output (const char * s);
// Inplace configurations are applied immediately when parsed.
// This is very bad: first, re2c behaviour is changed in the middle
// of the block; second, config is resynced too often (every
// attempt to read config that has been updated results in
// automatic resync). It is much better to set all options at once.
bool set_encoding (Enc::type_t t) { return useropt->encoding.set (t); }
void unset_encoding (Enc::type_t t) { useropt->encoding.unset (t); }
void set_encoding_policy (Enc::policy_t p) { useropt->encoding.setPolicy (p); }
void set_input_api (InputAPI::type_t t) { useropt->input_api.set (t); }
// generate one `set_<name> (type)` setter per RE2C_OPTS entry
#define OPT1 OPT
#define OPT(type, name, value) void set_##name (type arg) { useropt->name = arg; }
RE2C_OPTS
#undef OPT1
#undef OPT
// helpers
// returns "(<yyctype>)" when the yychConversion option is on, otherwise ""
std::string yychConversion ()
{
return realopt->yychConversion
? "(" + realopt->yyctype + ")"
: "";
}
// bad temporary hacks, should be fixed by proper scoping of config (parts).
void reset_encoding (const Enc & enc);
void reset_mapCodeName ();
FORBID_COPY (Opt);
};
// Result of command-line parsing.
// NOTE(review): by the names, OK means "continue", EXIT_OK "stop successfully"
// (parse_opts returns it after printing help or version) and EXIT_FAIL "stop
// with an error" -- confirm against the caller.
enum parse_opts_t
{
OK,
EXIT_OK,
EXIT_FAIL
};
// parses the NULL-terminated argument vector into `opts`
parse_opts_t parse_opts (char ** argv, Opt & opts);
} // namespace re2c
#endif // _RE2C_CONF_OPT_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,261 @@
#include "src/codegen/input_api.h"
#include "src/conf/msg.h"
#include "src/conf/opt.h"
#include "src/conf/warn.h"
#include "src/globals.h"
#include "src/ir/regexp/empty_class_policy.h"
#include "src/ir/regexp/encoding/enc.h"
namespace re2c
{
// Advances `argv` to the following argument and stores it in `arg`.
// Returns false when that argument is the terminating NULL.
static inline bool next (char * & arg, char ** & argv)
{
    ++argv;
    arg = *argv;
    return NULL != arg;
}
/*
 * Parse command-line arguments.
 *
 * argv must be NULL-terminated; argv[0] is skipped (parsing starts by
 * advancing to the next element). Options are stored in 'opts', warning
 * settings in the global 'warn'.
 *
 * Returns:
 *   OK        - all arguments consumed and a source file was given
 *   EXIT_OK   - help or version information was printed
 *   EXIT_FAIL - bad option, bad/missing option argument, or no source file
 *
 * Each label below starts a small lexer that continues scanning the current
 * argument from YYCURSOR; 'end' in the rules is the terminating NUL byte.
 */
parse_opts_t parse_opts (char ** argv, Opt & opts)
{
#define YYCTYPE unsigned char
	char * YYCURSOR;
	char * YYMARKER;
	Warn::option_t option;

/*!re2c
	re2c:yyfill:enable = 0;
	re2c:yych:conversion = 1;

	end = "\x00";
	filename = [^\x00-] [^\x00]*;
*/

// start of a new argument
opt:
	if (!next (YYCURSOR, argv))
	{
		goto end;
	}
/*!re2c
	*
	{
		error ("bad option: %s", *argv);
		return EXIT_FAIL;
	}

	"--" end
	{
		// all remaining arguments are non-options
		// so they must be input files
		// re2c expects exactly one input file
		for (char * f; next (f, argv);)
		{
			if (!opts.source (f))
			{
				return EXIT_FAIL;
			}
		}
		goto end;
	}

	"-" end { if (!opts.source ("<stdin>")) return EXIT_FAIL; goto opt; }
	filename end { if (!opts.source (*argv)) return EXIT_FAIL; goto opt; }

	"-" { goto opt_short; }
	"--" { goto opt_long; }

	"-W" end { warn.set_all (); goto opt; }
	"-Werror" end { warn.set_all_error (); goto opt; }
	"-W" { option = Warn::W; goto opt_warn; }
	"-Wno-" { option = Warn::WNO; goto opt_warn; }
	"-Werror-" { option = Warn::WERROR; goto opt_warn; }
	"-Wno-error-" { option = Warn::WNOERROR; goto opt_warn; }
*/

// warning name following one of the -W prefixes
opt_warn:
/*!re2c
	*
	{
		error ("bad warning: %s", *argv);
		return EXIT_FAIL;
	}
	"condition-order" end { warn.set (Warn::CONDITION_ORDER, option); goto opt; }
	"empty-character-class" end { warn.set (Warn::EMPTY_CHARACTER_CLASS, option); goto opt; }
	"match-empty-string" end { warn.set (Warn::MATCH_EMPTY_STRING, option); goto opt; }
	"swapped-range" end { warn.set (Warn::SWAPPED_RANGE, option); goto opt; }
	"undefined-control-flow" end { warn.set (Warn::UNDEFINED_CONTROL_FLOW, option); goto opt; }
	"unreachable-rules" end { warn.set (Warn::UNREACHABLE_RULES, option); goto opt; }
	"useless-escape" end { warn.set (Warn::USELESS_ESCAPE, option); goto opt; }
*/

// short options; flags may be combined in one argument (each flag loops back here)
opt_short:
/*!re2c
	*
	{
		error ("bad short option: %s", *argv);
		return EXIT_FAIL;
	}
	end { goto opt; }
	[?h] { usage (); return EXIT_OK; }
	"v" { version (); return EXIT_OK; }
	"V" { vernum (); return EXIT_OK; }
	"b" { opts.set_bFlag (true); goto opt_short; }
	"c" { opts.set_cFlag (true); goto opt_short; }
	"d" { opts.set_dFlag (true); goto opt_short; }
	"D" { opts.set_target (opt_t::DOT); goto opt_short; }
	"f" { opts.set_fFlag (true); goto opt_short; }
	"F" { opts.set_FFlag (true); goto opt_short; }
	"g" { opts.set_gFlag (true); goto opt_short; }
	"i" { opts.set_iFlag (true); goto opt_short; }
	"r" { opts.set_rFlag (true); goto opt_short; }
	"s" { opts.set_sFlag (true); goto opt_short; }
	"S" { opts.set_target (opt_t::SKELETON); goto opt_short; }
	"e" { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"u" { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"w" { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"x" { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"8" { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"o" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; }
	"o" { *argv = YYCURSOR; goto opt_output; }
	"t" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; }
	"t" { *argv = YYCURSOR; goto opt_header; }
	"1" { goto opt_short; } // deprecated
*/

// long options (text after the leading "--")
opt_long:
/*!re2c
	*
	{
		error ("bad long option: %s", *argv);
		return EXIT_FAIL;
	}
	"help" end { usage (); return EXIT_OK; }
	"version" end { version (); return EXIT_OK; }
	"vernum" end { vernum (); return EXIT_OK; }
	"bit-vectors" end { opts.set_bFlag (true); goto opt; }
	"start-conditions" end { opts.set_cFlag (true); goto opt; }
	"debug-output" end { opts.set_dFlag (true); goto opt; }
	"emit-dot" end { opts.set_target (opt_t::DOT); goto opt; }
	"storable-state" end { opts.set_fFlag (true); goto opt; }
	"flex-syntax" end { opts.set_FFlag (true); goto opt; }
	"computed-gotos" end { opts.set_gFlag (true); goto opt; }
	"no-debug-info" end { opts.set_iFlag (true); goto opt; }
	"reusable" end { opts.set_rFlag (true); goto opt; }
	"nested-ifs" end { opts.set_sFlag (true); goto opt; }
	"no-generation-date" end { opts.set_bNoGenerationDate (true); goto opt; }
	"no-version" end { opts.set_version (false); goto opt; }
	"case-insensitive" end { opts.set_bCaseInsensitive (true); goto opt; }
	"case-inverted" end { opts.set_bCaseInverted (true); goto opt; }
	"skeleton" end { opts.set_target (opt_t::SKELETON); goto opt; }
	"ecb" end { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"unicode" end { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"wide-chars" end { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"utf-16" end { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"utf-8" end { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"output" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; }
	"type-header" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; }
	"encoding-policy" end { goto opt_encoding_policy; }
	"input" end { goto opt_input; }
	"empty-class" end { goto opt_empty_class; }
	"dfa-minimization" end { goto opt_dfa_minimization; }
	"single-pass" end { goto opt; } // deprecated
*/

// argument of -o, --output (YYCURSOR already points at it)
opt_output:
/*!re2c
	*
	{
		error ("bad argument to option -o, --output: %s", *argv);
		return EXIT_FAIL;
	}
	filename end { if (!opts.output (*argv)) return EXIT_FAIL; goto opt; }
*/

// argument of -t, --type-header (YYCURSOR already points at it)
opt_header:
/*!re2c
	*
	{
		error ("bad argument to option -t, --type-header: %s", *argv);
		return EXIT_FAIL;
	}
	filename end { opts.set_header_file (*argv); goto opt; }
*/

opt_encoding_policy:
	if (!next (YYCURSOR, argv))
	{
		error_arg ("--encoding-policy");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --encoding-policy (expected: ignore | substitute | fail): %s", *argv);
		return EXIT_FAIL;
	}
	"ignore" end { opts.set_encoding_policy (Enc::POLICY_IGNORE); goto opt; }
	"substitute" end { opts.set_encoding_policy (Enc::POLICY_SUBSTITUTE); goto opt; }
	"fail" end { opts.set_encoding_policy (Enc::POLICY_FAIL); goto opt; }
*/

opt_input:
	if (!next (YYCURSOR, argv))
	{
		error_arg ("--input");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --input (expected: default | custom): %s", *argv);
		return EXIT_FAIL;
	}
	"default" end { opts.set_input_api (InputAPI::DEFAULT); goto opt; }
	"custom" end { opts.set_input_api (InputAPI::CUSTOM); goto opt; }
*/

opt_empty_class:
	if (!next (YYCURSOR, argv))
	{
		error_arg ("--empty-class");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --empty-class (expected: match-empty | match-none | error): %s", *argv);
		return EXIT_FAIL;
	}
	"match-empty" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_EMPTY); goto opt; }
	"match-none" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_NONE); goto opt; }
	"error" end { opts.set_empty_class_policy (EMPTY_CLASS_ERROR); goto opt; }
*/

opt_dfa_minimization:
	if (!next (YYCURSOR, argv))
	{
		// report the option under its real name (the option is
		// '--dfa-minimization'; there is no '--minimization')
		error_arg ("--dfa-minimization");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --dfa-minimization (expected: table | moore): %s", *argv);
		return EXIT_FAIL;
	}
	"table" end { opts.set_dfa_minimization (DFA_MINIMIZATION_TABLE); goto opt; }
	"moore" end { opts.set_dfa_minimization (DFA_MINIMIZATION_MOORE); goto opt; }
*/

// all arguments consumed: a source file is mandatory
end:
	if (!opts.source_file)
	{
		error ("no source file");
		return EXIT_FAIL;
	}

	return OK;

#undef YYCTYPE
}
} // namespace re2c

200
tools/re2c/src/conf/warn.cc Normal file
View file

@ -0,0 +1,200 @@
#include <stddef.h>
#include <stdio.h>
#include <algorithm>
#include <utility>
#include "src/conf/msg.h"
#include "src/conf/warn.h"
namespace re2c {
// The global warning configuration object.
Warn warn;
// Per-warning state bits stored in mask[]: SILENT is "no bits set",
// WARNING and ERROR are independent flags.
const uint32_t Warn::SILENT = 0;
const uint32_t Warn::WARNING = 1u << 0;
const uint32_t Warn::ERROR = 1u << 1;
// Warning names indexed by type_t: the second field of every
// RE2C_WARNING_TYPES entry, in declaration order.
const char * Warn::names [TYPES] =
{
#define W(x, y) y
RE2C_WARNING_TYPES
#undef W
};
// Start with every warning silent and no accumulated error.
Warn::Warn ()
	: mask ()
	, error_accuml (false)
{
	std::fill (mask, mask + TYPES, SILENT);
}
// True if any warning reported so far had its ERROR bit set
// (the reporting methods accumulate this in error_accuml).
bool Warn::error () const
{
return error_accuml;
}
// Apply one -W<...> style option to the warning 't':
//   W        turns the warning on
//   WNO      turns the warning off
//   WERROR   turns it on and makes it an error
//   WNOERROR stops treating it as an error (the warning bit is untouched)
void Warn::set (type_t t, option_t o)
{
	uint32_t & m = mask[t];
	if (o == W)
	{
		m |= WARNING;
	}
	else if (o == WNO)
	{
		m &= ~WARNING;
	}
	else if (o == WERROR)
	{
		// unlike -Werror, -Werror-<warning> implies -W<warning>
		m |= (WARNING | ERROR);
	}
	else if (o == WNOERROR)
	{
		m &= ~ERROR;
	}
}
// Turn on every warning (existing ERROR bits are kept).
void Warn::set_all ()
{
	for (uint32_t * m = mask; m != mask + TYPES; ++m)
	{
		*m |= WARNING;
	}
}
// -Werror doesn't set any warnings: it only guarantees that if a warning
// has been set by now or will be set later then it will result into error.
void Warn::set_all_error ()
{
for (uint32_t i = 0; i < TYPES; ++i)
{
mask[i] |= ERROR;
}
}
// Report message 's' for warning 't' at 'line' if that warning is enabled.
// Always reported as a plain warning and never recorded in error_accuml.
void Warn::fail (type_t t, uint32_t line, const char * s)
{
	if (!(mask[t] & WARNING))
	{
		return;
	}
	// -Werror has no effect
	warning (names[t], line, false, "%s", s);
}
void Warn::condition_order (uint32_t line)
{
if (mask[CONDITION_ORDER] & WARNING)
{
const bool e = mask[CONDITION_ORDER] & ERROR;
error_accuml |= e;
warning (names[CONDITION_ORDER], line, e,
"looks like you use hardcoded numbers instead of autogenerated condition names: "
"better add '/*!types:re2c*/' directive or '-t, --type-header' option "
"and don't rely on fixed condition order.");
}
}
void Warn::empty_class (uint32_t line)
{
if (mask[EMPTY_CHARACTER_CLASS] & WARNING)
{
const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR;
error_accuml |= e;
warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class");
}
}
void Warn::match_empty_string (uint32_t line)
{
if (mask[MATCH_EMPTY_STRING] & WARNING)
{
const bool e = mask[MATCH_EMPTY_STRING] & ERROR;
error_accuml |= e;
warning (names[MATCH_EMPTY_STRING], line, e, "rule matches empty string");
}
}
void Warn::swapped_range (uint32_t line, uint32_t l, uint32_t u)
{
if (mask[SWAPPED_RANGE] & WARNING)
{
const bool e = mask[SWAPPED_RANGE] & ERROR;
error_accuml |= e;
warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u);
}
}
void Warn::undefined_control_flow (uint32_t line, const std::string & cond, std::vector<way_t> & ways, bool overflow)
{
if (mask[UNDEFINED_CONTROL_FLOW] & WARNING)
{
const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR;
error_accuml |= e;
// report shorter patterns first
std::sort (ways.begin (), ways.end (), cmp_ways);
warning_start (line, e);
fprintf (stderr, "control flow %sis undefined for strings that match ", incond (cond).c_str ());
const size_t count = ways.size ();
if (count == 1)
{
fprint_way (stderr, ways[0]);
}
else
{
for (size_t i = 0; i < count; ++i)
{
fprintf (stderr, "\n\t");
fprint_way (stderr, ways[i]);
}
fprintf (stderr, "\n");
}
if (overflow)
{
fprintf (stderr, " ... and a few more");
}
fprintf (stderr, ", use default rule '*'");
warning_end (names[UNDEFINED_CONTROL_FLOW], e);
}
}
void Warn::unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules)
{
if (mask[UNREACHABLE_RULES] & WARNING)
{
const bool e = mask[UNREACHABLE_RULES] & ERROR;
error_accuml |= e;
warning_start (rule.line, e);
fprintf (stderr, "unreachable rule %s", incond (cond).c_str ());
const size_t shadows = rule.shadow.size ();
if (shadows > 0)
{
const char * pl = shadows > 1
? "s"
: "";
std::set<rule_rank_t>::const_iterator i = rule.shadow.begin ();
fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, rules.find (*i)->second.line);
for (++i; i != rule.shadow.end (); ++i)
{
fprintf (stderr, ", %u", rules.find (*i)->second.line);
}
fprintf (stderr, ")");
}
warning_end (names[UNREACHABLE_RULES], e);
}
}
void Warn::useless_escape (uint32_t line, uint32_t col, char c)
{
if (mask[USELESS_ESCAPE] & WARNING)
{
const bool e = mask[USELESS_ESCAPE] & ERROR;
error_accuml |= e;
warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c);
}
}
} // namespace re2c

View file

@ -0,0 +1,67 @@
#ifndef _RE2C_CONF_WARN_
#define _RE2C_CONF_WARN_
#include "src/util/c99_stdint.h"
#include <string>
#include <vector>
#include "src/ir/skeleton/way.h"
#include "src/parse/rules.h"
namespace re2c {
// List of all individual warnings as W (ENUM_NAME, "command-line-name")
// entries. Users define W before expanding the list and undefine it
// afterwards; every entry is followed by a comma.
#define RE2C_WARNING_TYPES \
W (CONDITION_ORDER, "condition-order"), \
W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \
W (MATCH_EMPTY_STRING, "match-empty-string"), \
W (SWAPPED_RANGE, "swapped-range"), \
W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \
W (UNREACHABLE_RULES, "unreachable-rules"), \
W (USELESS_ESCAPE, "useless-escape"),
// Warning configuration and reporting: keeps a per-warning bit mask
// (enabled / treated as error) and provides one reporting method per warning.
class Warn
{
public:
// Warning identifiers, generated from the first field of each
// RE2C_WARNING_TYPES entry; TYPES is the number of warnings.
enum type_t
{
#define W(x, y) x
RE2C_WARNING_TYPES
#undef W
TYPES // count
};
// Kind of change requested for a warning (see set).
enum option_t
{
W,
WNO,
WERROR,
WNOERROR
};
private:
// bit values stored in mask[]
static const uint32_t SILENT;
static const uint32_t WARNING;
static const uint32_t ERROR;
// warning names, indexed by type_t
static const char * names [TYPES];
// current state of each warning, indexed by type_t
uint32_t mask[TYPES];
// set once a warning with the ERROR bit has been reported
bool error_accuml;
public:
Warn ();
// true if any reported warning was treated as an error
bool error () const;
// change the state of a single warning
void set (type_t t, option_t o);
// set the WARNING bit of every warning
void set_all ();
// set the ERROR bit of every warning
void set_all_error ();
// report an arbitrary message under warning 't' (never as an error)
void fail (type_t t, uint32_t line, const char * s);
// reporting methods, one per warning type
void condition_order (uint32_t line);
void empty_class (uint32_t line);
void match_empty_string (uint32_t line);
void swapped_range (uint32_t line, uint32_t l, uint32_t u);
// note: sorts 'ways' in place before printing
void undefined_control_flow (uint32_t line, const std::string & cond, std::vector<way_t> & ways, bool overflow);
void unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules);
void useless_escape (uint32_t line, uint32_t col, char c);
};
} // namespace re2c
#endif // _RE2C_CONF_WARN_

24
tools/re2c/src/globals.h Normal file
View file

@ -0,0 +1,24 @@
#ifndef _RE2C_GLOBALS_
#define _RE2C_GLOBALS_
#include <string>
#include "src/conf/opt.h"
#include "src/conf/warn.h"
#include "src/util/c99_stdint.h"
namespace re2c
{
// Program-wide state; these are declarations only.
// NOTE(review): the exact meaning of the b* flags, last_fill_index and
// yySetupRule is not visible from this header - confirm at the definitions.
extern bool bUsedYYBitmap;
extern bool bWroteGetState;
extern bool bWroteCondCheck;
extern uint32_t last_fill_index;
extern std::string yySetupRule;
// global option set
extern Opt opts;
// global warning configuration
extern Warn warn;
} // end namespace re2c
#endif // _RE2C_GLOBALS_

View file

@ -0,0 +1,109 @@
#ifndef _RE2C_IR_ADFA_ACTION_
#define _RE2C_IR_ADFA_ACTION_
#include <vector>
#include "src/codegen/label.h"
#include "src/util/c99_stdint.h"
#include "src/util/uniq_vector.h"
namespace re2c
{
struct OutputFile;
class RuleOp;
class State;
// Payload of an INITIAL action: a label plus a 'setMarker' flag.
struct Initial
{
label_t label;
bool setMarker;
inline Initial (label_t l, bool b)
: label (l)
, setMarker (b)
{}
};
typedef uniq_vector_t<const State *> accept_t;
class Action
{
public:
enum type_t
{
MATCH,
INITIAL,
SAVE,
MOVE,
ACCEPT,
RULE
} type;
union
{
Initial * initial;
uint32_t save;
const accept_t * accepts;
const RuleOp * rule;
} info;
public:
inline Action ()
: type (MATCH)
, info ()
{}
~Action ()
{
clear ();
}
void set_initial (label_t label, bool used_marker)
{
clear ();
type = INITIAL;
info.initial = new Initial (label, used_marker);
}
void set_save (uint32_t save)
{
clear ();
type = SAVE;
info.save = save;
}
void set_move ()
{
clear ();
type = MOVE;
}
void set_accept (const accept_t * accepts)
{
clear ();
type = ACCEPT;
info.accepts = accepts;
}
void set_rule (const RuleOp * const rule)
{
clear ();
type = RULE;
info.rule = rule;
}
private:
void clear ()
{
switch (type)
{
case INITIAL:
delete info.initial;
break;
case MATCH:
case SAVE:
case MOVE:
case ACCEPT:
case RULE:
break;
}
}
};
} // namespace re2c
#endif // _RE2C_IR_ADFA_ACTION_

View file

@ -0,0 +1,135 @@
#include <assert.h>
#include <queue>
#include <set>
#include <vector>
#include <utility>
#include "src/codegen/go.h"
#include "src/ir/adfa/adfa.h"
#include "src/ir/dfa/dfa.h"
#include "src/ir/skeleton/skeleton.h"
#include "src/util/allocate.h"
namespace re2c
{
// Build the linked list of States (DFA with actions) from the table-based
// 'dfa'.
//   dfa     - source automaton: one arcs[] row of dfa.nchars targets per state
//   fill    - per-state value copied into State::fill (indexed like dfa.states)
//   skel    - skeleton; deleted by the destructor of this object
//   charset - range bounds; charset[c] is used as the upper bound of a span
//             ending before column c, charset.back() becomes ubChar
//   n, c, l - name, condition and line stored as-is
DFA::DFA
( const dfa_t &dfa
, const std::vector<size_t> &fill
, Skeleton *skel
, const charset_t &charset
, const std::string &n
, const std::string &c
, uint32_t l
)
: accepts ()
, skeleton (skel)
, name (n)
, cond (c)
, line (l)
, lbChar(0)
, ubChar(charset.back())
, nStates(0)
, head(NULL)
// statistics
, max_fill (0)
, need_backup (false)
, need_backupctx (false)
, need_accept (false)
{
const size_t nstates = dfa.states.size();
const size_t nchars = dfa.nchars;
// index -> State map; all states are created up front so that
// transitions can refer to states that come later in the list
State **i2s = new State*[nstates];
for (size_t i = 0; i < nstates; ++i)
{
i2s[i] = new State;
}
// 'p' always points at the link that must receive the next state
State **p = &head;
for (size_t i = 0; i < nstates; ++i)
{
dfa_state_t *t = dfa.states[i];
State *s = i2s[i];
++nStates;
*p = s;
p = &s->next;
s->isPreCtxt = t->ctx;
s->rule = t->rule;
s->fill = fill[i];
// room for the worst case: one span per column
s->go.span = allocate<Span>(nchars);
uint32_t j = 0;
// compress each run of consecutive columns with the same target
// into one span; the loop header advances the span index 'j',
// the inner loop advances the column 'c' past the run
for (uint32_t c = 0; c < nchars; ++j)
{
const size_t to = t->arcs[c];
for (;++c < nchars && t->arcs[c] == to;);
// dfa_t::NIL (no transition) becomes a NULL target
s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to];
s->go.span[j].ub = charset[c];
}
s->go.nSpans = j;
}
// terminate the list; only the index map is freed, the states live on
*p = NULL;
delete[] i2s;
}
// Destroy every state of the list, then the skeleton.
DFA::~DFA()
{
	for (State *s = head; s != NULL; s = head)
	{
		// unlink before deleting so that 'head' always stays valid
		head = s->next;
		delete s;
	}
	delete skeleton;
}
void DFA::reorder()
{
std::vector<State*> ord;
ord.reserve(nStates);
std::queue<State*> todo;
todo.push(head);
std::set<State*> done;
done.insert(head);
for(;!todo.empty();)
{
State *s = todo.front();
todo.pop();
ord.push_back(s);
for(uint32_t i = 0; i < s->go.nSpans; ++i)
{
State *q = s->go.span[i].to;
if(q && done.insert(q).second)
{
todo.push(q);
}
}
}
assert(nStates == ord.size());
ord.push_back(NULL);
for(uint32_t i = 0; i < nStates; ++i)
{
ord[i]->next = ord[i + 1];
}
}
// Insert state 's' into the list immediately AFTER the state 'next'
// (despite its name, 'next' is the predecessor of the new state)
// and bump the state counter.
void DFA::addState(State *s, State *next)
{
++nStates;
s->next = next->next;
next->next = s;
}
} // namespace re2c

View file

@ -0,0 +1,101 @@
#ifndef _RE2C_IR_ADFA_ADFA_
#define _RE2C_IR_ADFA_ADFA_
#include <stddef.h>
#include "src/util/c99_stdint.h"
#include <set>
#include <string>
#include "src/codegen/go.h"
#include "src/codegen/label.h"
#include "src/ir/adfa/action.h"
#include "src/ir/regexp/regexp.h"
#include "src/util/forbid_copy.h"
namespace re2c
{
struct Skeleton;
struct Output;
struct OutputFile;
struct dfa_t;
// One state of the DFA with actions; states form a singly linked list
// through 'next'.
struct State
{
label_t label;
// rule attached to this state, or NULL
RuleOp * rule;
// next state in the list, or NULL for the last one
State * next;
// per-state fill value (DFA::calc_stats takes the maximum as max_fill)
size_t fill;
bool isPreCtxt;
// set by DFA::prepare for states that loop back to themselves
bool isBase;
// outgoing transitions (array of spans)
Go go;
Action action;
State ()
: label (label_t::first ())
, rule (NULL)
, next (0)
, fill (0)
, isPreCtxt (false)
, isBase (false)
, go ()
, action ()
{}
~State ()
{
// span array is released with plain 'operator delete'
// (presumably matching how allocate<Span> obtains it - confirm)
operator delete (go.span);
}
FORBID_COPY (State);
};
// DFA with actions: a linked list of States plus per-automaton statistics.
class DFA
{
// states used as 'accept' targets (filled by prepare)
accept_t accepts;
// deleted in the destructor
Skeleton * skeleton;
public:
const std::string name;
const std::string cond;
const uint32_t line;
// bounds of the character range; lbChar is 0, ubChar is the last charset bound
uint32_t lbChar;
uint32_t ubChar;
// number of states in the list starting at 'head'
uint32_t nStates;
State * head;
// statistics
size_t max_fill;
bool need_backup;
bool need_backupctx;
bool need_accept;
public:
DFA ( const dfa_t &dfa
, const std::vector<size_t> &fill
, Skeleton *skel
, const charset_t &charset
, const std::string &n
, const std::string &c
, uint32_t l
);
~DFA ();
// relink states in breadth-first order
void reorder();
// add rule/default/move states and finalize transitions
void prepare();
// compute max_fill and the need_* flags
void calc_stats();
void emit (Output &, uint32_t &, bool, bool &);
private:
// insert the first state right after the second one
void addState(State*, State *);
void split (State *);
void findBaseState ();
void count_used_labels (std::set<label_t> & used, label_t prolog, label_t start, bool force_start) const;
void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
FORBID_COPY (DFA);
};
} // namespace re2c
#endif // _RE2C_IR_ADFA_ADFA_

View file

@ -0,0 +1,268 @@
#include "src/util/c99_stdint.h"
#include <string.h>
#include <map>
#include "src/codegen/bitmap.h"
#include "src/codegen/go.h"
#include "src/globals.h"
#include "src/ir/adfa/action.h"
#include "src/ir/adfa/adfa.h"
#include "src/ir/regexp/regexp_rule.h"
#include "src/ir/rule_rank.h"
#include "src/util/allocate.h"
namespace re2c {
// Split state 's' in two: a new MOVE state, inserted right after 's',
// takes over the rule, fill value and transitions of 's'; 's' itself is
// left with no rule and a single span (upper bound ubChar) leading to
// the new state.
void DFA::split(State *s)
{
State *move = new State;
addState(move, s);
move->action.set_move ();
move->rule = s->rule;
move->fill = s->fill;
// NOTE(review): presumably a shallow copy, i.e. 'move' now holds the old
// span array of 's' and 's' gets a fresh one below - confirm in Go
move->go = s->go;
s->rule = NULL;
s->go.nSpans = 1;
s->go.span = allocate<Span> (1);
s->go.span[0].ub = ubChar;
s->go.span[0].to = move;
}
// Overlay the spans of 'fg' on the spans of 'bg' and write the result to x0.
// Both span lists are walked in order of upper bound; for every resulting
// sub-range the target is 'bg' where fg and bg agree on the target, and
// fg's target otherwise. Adjacent output spans with the same target are
// coalesced. Returns the number of spans written.
// The caller must provide enough room in x0.
static uint32_t merge(Span *x0, State *fg, State *bg)
{
Span *x = x0, *f = fg->go.span, *b = bg->go.span;
uint32_t nf = fg->go.nSpans, nb = bg->go.nSpans;
State *prev = NULL, *to;
// NB: we assume both spans are for same range
for (;;)
{
// both lists end a span at the same bound: consume one span of each
if (f->ub == b->ub)
{
to = f->to == b->to ? bg : f->to;
if (to == prev)
{
// same target as the previous output span: step back so that
// the assignment below just extends its upper bound
--x;
}
else
{
x->to = prev = to;
}
x->ub = f->ub;
++x;
++f;
--nf;
++b;
--nb;
// the only exit: both lists fully consumed
if (nf == 0 && nb == 0)
{
return static_cast<uint32_t> (x - x0);
}
}
// fg span ends first: consume fg spans only
while (f->ub < b->ub)
{
to = f->to == b->to ? bg : f->to;
if (to == prev)
{
--x;
}
else
{
x->to = prev = to;
}
x->ub = f->ub;
++x;
++f;
--nf;
}
// bg span ends first: consume bg spans only
while (b->ub < f->ub)
{
to = b->to == f->to ? bg : f->to;
if (to == prev)
{
--x;
}
else
{
x->to = prev = to;
}
x->ub = b->ub;
++x;
++b;
--nb;
}
}
}
// For every state with fill == 0 that has a transition to a 'base' state:
// try to express its transitions relative to the state that follows the
// base state (the target of the base state's first span), and keep the
// merged span list if it is shorter than the original one.
// Only the first base target of each state is considered.
void DFA::findBaseState()
{
// scratch buffer for merge results, reused for all states
Span *span = allocate<Span> (ubChar - lbChar);
for (State *s = head; s; s = s->next)
{
if (s->fill == 0)
{
for (uint32_t i = 0; i < s->go.nSpans; ++i)
{
State *to = s->go.span[i].to;
if (to->isBase)
{
to = to->go.span[0].to;
uint32_t nSpans = merge(span, s, to);
if (nSpans < s->go.nSpans)
{
// replace the span array by a copy of the shorter merged list
operator delete (s->go.span);
s->go.nSpans = nSpans;
s->go.span = allocate<Span> (nSpans);
memcpy(s->go.span, span, nSpans*sizeof(Span));
}
break;
}
}
}
}
operator delete (span);
}
// Turn the raw state list into its final shape:
//   1. add one RULE state per distinct rule rank and route NULL transitions
//      of states that have a rule to it
//   2. route all remaining NULL transitions to a single default state
//   3. if a default state exists, mark SAVE states and make the default
//      state the ACCEPT state
//   4. split self-looping states with non-zero fill ("base" states)
//   5. compact transitions relative to base states and initialize 'go'
// Also resets the global bUsedYYBitmap.
void DFA::prepare ()
{
bUsedYYBitmap = false;
// create rule states
std::map<rule_rank_t, State *> rules;
for (State * s = head; s; s = s->next)
{
if (s->rule)
{
// first state seen with this rule rank: create the rule state
// and insert it right after 's'
if (rules.find (s->rule->rank) == rules.end ())
{
State *n = new State;
n->action.set_rule (s->rule);
rules[s->rule->rank] = n;
addState(n, s);
}
for (uint32_t i = 0; i < s->go.nSpans; ++i)
{
if (!s->go.span[i].to)
{
s->go.span[i].to = rules[s->rule->rank];
}
}
}
}
// create default state (if needed)
State * default_state = NULL;
for (State * s = head; s; s = s->next)
{
for (uint32_t i = 0; i < s->go.nSpans; ++i)
{
if (!s->go.span[i].to)
{
// created lazily, right after the first state that needs it
if (!default_state)
{
default_state = new State;
addState(default_state, s);
}
s->go.span[i].to = default_state;
}
}
}
// find backup states and create accept state (if needed)
if (default_state)
{
for (State * s = head; s; s = s->next)
{
if (s->rule)
{
for (uint32_t i = 0; i < s->go.nSpans; ++i)
{
// 's' has a rule and a transition to a state that neither has
// a rule nor is a rule state: register the rule state of 's'
// in 'accepts' and store its index as the SAVE action
if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE)
{
const uint32_t accept = static_cast<uint32_t> (accepts.find_or_add (rules[s->rule->rank]));
s->action.set_save (accept);
}
}
}
}
default_state->action.set_accept (&accepts);
}
// split ``base'' states into two parts
for (State * s = head; s; s = s->next)
{
s->isBase = false;
if (s->fill != 0)
{
for (uint32_t i = 0; i < s->go.nSpans; ++i)
{
if (s->go.span[i].to == s)
{
s->isBase = true;
split(s);
if (opts->bFlag)
{
BitMap::find(&s->next->go, s);
}
// skip the MOVE state that split() inserted after 's'
s = s->next;
break;
}
}
}
}
// find ``base'' state, if possible
findBaseState();
for (State * s = head; s; s = s->next)
{
s->go.init (s);
}
}
void DFA::calc_stats ()
{
// calculate 'YYMAXFILL'
max_fill = 0;
for (State * s = head; s; s = s->next)
{
if (max_fill < s->fill)
{
max_fill = s->fill;
}
}
// determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used
need_backup = accepts.size () > 0;
// determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used
for (State * s = head; s; s = s->next)
{
if (s->isPreCtxt)
{
need_backupctx = true;
}
}
// determine if 'yyaccept' variable is used
need_accept = accepts.size () > 1;
}
} // namespace re2c

View file

@ -0,0 +1,104 @@
#include <algorithm>
#include <ostream>
#include <set>
#include "src/codegen/output.h"
#include "src/ir/compile.h"
#include "src/ir/adfa/adfa.h"
#include "src/ir/dfa/dfa.h"
#include "src/ir/nfa/nfa.h"
#include "src/ir/regexp/regexp.h"
#include "src/ir/skeleton/skeleton.h"
#include "src/parse/spec.h"
namespace re2c {
// Build the name "line<N>" for a block starting at 'line';
// for a non-empty condition the result is "line<N>_<cond>".
static std::string make_name(const std::string &cond, uint32_t line)
{
	std::ostringstream buf;
	buf << "line" << line;
	if (!cond.empty ())
	{
		buf << "_" << cond;
	}
	return buf.str();
}
// Compile one specification (regexp + rules) into a DFA with actions.
//   spec   - parsed specification (spec.re, spec.rules)
//   output - provides the block line; receives accumulated statistics
//   cond   - condition name (may be empty)
//   cunits - upper bound of the code unit range
// Pipeline: charset compaction -> NFA -> DFA -> skeleton -> minimization
// -> fill points -> DFA with actions -> reorder/prepare/statistics.
smart_ptr<DFA> compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits)
{
const uint32_t line = output.source.get_block_line();
const std::string name = make_name(cond, line);
// The original set of code units (charset) might be very large.
// A common trick is to split the charset into disjoint character ranges
// and choose a representative of each range (we choose the lower bound).
// The set of all representatives is the new (compacted) charset.
// Don't forget to include zero and the upper bound, even if they
// do not explicitly appear in ranges.
std::set<uint32_t> bounds;
spec.re->split(bounds);
bounds.insert(0);
bounds.insert(cunits);
charset_t cs;
for (std::set<uint32_t>::const_iterator i = bounds.begin(); i != bounds.end(); ++i)
{
cs.push_back(*i);
}
nfa_t nfa(spec.re);
dfa_t dfa(nfa, cs, spec.rules);
// skeleton must be constructed after DFA construction
// but prior to any other DFA transformations
Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line);
minimization(dfa);
// find YYFILL states and calculate argument to YYFILL
std::vector<size_t> fill;
fillpoints(dfa, fill);
// ADFA stands for 'DFA with actions'
// (the skeleton pointer is handed over: ~DFA deletes it)
DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line);
/*
* note [reordering DFA states]
*
* re2c-generated code depends on the order of states in DFA: simply
* flipping two states may change the output significantly.
* The order of states is affected by many factors, e.g.:
* - flipping left and right subtrees of alternative when constructing
* AST (also applies to iteration and counted repetition)
* - changing the order in which graph nodes are visited (applies to
* any intermediate representation: bytecode, NFA, DFA, etc.)
*
* To make the resulting code independent of such changes, we hereby
* reorder DFA states. The ordering scheme is very simple:
*
* Starting with DFA root, walk DFA nodes in breadth-first order.
* Child nodes are ordered according to the (alphabetically) first symbol
* leading to each node. Each node must be visited exactly once.
* Default state (NULL) is always the last state.
*/
adfa->reorder();
// skeleton is constructed, do further DFA transformations
adfa->prepare();
// finally gather overall DFA statistics
adfa->calc_stats();
// accumulate global statistics from this particular DFA
output.max_fill = std::max (output.max_fill, adfa->max_fill);
if (adfa->need_accept)
{
output.source.set_used_yyaccept ();
}
return make_smart_ptr(adfa);
}
} // namespace re2c

View file

@ -0,0 +1,20 @@
#ifndef _RE2C_IR_COMPILE_
#define _RE2C_IR_COMPILE_
#include "src/util/c99_stdint.h"
#include <string>
#include "src/util/smart_ptr.h"
namespace re2c
{
class DFA;
struct Output;
struct Spec;
// Compile 'spec' for condition 'cond' into a DFA with actions; 'cunits' is
// the upper bound of the code unit range, 'output' receives statistics.
smart_ptr<DFA> compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits);
} // namespace re2c
#endif // _RE2C_IR_COMPILE_

View file

@ -0,0 +1,197 @@
#include <algorithm>
#include <limits>
#include <map>
#include <set>
#include <vector>
#include "src/ir/dfa/dfa.h"
#include "src/ir/nfa/nfa.h"
#include "src/ir/regexp/regexp.h"
#include "src/ir/regexp/regexp_rule.h"
#include "src/ir/rule_rank.h"
#include "src/parse/rules.h"
#include "src/util/ord_hash_set.h"
#include "src/util/range.h"
namespace re2c
{
const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
/*
* note [marking DFA states]
*
* DFA state is a set of NFA states.
* However, DFA state includes not all NFA states that are in
* epsilon-closure (NFA states that have only epsilon-transitions
* and are not context of final states are omitted).
* The included states are called 'kernel' states.
*
* We mark visited NFA states during closure construction.
* These marks serve two purposes:
* - avoid loops in NFA
* - avoid duplication of NFA states in kernel
*
* Note that after closure construction:
* - all non-kernel states must be unmarked (these states are
* not stored in kernel and it is impossible to unmark them
* afterwards)
* - all kernel states must be marked (because we may later
* extend this kernel with epsilon-closure of another NFA
* state). Kernel states are unmarked later (before finding
* or adding DFA state).
*/
// Append the kernel states of the epsilon-closure of 'n' to the buffer
// at 'cP' and return the new end of the buffer.
// Already marked states are skipped; see note [marking DFA states].
static nfa_state_t **closure(nfa_state_t **cP, nfa_state_t *n)
{
	if (n->mark)
	{
		return cP;
	}
	n->mark = true;

	if (n->type == nfa_state_t::ALT)
	{
		// not a kernel state: follow both branches (second one first)
		// and unmark the state again
		cP = closure(cP, n->value.alt.out2);
		cP = closure(cP, n->value.alt.out1);
		n->mark = false;
		return cP;
	}

	// kernel state: store it (and leave it marked)
	*cP = n;
	++cP;
	if (n->type == nfa_state_t::CTX)
	{
		// context states are stored and followed as well
		cP = closure(cP, n->value.ctx.out);
	}
	return cP;
}
// Look up the kernel [kernel, end) in 'kernels', inserting it via
// kernels.insert, and return the value that insert yields (used by the
// caller as a DFA state index). An empty kernel yields dfa_t::NIL.
// Side effects: unmarks all kernel states and sorts the buffer in place.
static size_t find_state
( nfa_state_t **kernel
, nfa_state_t **end
, ord_hash_set_t &kernels
)
{
// zero-sized kernel corresponds to default state
if (kernel == end)
{
return dfa_t::NIL;
}
// see note [marking DFA states]
for (nfa_state_t **p = kernel; p != end; ++p)
{
(*p)->mark = false;
}
// sort kernel states: we need this to get stable hash
// and to compare states with simple 'memcmp'
std::sort(kernel, end);
// size is passed in bytes
const size_t size = static_cast<size_t>(end - kernel) * sizeof(nfa_state_t*);
return kernels.insert(kernel, size);
}
// Determinization: build the DFA from 'nfa' over the compacted 'charset'.
// Every DFA state corresponds to one kernel stored in 'kernels'; for each
// state and each charset column the target kernel is computed and looked
// up (or added). Afterwards each state gets the rule with the smallest
// rank among its final NFA states, and the other rules of that state
// record it in rules[...].shadow.
dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules)
: states()
, nchars(charset.size() - 1) // (n + 1) bounds for n ranges
{
// state index -> rules of the final NFA states in its kernel
std::map<size_t, std::set<RuleOp*> > s2rules;
ord_hash_set_t kernels;
// scratch buffer for building kernels
nfa_state_t **const buffer = new nfa_state_t*[nfa.size];
// per column: NFA states reachable from the current kernel on that column
std::vector<std::vector<nfa_state_t*> > arcs(nchars);
// initial state: closure of the NFA root
find_state(buffer, closure(buffer, nfa.root), kernels);
// 'kernels' presumably grows while find_state adds new kernels below, so
// this loop also processes states discovered during the iteration
for (size_t i = 0; i < kernels.size(); ++i)
{
dfa_state_t *s = new dfa_state_t;
states.push_back(s);
nfa_state_t **kernel;
const size_t kernel_size = kernels.deref<nfa_state_t*>(i, kernel);
for (size_t j = 0; j < kernel_size; ++j)
{
nfa_state_t *n = kernel[j];
switch (n->type)
{
case nfa_state_t::RAN:
{
nfa_state_t *m = n->value.ran.out;
size_t c = 0;
for (Range *r = n->value.ran.ran; r; r = r->next ())
{
// find the column whose bound equals the lower bound of the
// range, then add 'm' to every column up to the upper bound
// (both bounds are expected to be present in charset)
for (; charset[c] != r->lower(); ++c);
for (; charset[c] != r->upper(); ++c)
{
arcs[c].push_back(m);
}
}
break;
}
case nfa_state_t::CTX:
s->ctx = true;
break;
case nfa_state_t::FIN:
s2rules[i].insert(n->value.fin.rule);
break;
default:
break;
}
}
s->arcs = new size_t[nchars];
for(size_t c = 0; c < nchars; ++c)
{
// target kernel for column c: union of closures of all reachable states
nfa_state_t **end = buffer;
for (std::vector<nfa_state_t*>::const_iterator j = arcs[c].begin(); j != arcs[c].end(); ++j)
{
end = closure(end, *j);
}
s->arcs[c] = find_state(buffer, end, kernels);
}
// reset per-column lists for the next state
for(size_t c = 0; c < nchars; ++c)
{
arcs[c].clear();
}
}
delete[] buffer;
const size_t count = states.size();
for (size_t i = 0; i < count; ++i)
{
dfa_state_t *s = states[i];
std::set<RuleOp*> &rs = s2rules[i];
// for each final state: choose the rule with the smallest rank
for (std::set<RuleOp*>::const_iterator j = rs.begin(); j != rs.end(); ++j)
{
RuleOp *rule = *j;
if (!s->rule || rule->rank < s->rule->rank)
{
s->rule = rule;
}
}
// other rules are shadowed by the chosen rule
for (std::set<RuleOp*>::const_iterator j = rs.begin(); j != rs.end(); ++j)
{
RuleOp *rule = *j;
if (s->rule != rule)
{
rules[rule->rank].shadow.insert(s->rule->rank);
}
}
}
}
// The DFA owns its states: delete each of them.
dfa_t::~dfa_t()
{
	const size_t count = states.size();
	for (size_t k = 0; k < count; ++k)
	{
		delete states[k];
	}
}
} // namespace re2c

View file

@ -0,0 +1,58 @@
#ifndef _RE2C_IR_DFA_DFA_
#define _RE2C_IR_DFA_DFA_
#include "src/util/c99_stdint.h"
#include <vector>
#include "src/ir/regexp/regexp.h"
#include "src/parse/rules.h"
#include "src/util/forbid_copy.h"
namespace re2c
{
struct nfa_t;
class RuleOp;
// One state of the table-based DFA.
struct dfa_state_t
{
// target state index per charset column (dfa_t::NIL means no transition);
// array allocated with new[] by the DFA constructor, freed here
size_t *arcs;
// rule chosen for this state, or NULL
RuleOp *rule;
// set if the kernel of this state contains a context NFA state
bool ctx;
dfa_state_t()
: arcs(NULL)
, rule(NULL)
, ctx(false)
{}
~dfa_state_t()
{
delete[] arcs;
}
FORBID_COPY(dfa_state_t);
};
// Table-based DFA produced by determinization of an NFA.
struct dfa_t
{
// "no transition" marker used in dfa_state_t::arcs
static const size_t NIL;
// states owned by the DFA (deleted in the destructor)
std::vector<dfa_state_t*> states;
// number of charset ranges, i.e. the length of every arcs[] array
const size_t nchars;
dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules);
~dfa_t();
};
// DFA minimization algorithm, selected with '--dfa-minimization <table | moore>'.
enum dfa_minimization_t
{
DFA_MINIMIZATION_TABLE,
DFA_MINIMIZATION_MOORE
};
// minimize 'dfa' in place
void minimization(dfa_t &dfa);
// compute the per-state YYFILL argument into 'fill' (indexed like dfa.states)
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill);
} // namespace re2c
#endif // _RE2C_IR_DFA_DFA_

View file

@ -0,0 +1,154 @@
#include <limits>
#include <stack>
#include <vector>
#include "src/ir/dfa/dfa.h"
namespace re2c
{
// Special lowlink / fill values (see the note below).
// NB: deliberately not called INFINITY / UNDEFINED: INFINITY is a standard
// macro of <math.h> / <cmath>, and a constant with that name fails to
// compile as soon as that header gets included (even transitively).
static const size_t SCC_INF = std::numeric_limits<size_t>::max();
static const size_t SCC_UND = SCC_INF - 1;

// Returns true if any of the 'narcs' transitions in 'arcs' leads to 'node'.
static bool loopback(size_t node, size_t narcs, const size_t *arcs)
{
	for (size_t i = 0; i < narcs; ++i)
	{
		if (arcs[i] == node)
		{
			return true;
		}
	}
	return false;
}

/*
 * note [finding strongly connected components of DFA]
 *
 * A slight modification of Tarjan's algorithm.
 *
 * The algorithm walks graph in depth-first order. It maintains a stack
 * of nodes that have already been visited but haven't been assigned to
 * SCC yet. For each node the algorithm calculates 'lowlink': index of
 * the highest ancestor node reachable in one step from a descendant of
 * the node. Lowlink is used to determine when a set of nodes should be
 * popped off the stack into a new SCC.
 *
 * We use lowlink to hold different kinds of information:
 *   - values in range [0 .. stack size] mean that this node is on stack
 *     (link to a node with the smallest index reachable from this one)
 *   - SCC_UND means that this node has not been visited yet
 *   - SCC_INF means that this node has already been popped off stack
 *
 * We use stack size (rather than topological sort index) as unique index
 * of a node on stack. This is safe because indices of nodes on stack are
 * still unique and less than indices of nodes that have been popped off
 * stack (SCC_INF).
 *
 */
// Visit state 'i': find the SCCs reachable from it and set trivial[r] for
// the root 'r' of every SCC that is a single state without a self-loop
// (all other entries of 'trivial' are left as they are).
static void scc(
	const dfa_t &dfa,
	std::stack<size_t> &stack,
	std::vector<size_t> &lowlink,
	std::vector<bool> &trivial,
	size_t i)
{
	const size_t link = stack.size();
	lowlink[i] = link;
	stack.push(i);

	const size_t *arcs = dfa.states[i]->arcs;
	for (size_t c = 0; c < dfa.nchars; ++c)
	{
		const size_t j = arcs[c];
		if (j != dfa_t::NIL)
		{
			if (lowlink[j] == SCC_UND)
			{
				scc(dfa, stack, lowlink, trivial, j);
			}
			if (lowlink[j] < lowlink[i])
			{
				lowlink[i] = lowlink[j];
			}
		}
	}

	// 'i' is the root of an SCC: pop the whole component off the stack
	if (lowlink[i] == link)
	{
		// SCC is non-trivial (has loops) iff it either:
		//   - consists of multiple nodes (they all must be interconnected)
		//   - consists of single node which loops back to itself
		trivial[i] = i == stack.top()
			&& !loopback(i, dfa.nchars, arcs);

		size_t j;
		do
		{
			j = stack.top();
			stack.pop();
			lowlink[j] = SCC_INF;
		}
		while (j != i);
	}
}

// Compute fill[i]: the maximal number of transitions from state 'i' up to
// (and including) the transition into the next state that is not trivial.
// States are computed at most once (fill[i] != SCC_UND means "done or in
// progress").
static void calc_fill(
	const dfa_t &dfa,
	const std::vector<bool> &trivial,
	std::vector<size_t> &fill,
	size_t i)
{
	if (fill[i] == SCC_UND)
	{
		fill[i] = 0;
		const size_t *arcs = dfa.states[i]->arcs;
		for (size_t c = 0; c < dfa.nchars; ++c)
		{
			const size_t j = arcs[c];
			if (j != dfa_t::NIL)
			{
				calc_fill(dfa, trivial, fill, j);
				// one step to reach 'j', plus the rest of the path
				// if 'j' itself does not trigger YYFILL
				size_t max = 1;
				if (trivial[j])
				{
					max += fill[j];
				}
				if (max > fill[i])
				{
					fill[i] = max;
				}
			}
		}
	}
}

// Calculate the YYFILL argument for every state of 'dfa'; the result is
// stored in 'fill' (indexed like dfa.states, previous contents discarded).
// States that must not trigger YYFILL get zero.
// State 0 is taken as the initial state.
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
{
	const size_t size = dfa.states.size();

	// find DFA states that belong to non-trivial SCC
	std::stack<size_t> stack;
	std::vector<size_t> lowlink(size, SCC_UND);
	std::vector<bool> trivial(size, false);
	scc(dfa, stack, lowlink, trivial, 0);

	// for each DFA state, calculate YYFILL argument:
	// maximal path length to the next YYFILL state
	// ('assign' rather than 'resize': every element must start as SCC_UND,
	// even if the caller passes a vector that is not empty)
	fill.assign(size, SCC_UND);
	calc_fill(dfa, trivial, fill, 0);

	// The following states must trigger YYFILL:
	//   - initial state
	//   - all states in non-trivial SCCs
	// for other states, reset YYFILL argument to zero
	for (size_t i = 1; i < size; ++i)
	{
		if (trivial[i])
		{
			fill[i] = 0;
		}
	}
}
} // namespace re2c

View file

@ -0,0 +1,252 @@
#include <string.h>
#include <map>
#include <utility>
#include <vector>
#include "src/conf/opt.h"
#include "src/ir/dfa/dfa.h"
#include "src/globals.h"
namespace re2c
{
class RuleOp;
/*
* note [DFA minimization: table filling algorithm]
*
* This algorithm is simple and slow; it's a reference implementation.
*
* The algorithm constructs (strictly lower triangular) boolean matrix
* indexed by DFA states. Each matrix cell (S1,S2) indicates if states
* S1 and S2 are distinguishable. Initially states are distinguished
* according to their rule and context. One step of the algorithm
* updates the matrix as follows: each pair of states S1 and S2 is
* marked as distinguishable iff there exist transitions from S1 and S2
* on the same symbol that go to distinguishable states. The algorithm
* loops until the matrix stops changing.
*/
// Table filling minimization.
//   part   - output array with one slot per state; part[i] receives the
//            smallest index of a state indistinguishable from state i
//            (i itself when there is no smaller one)
//   states - DFA states; only 'rule', 'ctx' and 'arcs' are read
//   nchars - number of arcs per state
static void minimization_table(
    size_t *part,
    const std::vector<dfa_state_t*> &states,
    size_t nchars)
{
    const size_t count = states.size();
    if (count == 0)
    {
        // Nothing to partition. Returning here also keeps the unsigned
        // 'count - 1' below from wrapping around.
        return;
    }

    // Strictly lower triangular matrix stored in one contiguous buffer:
    // row i holds i cells, tbl[i][j] is valid for j < i.
    bool **tbl = new bool*[count];
    tbl[0] = new bool[count * (count - 1) / 2];
    for (size_t i = 1; i < count; ++i)
    {
        tbl[i] = tbl[i - 1] + (i - 1);
    }

    // Initial marking: states differ if their rule or context differs.
    for (size_t i = 0; i < count; ++i)
    {
        dfa_state_t *s1 = states[i];
        for (size_t j = 0; j < i; ++j)
        {
            dfa_state_t *s2 = states[j];
            tbl[i][j] = s1->ctx != s2->ctx
                || s1->rule != s2->rule;
        }
    }

    // Propagate distinguishability until a full pass changes nothing.
    for (bool loop = true; loop;)
    {
        loop = false;
        for (size_t i = 0; i < count; ++i)
        {
            for (size_t j = 0; j < i; ++j)
            {
                if (tbl[i][j])
                {
                    continue;
                }
                for (size_t k = 0; k < nchars; ++k)
                {
                    size_t oi = states[i]->arcs[k];
                    size_t oj = states[j]->arcs[k];
                    // order the pair so that the lookup stays in the
                    // lower triangle
                    if (oi < oj)
                    {
                        std::swap(oi, oj);
                    }
                    // the table is consulted only when both targets exist:
                    // a missing arc on exactly one side already tells the
                    // states apart
                    if (oi != oj &&
                        (oi == dfa_t::NIL ||
                         oj == dfa_t::NIL ||
                         tbl[oi][oj]))
                    {
                        tbl[i][j] = true;
                        loop = true;
                        break;
                    }
                }
            }
        }
    }

    // Map every state to the first state it cannot be told apart from.
    for (size_t i = 0; i < count; ++i)
    {
        part[i] = i;
        for (size_t j = 0; j < i; ++j)
        {
            if (!tbl[i][j])
            {
                part[i] = j;
                break;
            }
        }
    }

    delete[] tbl[0];
    delete[] tbl;
}
/*
* note [DFA minimization: Moore algorithm]
*
* The algorithm maintains partition of DFA states.
* Initial partition is coarse: states are distinguished according
* to their rule and context. Partition is gradually refined: each
* set of states is split into minimal number of subsets such that
* for all states in a subset transitions on the same symbol go to
* the same set of states.
* The algorithm loops until partition stops changing.
*/
// Moore minimization by partition refinement.
//   part   - output array with one slot per state; on return part[i] is the
//            index of the representative of the set that state i belongs to
//            (representatives satisfy part[i] == i)
//   states - DFA states; only 'rule', 'ctx' and 'arcs' are read
//   nchars - number of arcs per state
static void minimization_moore(
    size_t *part,
    const std::vector<dfa_state_t*> &states,
    size_t nchars)
{
    typedef std::pair<RuleOp*, bool> key_t;
    typedef std::map<key_t, size_t> init_t;

    const size_t count = states.size();

    // next[] threads the members of every set into a singly linked list
    // that starts at the representative and is terminated by dfa_t::NIL.
    size_t *next = new size_t[count];

    // Initial partition: one set per distinct (rule, context) pair; the
    // first state seen with a given pair becomes the representative.
    init_t init;
    for (size_t i = 0; i < count; ++i)
    {
        dfa_state_t *s = states[i];
        const key_t key(s->rule, s->ctx);
        // insert() already yields the existing entry when the key is
        // present, so no second lookup is needed
        const std::pair<init_t::iterator, bool> ins =
            init.insert(std::make_pair(key, i));
        if (ins.second)
        {
            part[i] = i;
            next[i] = dfa_t::NIL;
        }
        else
        {
            // link state i right behind the representative
            const size_t j = ins.first->second;
            part[i] = j;
            next[i] = next[j];
            next[j] = i;
        }
    }

    // out: for each state, the sets its arcs lead to (nchars entries per state)
    size_t *out = new size_t[nchars * count];
    // diff: representatives of the subsets found while splitting one set
    size_t *diff = new size_t[count];
    for (bool loop = true; loop;)
    {
        loop = false;
        for (size_t i = 0; i < count; ++i)
        {
            // only representatives of sets with more than one member
            if (i != part[i] || next[i] == dfa_t::NIL)
            {
                continue;
            }

            // translate arcs of every member into target sets
            for (size_t j = i; j != dfa_t::NIL; j = next[j])
            {
                size_t *o = &out[j * nchars];
                size_t *a = states[j]->arcs;
                for (size_t c = 0; c < nchars; ++c)
                {
                    o[c] = a[c] == dfa_t::NIL
                        ? dfa_t::NIL
                        : part[a[c]];
                }
            }

            // regroup members by identical target-set rows
            size_t diff_count = 0;
            for (size_t j = i; j != dfa_t::NIL;)
            {
                // remember the successor: next[j] is rewritten below
                const size_t j_next = next[j];
                size_t n = 0;
                for (; n < diff_count; ++n)
                {
                    size_t k = diff[n];
                    if (memcmp(&out[j * nchars],
                        &out[k * nchars],
                        nchars * sizeof(size_t)) == 0)
                    {
                        part[j] = k;
                        next[j] = next[k];
                        next[k] = j;
                        break;
                    }
                }
                if (n == diff_count)
                {
                    // no matching subset yet: j starts a new one
                    diff[diff_count++] = j;
                    part[j] = j;
                    next[j] = dfa_t::NIL;
                }
                j = j_next;
            }
            // a split happened: another pass over the partition is required
            loop |= diff_count > 1;
        }
    }

    delete[] out;
    delete[] diff;
    delete[] next;
}
// Minimizes 'dfa' in place using the algorithm selected by
// opts->dfa_minimization: states that are not the representative of their
// set are deleted, the representatives are moved to the front of dfa.states
// in their original order, and all arcs are redirected to the new indices.
void minimization(dfa_t &dfa)
{
    std::vector<dfa_state_t*> &states = dfa.states;
    const size_t old_count = states.size();

    // part[i]: index of the representative of the set containing state i
    size_t *part = new size_t[old_count];
    switch (opts->dfa_minimization)
    {
        case DFA_MINIMIZATION_TABLE:
            minimization_table(part, states, dfa.nchars);
            break;
        case DFA_MINIMIZATION_MOORE:
            minimization_moore(part, states, dfa.nchars);
            break;
    }

    // compact[i]: new index of representative i (set for representatives
    // only). It has to be complete before any arc is rewritten, because arcs
    // may point to states with a larger index.
    size_t *compact = new size_t[old_count];
    size_t kept = 0;
    for (size_t i = 0; i < old_count; ++i)
    {
        if (part[i] == i)
        {
            compact[i] = kept++;
        }
    }

    size_t new_count = 0;
    for (size_t i = 0; i < old_count; ++i)
    {
        dfa_state_t *s = states[i];
        if (part[i] != i)
        {
            // merged into its representative
            delete s;
            continue;
        }
        for (size_t c = 0; c < dfa.nchars; ++c)
        {
            size_t &target = s->arcs[c];
            if (target != dfa_t::NIL)
            {
                target = compact[part[target]];
            }
        }
        states[new_count++] = s;
    }
    states.resize(new_count);

    delete[] compact;
    delete[] part;
}
} // namespace re2c

Some files were not shown because too many files have changed in this diff Show more