mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-10 06:42:08 +00:00
Update re2c to version 0.16
This commit is contained in:
parent
5ee988f607
commit
43c6c9b5ee
191 changed files with 25750 additions and 17231 deletions
|
@ -1,3 +1,202 @@
|
|||
Version 0.16 (2016-01-21)
|
||||
---------------------------
|
||||
- Fixed bug #127 "code generation error with wide chars and bitmaps (omitted 'goto' statement)"
|
||||
- Added DFA minimization and option '--dfa-minimization <table | moore>'
|
||||
- Fixed bug #128 "very slow DFA construction (resulting in a very large DFA)"
|
||||
- Fixed bug #132 "test failure on big endian archs with 0.15.3"
|
||||
|
||||
Version 0.15.3 (2015-12-02)
|
||||
---------------------------
|
||||
- Fixed bugs and applied patches:
|
||||
#122 "clang does not compile re2c 0.15.x" (reported and fixed by Oleksii Taran).
|
||||
#124 "Get rid of UINT32_MAX and friends" (patch by Sergei Trofimovich, fixes FreeBSD builds).
|
||||
#125 "[OS X] git reports changes not staged for commit in newly cloned repository" (by Oleksii Taran, this fix also applies to Windows).
|
||||
- Added option --no-version that allows omitting version information.
|
||||
- Reduced memory and time consumed with -Wundefined-control-flow.
|
||||
- Improved coverage of input data generated with -S --skeleton.
|
||||
|
||||
Version 0.15.2 (2015-11-23)
|
||||
---------------------------
|
||||
- Fixed build system: lexer depends on bison-generated parser
|
||||
(Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=566620)
|
||||
|
||||
Version 0.15.1 (2015-11-22)
|
||||
---------------------------
|
||||
- Fixed test failures caused by locale-sensitive 'sort'.
|
||||
|
||||
Version 0.15 (2015-11-22)
|
||||
-------------------------
|
||||
- Updated website http://re2c.org:
|
||||
added examples
|
||||
updated docs
|
||||
added news
|
||||
added web feed (Atom 1.0)
|
||||
- Added options:
|
||||
-S, --skeleton
|
||||
--empty-class <match-empty | match-none | error>
|
||||
- Added warnings:
|
||||
-W
|
||||
-Werror
|
||||
-W<warning>
|
||||
-Wno-<warning>
|
||||
-Werror-<warning>
|
||||
-Wno-error-<warning>
|
||||
- Added individual warnings:
|
||||
-Wundefined-control-flow
|
||||
-Wunreachable-rules
|
||||
-Wcondition-order
|
||||
-Wuseless-escape
|
||||
-Wempty-character-class
|
||||
-Wswapped-range
|
||||
-Wmatch-empty-string
|
||||
- Fixed options:
|
||||
-- (interpret remaining arguments as non-options)
|
||||
- Deprecated options:
|
||||
-1 --single-pass (single pass is by default now)
|
||||
- Reduced size of the generated .dot files.
|
||||
- Fixed bugs:
|
||||
#27 re2c crashes reading files containing %{ %} (patch by Rui)
|
||||
#51 default rule doesn't work in reuse mode
|
||||
#52 eliminate multiple passes
|
||||
#59 bogus yyaccept in -c mode
|
||||
#60 redundant use of YYMARKER
|
||||
#61 empty character class [] matches empty string
|
||||
#115 flex-style named definitions cause ambiguity in re2c grammar
|
||||
#119 -f with -b/-g generates incorrect dispatch on fill labels
|
||||
#116 empty string with non-empty trailing context consumes code units
|
||||
- Added test options:
|
||||
-j, -j <N> (run tests in N threads, defaults to the number of CPUs)
|
||||
--wine (test windows builds using wine)
|
||||
--skeleton (generate skeleton programs, compile and execute them)
|
||||
--keep-tmp-files (don't delete intermediate files for successful tests)
|
||||
- Updated build system:
|
||||
support out of source builds
|
||||
support `make distcheck`
|
||||
added `make bootstrap` (rebuild re2c after building with precompiled .re files)
|
||||
added `make tests` (run tests with -j)
|
||||
added `make vtests` (run tests with --valgrind -j)
|
||||
added `make wtests` (run tests with --wine -j 1)
|
||||
added Autoconf tests for CXXFLAGS. By default try the following options:
|
||||
-W -Wall -Wextra -Weffc++ -pedantic -Wformat=2 -Wredundant-decls
|
||||
-Wsuggest-attribute=format -Wconversion -Wsign-conversion -O2 -Weverything),
|
||||
respect user-defined CXXFLAGS
|
||||
support Mingw builds: `configure -host i686-w64-mingw32`
|
||||
structured source files
|
||||
removed old MSVC files
|
||||
- Moved development to github (https://github.com/skvadrik/re2c), keep a mirror on sourceforge.
|
||||
|
||||
Version 0.14.3 (2015-05-20)
|
||||
-----------------------------
|
||||
- applied patch '#27 re2c crashes reading files containing %{ %}' by Rui
|
||||
- dropped distfiles for MSVC (they are broken anyway)
|
||||
|
||||
Version 0.14.2 (2015-03-25)
|
||||
-----------------------------
|
||||
- fixed #57 Wrong result only if another rule is present
|
||||
|
||||
Version 0.14.1 (2015-02-27)
|
||||
-----------------------------
|
||||
- fixed #55 re2c-0.14: re2c -V outputs null byte
|
||||
|
||||
Version 0.14 (2015-02-23)
|
||||
-----------------------------
|
||||
- Added generic input API 21 (#21 Support to configure how re2c code interfaced with the symbol buffer?)
|
||||
- fixed #46 re2c generates an infinite loop, depends on existence of previous parser
|
||||
- fixed #47 Dot output label escaped characters
|
||||
|
||||
Version 0.13.7.5 (2014-08-22)
|
||||
-----------------------------
|
||||
- Fixed Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=518904 (PHP lexer)
|
||||
|
||||
Version 0.13.7.4 (2014-07-29)
|
||||
-----------------------------
|
||||
- Enabled 'make docs' only if configured with '--enable-docs'
|
||||
- Disallowed using yacc/byacc instead of bison to build the parser
|
||||
- Removed non-portable sed feature in script that runs tests
|
||||
|
||||
Version 0.13.7.3 (2014-07-28)
|
||||
-----------------------------
|
||||
- Fixed CXX warning
|
||||
- Got rid of asciidoc build-time dependency
|
||||
|
||||
Version 0.13.7.2 (2014-07-27)
|
||||
-----------------------------
|
||||
- Included man page into dist, respect users CXXFLAGS.
|
||||
|
||||
Version 0.13.7.1 (2014-07-26)
|
||||
-----------------------------
|
||||
- Added missing files to tarball
|
||||
|
||||
Version 0.13.7 (2014-07-25)
|
||||
---------------------------
|
||||
- Added UTF-8 support
|
||||
- Added UTF-16 support
|
||||
- Added default rule
|
||||
- Added option to control ill-formed Unicode
|
||||
|
||||
Version 0.13.6 (2013-07-04)
|
||||
---------------------------
|
||||
- Fixed #2535084 uint problem with Sun C 5.8
|
||||
- #3308400: allow Yacc-style %{code brackets}%
|
||||
- #2506253: allow C++ // comments
|
||||
- Fixed inplace configuration in -e mode.
|
||||
- Applied #2482572 Typos in error messages.
|
||||
- Applied #2482561 Error in manual section on -r mode.
|
||||
- Fixed #2478216 Wrong start_label in -c mode.
|
||||
- Fixed #2186718 Unescaped backslash in file name of #line directive.
|
||||
- Fixed #2102138 Duplicate case labels on EBCDIC.
|
||||
- Fixed #2088583 Compile problem on AIX.
|
||||
- Fixed #2038610 Ebcdic problem.
|
||||
- improve dot support: make char intervals (e.g. [A-Z]) instead of one edge per char
|
||||
|
||||
Version 0.13.5 (2008-05-25)
|
||||
---------------------------
|
||||
- Fixed #1952896 Segfault in re2c::Scanner::scan.
|
||||
- Fixed #1952842 Regression.
|
||||
|
||||
Version 0.13.4 (2008-04-05)
|
||||
---------------------------
|
||||
- Added transparent handling of #line directives in input files.
|
||||
- Added re2c:yyfill:check inplace configuration.
|
||||
- Added re2c:define:YYSETSTATE:naked inplace configuration.
|
||||
- Added re2c:flags:w and re2c:flags:u inplace configurations.
|
||||
- Added the ability to add rules in 'use:re2c' blocks.
|
||||
- Changed -r flag to accept only 'rules:re2c' and 'use:re2c' blocks.
|
||||
|
||||
Version 0.13.3 (2008-03-14)
|
||||
---------------------------
|
||||
- Added -r flag to allow reuse of scanner definitions.
|
||||
- Added -F flag to support flex syntax in rules.
|
||||
- Fixed SEGV in scanner that occurs with very large blocks.
|
||||
- Fixed issue with unused yybm.
|
||||
- Partial support for flex syntax.
|
||||
- Changed to allow /* comments with -c switch.
|
||||
- Added flag -D/--emit-dot.
|
||||
|
||||
Version 0.13.2 (2008-02-14)
|
||||
---------------------------
|
||||
- Added flag --case-inverted.
|
||||
- Added flag --case-insensitive.
|
||||
- Added support for '<!...>' to enable rule setup.
|
||||
- Added support for '=>' style rules.
|
||||
- Added support for ':=' style rules.
|
||||
- Added support for ':=>' style rules.
|
||||
- Added re2c:cond:divider and re2c:cond:goto inplace configuration.
|
||||
- Fixed code generation to emit space after 'if'.
|
||||
|
||||
Version 0.13.1 (2007-08-24)
|
||||
---------------------------
|
||||
- Added custom build rules for Visual Studio 2005 (re2c.rules). (William Swanson)
|
||||
- Fixed issue with some compilers.
|
||||
- Fixed #1776177 Build on AIX.
|
||||
- Fixed #1743180 fwrite with 0 length crashes on OS X.
|
||||
|
||||
Version 0.13.0 (2007-06-24)
|
||||
---------------------------
|
||||
- Added -c and -t to generate scanners with (f)lex-like condition support.
|
||||
- Fixed issue with short form of switches and parameter if not first switch.
|
||||
- Fixed #1708378 segfault in actions.cc.
|
||||
|
||||
Version 0.12.3 (2007-08-24)
|
||||
---------------------------
|
||||
- Fixed issue with some compilers.
|
||||
|
|
|
@ -7,32 +7,92 @@ include( CheckTypeSize )
|
|||
|
||||
set( PACKAGE_NAME re2c )
|
||||
set( PACKAGE_TARNAME re2c )
|
||||
set( PACKAGE_VERSION 0.12.3 )
|
||||
set( PACKAGE_STRING "re2c 0.12.3" )
|
||||
set( PACKAGE_VERSION 0.16 )
|
||||
set( PACKAGE_STRING "re2c 0.16" )
|
||||
set( PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net" )
|
||||
|
||||
CHECK_FUNCTION_EXISTS( strdup HAVE_STRDUP )
|
||||
CHECK_FUNCTION_EXISTS( strndup HAVE_STRNDUP )
|
||||
|
||||
CHECK_TYPE_SIZE( "0i8" SIZEOF_0I8 )
|
||||
CHECK_TYPE_SIZE( "0l" SIZEOF_0L )
|
||||
CHECK_TYPE_SIZE( "0ll" SIZEOF_0LL )
|
||||
CHECK_TYPE_SIZE( char SIZEOF_CHAR )
|
||||
CHECK_TYPE_SIZE( short SIZEOF_SHORT )
|
||||
CHECK_TYPE_SIZE( int SIZEOF_INT )
|
||||
CHECK_TYPE_SIZE( long SIZEOF_LONG )
|
||||
CHECK_TYPE_SIZE( "long long" SIZEOF_LONG_LONG )
|
||||
CHECK_TYPE_SIZE( "void *" SIZEOF_VOID_P )
|
||||
# The result variable must be spelled SIZEOF___INT64 so that it matches the
# `#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@` placeholder in config.h.in;
# with the previous name (SIZEOF___INT_64) that macro was never defined.
CHECK_TYPE_SIZE( __int64 SIZEOF___INT64 )
|
||||
|
||||
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h )
|
||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR} )
|
||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} )
|
||||
add_definitions( -DHAVE_CONFIG_H )
|
||||
|
||||
file( GLOB SRC_HDR
|
||||
src/codegen/*.h
|
||||
src/conf/*.h
|
||||
src/ir/*.h
|
||||
src/*.h
|
||||
src/parse/*.h
|
||||
src/util/*.h )
|
||||
|
||||
add_executable( re2c
|
||||
actions.cc
|
||||
code.cc
|
||||
dfa.cc
|
||||
main.cc
|
||||
mbo_getopt.cc
|
||||
parser.cc
|
||||
scanner.cc
|
||||
substr.cc
|
||||
translate.cc )
|
||||
${SRC_HDR}
|
||||
src/codegen/bitmap.cc
|
||||
src/codegen/emit_action.cc
|
||||
src/codegen/emit_dfa.cc
|
||||
src/codegen/label.cc
|
||||
src/codegen/go_construct.cc
|
||||
src/codegen/go_destruct.cc
|
||||
src/codegen/go_emit.cc
|
||||
src/codegen/go_used_labels.cc
|
||||
src/codegen/input_api.cc
|
||||
src/codegen/output.cc
|
||||
src/codegen/print.cc
|
||||
src/conf/msg.cc
|
||||
src/conf/opt.cc
|
||||
src/conf/parse_opts.cc
|
||||
src/conf/warn.cc
|
||||
src/ir/nfa/calc_size.cc
|
||||
src/ir/nfa/nfa.cc
|
||||
src/ir/nfa/split.cc
|
||||
src/ir/adfa/adfa.cc
|
||||
src/ir/adfa/prepare.cc
|
||||
src/ir/dfa/determinization.cc
|
||||
src/ir/dfa/fillpoints.cc
|
||||
src/ir/dfa/minimization.cc
|
||||
src/ir/regexp/display.cc
|
||||
src/ir/regexp/encoding/enc.cc
|
||||
src/ir/regexp/encoding/range_suffix.cc
|
||||
src/ir/regexp/encoding/utf8/utf8_regexp.cc
|
||||
src/ir/regexp/encoding/utf8/utf8_range.cc
|
||||
src/ir/regexp/encoding/utf8/utf8.cc
|
||||
src/ir/regexp/encoding/utf16/utf16_regexp.cc
|
||||
src/ir/regexp/encoding/utf16/utf16.cc
|
||||
src/ir/regexp/encoding/utf16/utf16_range.cc
|
||||
src/ir/regexp/fixed_length.cc
|
||||
src/ir/regexp/regexp.cc
|
||||
src/ir/compile.cc
|
||||
src/ir/rule_rank.cc
|
||||
src/ir/skeleton/control_flow.cc
|
||||
src/ir/skeleton/generate_code.cc
|
||||
src/ir/skeleton/generate_data.cc
|
||||
src/ir/skeleton/match_empty.cc
|
||||
src/ir/skeleton/maxlen.cc
|
||||
src/ir/skeleton/skeleton.cc
|
||||
src/ir/skeleton/unreachable.cc
|
||||
src/ir/skeleton/way.cc
|
||||
src/main.cc
|
||||
src/parse/code.cc
|
||||
src/parse/input.cc
|
||||
src/parse/lex.cc
|
||||
src/parse/lex_conf.cc
|
||||
src/parse/parser.cc
|
||||
src/parse/scanner.cc
|
||||
src/parse/unescape.cc
|
||||
src/util/s_to_n32_unsafe.cc
|
||||
src/util/range.cc )
|
||||
|
||||
set( CROSS_EXPORTS ${CROSS_EXPORTS} re2c PARENT_SCOPE )
|
||||
|
||||
|
|
2
tools/re2c/NO_WARRANTY
Normal file
2
tools/re2c/NO_WARRANTY
Normal file
|
@ -0,0 +1,2 @@
|
|||
re2c is distributed with no warranty whatever. The author and any other
|
||||
contributors take no responsibility for the consequences of its use.
|
|
@ -1,188 +1,159 @@
|
|||
re2c Version 0.12.3
|
||||
------------------
|
||||
re2c
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
|
||||
|
||||
Currently maintained by:
|
||||
Dan Nuffer <nuffer at users.sourceforge.net>
|
||||
Marcus Boerger <helly at users.sourceforge.net>
|
||||
Hartmut Kaiser <hkaiser at users.sourceforge.net>
|
||||
DESCRIPTION
|
||||
--------------------------------------------------------------------------------
|
||||
re2c is a tool for generating C-based recognizers from regular expressions.
|
||||
re2c-based scanners are efficient: for programming languages, given similar
|
||||
specifications, a re2c-based scanner is typically almost twice as fast as a
|
||||
flex-based scanner with little or no increase in size (possibly a decrease
|
||||
on cisc architectures). Indeed, re2c-based scanners are quite competitive with
|
||||
hand-crafted ones.
|
||||
|
||||
Unlike flex, re2c does not generate complete scanners: the user must supply some
|
||||
interface code. While this code is not bulky (about 50-100 lines for a
|
||||
flex-like scanner; see the man page and examples in the distribution) careful
|
||||
coding is required for efficiency (and correctness). One advantage of this
|
||||
arrangement is that the generated code is not tied to any particular input
|
||||
model.
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
DOWNLOAD
|
||||
--------------------------------------------------------------------------------
|
||||
The re2c distribution can be found at:
|
||||
https://sourceforge.net/projects/re2c/
|
||||
|
||||
http://sourceforge.net/projects/re2c/
|
||||
Download the latest tarball:
|
||||
https://sourceforge.net/projects/re2c/files/latest/download
|
||||
|
||||
re2c has been developed and tested with the following compilers on various
|
||||
platforms in 32 bit and 64 bit mode:
|
||||
- GCC 3.3 ... 4.1
|
||||
- Microsoft VC 7, 7.1, 8
|
||||
- Intel 9.0
|
||||
- Sun C++ 5.8 (CXXFLAGS='-library=stlport4')
|
||||
- MIPSpro Compilers: Version 7.4.4m
|
||||
Clone git repo:
|
||||
git clone git://git.code.sf.net/p/re2c/code-git
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
GCC 2.x and Microsoft VC 6 are not capable of compiling re2c.
|
||||
|
||||
Building re2c on unix like platforms requires autoconf 2.57 and bison (tested
|
||||
with 1.875 and later). Under windows you don't need autoconf or bison
|
||||
and can use the pregenerated files.
|
||||
BUILD
|
||||
--------------------------------------------------------------------------------
|
||||
Contents:
|
||||
1. simple build
|
||||
2. bootstrap
|
||||
3. out-of-source build
|
||||
4. testing
|
||||
5. rebuild documentation
|
||||
6. build for windows with mingw
|
||||
7. build from git
|
||||
|
||||
You can build this software by simply typing the following commands:
|
||||
./configure
|
||||
make
|
||||
1. Simplest possible build:
|
||||
$ ./configure [--prefix=<prefix>]
|
||||
$ make
|
||||
$ make install
|
||||
This will build re2c and install it (binary and man page) to <prefix> (defaults
|
||||
to /usr/local).
|
||||
|
||||
The above version will be based on the pregenerated scanner.cc file.
|
||||
If you want to build that file yourself (recommended when installing
|
||||
re2c) you need the following steps:
|
||||
./configure
|
||||
make
|
||||
rm -f scanner.cc
|
||||
make install
|
||||
2. Bootstrap and rebuild:
|
||||
$ ./configure [--prefix=<prefix>]
|
||||
$ make bootstrap
|
||||
$ make install
|
||||
Usual bootstrap procedure: re2c uses re2c to compile its lexer.
|
||||
1. build lexer (if make finds re2c binary in build directory, it will build lexer
|
||||
from source, otherwise it will use prebuilt lexer)
|
||||
2. build re2c
|
||||
3. build lexer from source using re2c binary in build directory
|
||||
4. rebuild re2c
|
||||
|
||||
Or you can create a rpm package and install it by the following commands:
|
||||
./configure
|
||||
make rpm
|
||||
rpm -Uhv <packagedir>/re2c-0.12.3-1.rpm
|
||||
3. Out-of-source build:
|
||||
$ mkdir <build-directory>
|
||||
$ cd <build-directory>
|
||||
$ <path-to-configure>/configure [--prefix=<prefix>]
|
||||
$ make
|
||||
$ make install
|
||||
|
||||
If you want to build from CVS then the first thing you should do is
|
||||
regenerate all build files using the following command:
|
||||
./autogen.sh
|
||||
and then continue with one of the above described build methods. Or if you
|
||||
need to generate RPM packages for cvs builds use these commands:
|
||||
./autogen.sh
|
||||
./configure
|
||||
./makerpm <release>
|
||||
rpm -Uhv <packagedir>/re2c-0.12.3-<release>.rpm
|
||||
4. Testing:
|
||||
$ make check
|
||||
This will redirect test script output to file. If you want to see progress:
|
||||
$ make tests
|
||||
Testing under valgrind (takes a long time):
|
||||
$ make vtests
|
||||
|
||||
Here <release> should be a number like 1. And <packagedir> must equal
|
||||
the directory where the makerpm step has written the generated rpm to.
|
||||
5. Rebuild documentation (requires rst2man.py):
|
||||
$ ./configure --enable-docs [--prefix=<prefix>]
|
||||
$ make docs
|
||||
$ make install
|
||||
|
||||
If you are on a debian system you can use the tool 'alien' to convert rpms
|
||||
to debian packages.
|
||||
6. Build for windows using mingw:
|
||||
$ ../configure --host i686-w64-mingw32 [--prefix=<prefix>]
|
||||
$ make
|
||||
This will result in an executable re2c.exe, which can be tested with wine:
|
||||
$ make wtests
|
||||
|
||||
When building with native SUN compilers you need to set the following compiler
|
||||
flags: CXXFLAGS='-g -compat5 -library=stlport4'.
|
||||
7. If you want to build from git, you'll first need to generate autotools files:
|
||||
$ ./autogen.sh
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
If you want to build re2c on a windows system you can either use cygwin and one
|
||||
of the methods described above or use Microsoft Visual C .NET 2002 or later
|
||||
with the solution files provided (re2c.sln for 2002/2003 and re2c-2005.sln for
|
||||
version 2005). re2c cannot be built with Microsoft Visual C 6.0 or earlier.
|
||||
|
||||
re2c is a great tool for writing fast and flexible lexers. It has
|
||||
served many people well for many years. re2c is on the order of 2-3
|
||||
times faster than a flex based scanner, and its input model is much
|
||||
more flexible.
|
||||
INFO
|
||||
--------------------------------------------------------------------------------
|
||||
$ man re2c
|
||||
|
||||
For an introduction to re2c refer to the lessons sub directory.
|
||||
re2c home page:
|
||||
re2c.org
|
||||
|
||||
Peter's original version 0.5 ANNOUNCE and README follows.
|
||||
re2c manual:
|
||||
re2c.org/manual.html
|
||||
|
||||
--
|
||||
Ulya Trofimovich's blog on re2c:
|
||||
skvadrik.github.io/aleph_null/re2c.html
|
||||
|
||||
re2c is a tool for generating C-based recognizers from regular
|
||||
expressions. re2c-based scanners are efficient: for programming
|
||||
languages, given similar specifications, an re2c-based scanner is
|
||||
typically almost twice as fast as a flex-based scanner with little or no
|
||||
increase in size (possibly a decrease on cisc architectures). Indeed,
|
||||
re2c-based scanners are quite competitive with hand-crafted ones.
|
||||
Original paper on re2c: "RE2C: a More Versatile Parser Generator" (1994, Peter
|
||||
Bumbulis and Donald D. Cowan).
|
||||
|
||||
Unlike flex, re2c does not generate complete scanners: the user must
|
||||
supply some interface code. While this code is not bulky (about 50-100
|
||||
lines for a flex-like scanner; see the man page and examples in the
|
||||
distribution) careful coding is required for efficiency (and
|
||||
correctness). One advantage of this arrangement is that the generated
|
||||
code is not tied to any particular input model. For example, re2c
|
||||
generated code can be used to scan data from a null-byte terminated
|
||||
buffer as illustrated below.
|
||||
Examples can be found in 'examples' directory.
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Given the following source
|
||||
|
||||
#define NULL ((char*) 0)
|
||||
char *scan(char *p)
|
||||
{
|
||||
#define YYCTYPE char
|
||||
#define YYCURSOR p
|
||||
#define YYLIMIT p
|
||||
#define YYFILL(n)
|
||||
/*!re2c
|
||||
[0-9]+ {return YYCURSOR;}
|
||||
[\000-\377] {return NULL;}
|
||||
*/
|
||||
}
|
||||
MAILING LISTS
|
||||
--------------------------------------------------------------------------------
|
||||
re2c-general:
|
||||
re2c-general@lists.sourceforge.net
|
||||
re2c-devel:
|
||||
re2c-devel@lists.sourceforge.net
|
||||
|
||||
re2c will generate
|
||||
You are welcome to ask for help or share your thoughts and ideas about re2c :)
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
|
||||
#line 1 "simple.re"
|
||||
#define NULL ((char*) 0)
|
||||
char *scan(char *p)
|
||||
{
|
||||
#define YYCTYPE char
|
||||
#define YYCURSOR p
|
||||
#define YYLIMIT p
|
||||
#define YYFILL(n)
|
||||
{
|
||||
YYCTYPE yych;
|
||||
unsigned int yyaccept;
|
||||
|
||||
if((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
|
||||
yych = *YYCURSOR;
|
||||
if(yych <= '/') goto yy4;
|
||||
if(yych >= ':') goto yy4;
|
||||
yy2: yych = *++YYCURSOR;
|
||||
goto yy7;
|
||||
yy3:
|
||||
#line 9
|
||||
{return YYCURSOR;}
|
||||
yy4: yych = *++YYCURSOR;
|
||||
yy5:
|
||||
#line 10
|
||||
{return NULL;}
|
||||
yy6: ++YYCURSOR;
|
||||
if(YYLIMIT == YYCURSOR) YYFILL(1);
|
||||
yych = *YYCURSOR;
|
||||
yy7: if(yych <= '/') goto yy3;
|
||||
if(yych <= '9') goto yy6;
|
||||
goto yy3;
|
||||
}
|
||||
#line 11
|
||||
BUGS
|
||||
--------------------------------------------------------------------------------
|
||||
Please report any bugs and send feature requests to:
|
||||
https://sourceforge.net/p/re2c/_list/tickets
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
||||
Note that most compilers will perform dead-code elimination to remove
|
||||
all YYCURSOR, YYLIMIT comparisons.
|
||||
AUTHORS
|
||||
--------------------------------------------------------------------------------
|
||||
Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
|
||||
Currently maintained by:
|
||||
Ulya Trofimovich <skvadrik@gmail.com>
|
||||
Dan Nuffer <nuffer@users.sourceforge.net>
|
||||
Marcus Boerger <helly@users.sourceforge.net>
|
||||
Hartmut Kaiser <hkaiser@users.sourceforge.net>
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
re2c was developed for a particular project (constructing a fast REXX
|
||||
scanner of all things!) and so while it has some rough edges, it should
|
||||
be quite usable. More information about re2c can be found in the
|
||||
(admittedly skimpy) man page; the algorithms and heuristics used are
|
||||
described in an upcoming LOPLAS article (included in the distribution).
|
||||
Probably the best way to find out more about re2c is to try the supplied
|
||||
examples. re2c is written in C++, and is currently being developed
|
||||
under Linux using gcc 2.5.8.
|
||||
|
||||
Peter
|
||||
LICENSE
|
||||
--------------------------------------------------------------------------------
|
||||
re2c is distributed with no warranty whatever. The code is certain to contain
|
||||
errors. Neither the author nor any contributor takes responsibility for any
|
||||
consequences of its use.
|
||||
|
||||
--
|
||||
|
||||
re2c is distributed with no warranty whatever. The code is certain to
|
||||
contain errors. Neither the author nor any contributor takes
|
||||
responsibility for any consequences of its use.
|
||||
|
||||
re2c is in the public domain. The data structures and algorithms used
|
||||
in re2c are all either taken from documents available to the general
|
||||
public or are inventions of the author. Programs generated by re2c may
|
||||
be distributed freely. re2c itself may be distributed freely, in source
|
||||
or binary, unchanged or modified. Distributors may charge whatever fees
|
||||
they can obtain for re2c.
|
||||
re2c is in the public domain. The data structures and algorithms used in re2c
|
||||
are all either taken from documents available to the general public or are
|
||||
inventions of the authors. Programs generated by re2c may be distributed freely.
|
||||
re2c itself may be distributed freely, in source or binary, unchanged or
|
||||
modified. Distributors may charge whatever fees they can obtain for re2c.
|
||||
|
||||
If you do make use of re2c, or incorporate it into a larger project an
|
||||
acknowledgement somewhere (documentation, research report, etc.) would
|
||||
be appreciated.
|
||||
|
||||
Please send bug reports and feedback (including suggestions for
|
||||
improving the distribution) to
|
||||
|
||||
peter@csg.uwaterloo.ca
|
||||
|
||||
Include a small example and the banner from parser.y with bug reports.
|
||||
|
||||
acknowledgement somewhere (documentation, research report, etc.) would be
|
||||
appreciated.
|
||||
--------------------------------------------------------------------------------
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,57 +0,0 @@
|
|||
/* $Id: basics.h 520 2006-05-25 13:31:06Z helly $ */
|
||||
#ifndef _basics_h
|
||||
#define _basics_h
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#elif defined(_WIN32)
|
||||
#include "config_w32.h"
|
||||
#endif
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
#if SIZEOF_CHAR == 1
|
||||
typedef unsigned char byte;
|
||||
#elif SIZEOF_SHORT == 1
|
||||
typedef unsigned short byte;
|
||||
#elif SIZEOF_INT == 1
|
||||
typedef unsigned int byte;
|
||||
#elif SIZEOF_LONG == 1
|
||||
typedef unsigned long byte;
|
||||
#else
|
||||
typedef unsigned char byte;
|
||||
#endif
|
||||
|
||||
#if SIZEOF_CHAR == 2
|
||||
typedef unsigned char word;
|
||||
#elif SIZEOF_SHORT == 2
|
||||
typedef unsigned short word;
|
||||
#elif SIZEOF_INT == 2
|
||||
typedef unsigned int word;
|
||||
#elif SIZEOF_LONG == 2
|
||||
typedef unsigned long word;
|
||||
#else
|
||||
typedef unsigned short word;
|
||||
#endif
|
||||
|
||||
#if SIZEOF_CHAR == 4
|
||||
typedef unsigned char dword;
|
||||
#elif SIZEOF_SHORT == 4
|
||||
typedef unsigned short dword;
|
||||
#elif SIZEOF_INT == 4
|
||||
typedef unsigned int dword;
|
||||
#elif SIZEOF_LONG == 4
|
||||
typedef unsigned long dword;
|
||||
#else
|
||||
typedef unsigned long dword;
|
||||
#endif
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned long ulong;
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
1806
tools/re2c/code.cc
1806
tools/re2c/code.cc
File diff suppressed because it is too large
Load diff
|
@ -1,53 +0,0 @@
|
|||
/* $Id: code.h 525 2006-05-25 13:32:49Z helly $ */
|
||||
#ifndef _code_h
|
||||
#define _code_h
|
||||
|
||||
#include "re.h"
|
||||
#include "dfa.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class BitMap
|
||||
{
|
||||
public:
|
||||
static BitMap *first;
|
||||
|
||||
const Go *go;
|
||||
const State *on;
|
||||
const BitMap *next;
|
||||
uint i;
|
||||
uint m;
|
||||
|
||||
public:
|
||||
static const BitMap *find(const Go*, const State*);
|
||||
static const BitMap *find(const State*);
|
||||
static void gen(std::ostream&, uint ind, uint, uint);
|
||||
static void stats();
|
||||
BitMap(const Go*, const State*);
|
||||
~BitMap();
|
||||
|
||||
#if PEDANTIC
|
||||
BitMap(const BitMap& oth)
|
||||
: go(oth.go)
|
||||
, on(oth.on)
|
||||
, next(oth.next)
|
||||
, i(oth.i)
|
||||
, m(oth.m)
|
||||
{
|
||||
}
|
||||
BitMap& operator = (const BitMap& oth)
|
||||
{
|
||||
new(this) BitMap(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */
|
||||
#endif
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
|
@ -1,33 +0,0 @@
|
|||
/* $Id: token.h 547 2006-05-25 13:40:35Z helly $ */
|
||||
#ifndef _code_names_h
|
||||
#define _code_names_h
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class CodeNames: public std::map<std::string, std::string>
|
||||
{
|
||||
public:
|
||||
std::string& operator [] (const char * what);
|
||||
};
|
||||
|
||||
inline std::string& CodeNames::operator [] (const char * what)
|
||||
{
|
||||
CodeNames::iterator it = find(std::string(what));
|
||||
|
||||
if (it != end())
|
||||
{
|
||||
return it->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
return insert(std::make_pair(std::string(what), std::string(what))).first->second;
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
|
@ -1,10 +1,7 @@
|
|||
/* config.h.in. Generated from configure.in by autoheader. */
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Define to 1 if you have the `strdup' function. */
|
||||
#cmakedefine HAVE_STRDUP
|
||||
|
||||
/* Define to 1 if you have the `strndup' function. */
|
||||
#cmakedefine HAVE_STRNDUP
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#cmakedefine HAVE_STDINT_H @HAVE_STDINT_H@
|
||||
|
||||
/* Name of package */
|
||||
#cmakedefine PACKAGE "@PACKAGE_NAME@"
|
||||
|
@ -21,21 +18,41 @@
|
|||
/* Define to the one symbol short name of this package. */
|
||||
#cmakedefine PACKAGE_TARNAME "@PACKAGE_TARNAME@"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#cmakedefine PACKAGE_URL "@PACKAGE_URL@"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
|
||||
|
||||
/* The size of `0i8', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_0I8 @SIZEOF_0I8@
|
||||
|
||||
/* The size of `0l', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_0L @SIZEOF_0L@
|
||||
|
||||
/* The size of `0ll', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_0LL @SIZEOF_0LL@
|
||||
|
||||
/* The size of `char', as computed by sizeof. */
|
||||
#define SIZEOF_CHAR @SIZEOF_CHAR@
|
||||
#cmakedefine SIZEOF_CHAR @SIZEOF_CHAR@
|
||||
|
||||
/* The size of `int', as computed by sizeof. */
|
||||
#define SIZEOF_INT @SIZEOF_INT@
|
||||
#cmakedefine SIZEOF_INT @SIZEOF_INT@
|
||||
|
||||
/* The size of `long', as computed by sizeof. */
|
||||
#define SIZEOF_LONG @SIZEOF_LONG@
|
||||
#cmakedefine SIZEOF_LONG @SIZEOF_LONG@
|
||||
|
||||
/* The size of `long long', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_LONG_LONG @SIZEOF_LONG_LONG@
|
||||
|
||||
/* The size of `short', as computed by sizeof. */
|
||||
#define SIZEOF_SHORT @SIZEOF_SHORT@
|
||||
#cmakedefine SIZEOF_SHORT @SIZEOF_SHORT@
|
||||
|
||||
/* The size of `void *', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@
|
||||
|
||||
/* The size of `__int64', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "@PACKAGE_VERSION@"
|
||||
|
||||
#cmakedefine VERSION @PACKAGE_VERSION@
|
||||
|
|
|
@ -1,102 +0,0 @@
|
|||
/* config.h. Generated by configure. */
|
||||
/* config.h.in. Generated from configure.in by autoheader. */
|
||||
|
||||
/* Define to 1 if you have the `getpagesize' function. */
|
||||
#define HAVE_GETPAGESIZE 1
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#define HAVE_INTTYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#define HAVE_MEMORY_H 1
|
||||
|
||||
/* Define to 1 if you have the `memset' function. */
|
||||
#define HAVE_MEMSET 1
|
||||
|
||||
/* Define to 1 if you have a working `mmap' system call. */
|
||||
/* #undef HAVE_MMAP */
|
||||
|
||||
/* Define to 1 if you have the `munmap' function. */
|
||||
#define HAVE_MUNMAP 1
|
||||
|
||||
/* Define to 1 if stdbool.h conforms to C99. */
|
||||
#define HAVE_STDBOOL_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#define HAVE_STDINT_H 1
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#define HAVE_STDLIB_H 1
|
||||
|
||||
/* Define to 1 if you have the `strdup' function. */
|
||||
#define HAVE_STRDUP 1
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#define HAVE_STRINGS_H 1
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#define HAVE_UNISTD_H 1
|
||||
|
||||
/* Define to 1 if the system has the type `_Bool'. */
|
||||
#define HAVE__BOOL 1
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "re2c"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net"
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "re2c"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "re2c 0.12.3"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "re2c"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "0.12.3"
|
||||
|
||||
/* The size of a `char', as computed by sizeof. */
|
||||
#define SIZEOF_CHAR 1
|
||||
|
||||
/* The size of a `int', as computed by sizeof. */
|
||||
#define SIZEOF_INT 4
|
||||
|
||||
/* The size of a `long', as computed by sizeof. */
|
||||
#define SIZEOF_LONG 4
|
||||
|
||||
/* The size of a `short', as computed by sizeof. */
|
||||
#define SIZEOF_SHORT 2
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#define STDC_HEADERS 1
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "0.12.3"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to `__inline__' or `__inline' if that's what the C compiler
|
||||
calls it, or to nothing if 'inline' is not supported under any name. */
|
||||
#ifndef __cplusplus
|
||||
/* #undef inline */
|
||||
#endif
|
||||
|
||||
/* Define to `unsigned' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
||||
|
||||
/* Define to empty if the keyword `volatile' does not work. Warning: valid
|
||||
code using `volatile' can become incorrect without. Disable with care. */
|
||||
/* #undef volatile */
|
|
@ -1,416 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include "globals.h"
|
||||
#include "substr.h"
|
||||
#include "dfa.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
void prtChOrHex(std::ostream& o, uint c, bool useTalx)
|
||||
{
|
||||
int oc = (int)(re2c::wFlag || !useTalx ? c : re2c::talx[c]);
|
||||
|
||||
if ((oc < 256) && isprint(oc))
|
||||
{
|
||||
o << '\'';
|
||||
prtCh(o, c);
|
||||
o << '\'';
|
||||
}
|
||||
else
|
||||
{
|
||||
prtHex(o, c);
|
||||
}
|
||||
}
|
||||
|
||||
void prtHex(std::ostream& o, uint c, bool useTalx)
|
||||
{
|
||||
int oc = (int)(re2c::wFlag || !useTalx ? c : re2c::talx[c]);
|
||||
|
||||
if (re2c::uFlag)
|
||||
{
|
||||
o << "0x"
|
||||
<< hexCh(oc >> 28)
|
||||
<< hexCh(oc >> 24)
|
||||
<< hexCh(oc >> 20)
|
||||
<< hexCh(oc >> 16)
|
||||
<< hexCh(oc >> 12)
|
||||
<< hexCh(oc >> 8)
|
||||
<< hexCh(oc >> 4)
|
||||
<< hexCh(oc);
|
||||
}
|
||||
else if (re2c::wFlag)
|
||||
{
|
||||
o << "0x"
|
||||
<< hexCh(oc >> 12)
|
||||
<< hexCh(oc >> 8)
|
||||
<< hexCh(oc >> 4)
|
||||
<< hexCh(oc);
|
||||
}
|
||||
else
|
||||
{
|
||||
o << "0x"
|
||||
<< hexCh(oc >> 4)
|
||||
<< hexCh(oc);
|
||||
}
|
||||
}
|
||||
|
||||
void prtCh(std::ostream& o, uint c, bool useTalx)
|
||||
{
|
||||
int oc = (int)(re2c::wFlag || !useTalx ? c : re2c::talx[c]);
|
||||
|
||||
switch (oc)
|
||||
{
|
||||
case '\'':
|
||||
o << "\\'";
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
o << "\\n";
|
||||
break;
|
||||
|
||||
case '\t':
|
||||
o << "\\t";
|
||||
break;
|
||||
|
||||
case '\v':
|
||||
o << "\\v";
|
||||
break;
|
||||
|
||||
case '\b':
|
||||
o << "\\b";
|
||||
break;
|
||||
|
||||
case '\r':
|
||||
o << "\\r";
|
||||
break;
|
||||
|
||||
case '\f':
|
||||
o << "\\f";
|
||||
break;
|
||||
|
||||
case '\a':
|
||||
o << "\\a";
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
o << "\\\\";
|
||||
break;
|
||||
|
||||
default:
|
||||
|
||||
if ((oc < 256) && isprint(oc))
|
||||
{
|
||||
o << (char) oc;
|
||||
}
|
||||
else if (re2c::uFlag)
|
||||
{
|
||||
o << "0x"
|
||||
<< hexCh(oc >> 20)
|
||||
<< hexCh(oc >> 16)
|
||||
<< hexCh(oc >> 12)
|
||||
<< hexCh(oc >> 8)
|
||||
<< hexCh(oc >> 4)
|
||||
<< hexCh(oc);
|
||||
}
|
||||
else if (re2c::wFlag)
|
||||
{
|
||||
o << "0x"
|
||||
<< hexCh(oc >> 12)
|
||||
<< hexCh(oc >> 8)
|
||||
<< hexCh(oc >> 4)
|
||||
<< hexCh(oc);
|
||||
}
|
||||
else
|
||||
{
|
||||
o << '\\' << octCh(oc / 64) << octCh(oc / 8) << octCh(oc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void printSpan(std::ostream& o, uint lb, uint ub)
|
||||
{
|
||||
if (lb > ub)
|
||||
{
|
||||
o << "*";
|
||||
}
|
||||
|
||||
o << "[";
|
||||
|
||||
if ((ub - lb) == 1)
|
||||
{
|
||||
prtCh(o, lb);
|
||||
}
|
||||
else
|
||||
{
|
||||
prtCh(o, lb);
|
||||
o << "-";
|
||||
prtCh(o, ub - 1);
|
||||
}
|
||||
|
||||
o << "]";
|
||||
}
|
||||
|
||||
uint Span::show(std::ostream &o, uint lb) const
|
||||
{
|
||||
if (to)
|
||||
{
|
||||
printSpan(o, lb, ub);
|
||||
o << " " << to->label << "; ";
|
||||
}
|
||||
|
||||
return ub;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &o, const State &s)
|
||||
{
|
||||
o << "state " << s.label;
|
||||
|
||||
if (s.rule)
|
||||
{
|
||||
o << " accepts " << s.rule->accept;
|
||||
}
|
||||
|
||||
o << "\n";
|
||||
|
||||
uint lb = 0;
|
||||
|
||||
for (uint i = 0; i < s.go.nSpans; ++i)
|
||||
{
|
||||
lb = s.go.span[i].show(o, lb);
|
||||
}
|
||||
|
||||
return o;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &o, const DFA &dfa)
|
||||
{
|
||||
for (State *s = dfa.head; s; s = s->next)
|
||||
{
|
||||
o << s << "\n\n";
|
||||
}
|
||||
|
||||
return o;
|
||||
}
|
||||
|
||||
/*
 * Construct an empty state: zero label/depth/kernel count, null
 * pointers, both flags cleared and a default-constructed Go table.
 */
State::State()
	: label(0)
	, rule(NULL)
	, next(NULL)
	, link(NULL)
	, depth(0)
	, kCount(0)
	, kernel(NULL)
	, isPreCtxt(false)
	, isBase(false)
	, go()
	, action(NULL)
{
}
|
||||
|
||||
/*
 * Release what the state owns: its action object, its kernel array and
 * the span array of its transition table.
 */
State::~State()
{
	delete action;      // single object
	delete [] kernel;   // array allocated with new[]
	delete [] go.span;  // array allocated with new[]
}
|
||||
|
||||
/*
 * Append to the output array `cP` every not-yet-marked instruction
 * reachable from `i`, marking each one as it is added.
 *
 * FORK instructions are followed both to `i + 1` (recursively) and to
 * their link; GOTO and CTXT instructions are followed to their link;
 * any other tag ends the walk.  Returns the advanced output pointer.
 */
static Ins **closure(Ins **cP, Ins *i)
{
	for (;;)
	{
		if (isMarked(i))
		{
			return cP;
		}

		mark(i);
		*cP++ = i;

		if (i->i.tag == FORK)
		{
			cP = closure(cP, i + 1);
		}
		else if (i->i.tag != GOTO && i->i.tag != CTXT)
		{
			return cP;
		}

		// FORK, GOTO and CTXT all continue at the linked instruction.
		i = (Ins*) i->i.link;
	}
}
|
||||
|
||||
struct GoTo
|
||||
{
|
||||
Char ch;
|
||||
void *to;
|
||||
};
|
||||
|
||||
/*
 * Build the DFA from the instruction array `ins` (`ni` entries) for the
 * character range [lb, ub).  `rep` is indexed by character offset when
 * the span tables are built.
 *
 * States are discovered through findState(), which pushes new states on
 * the `toDo` list; the loop below pops them one at a time, computes the
 * accepting rule, the target state for each character seen, and finally
 * the compacted span table.
 */
DFA::DFA(Ins *ins, uint ni, uint lb, uint ub, Char *rep)
	: lbChar(lb)
	, ubChar(ub)
	, nStates(0)
	, head(NULL)
	, tail(&head)
	, toDo(NULL)
{
	// Scratch buffers reused for every state.
	Ins **work = new Ins * [ni + 1];
	const uint nc = ub - lb;
	GoTo *goTo = new GoTo[nc];
	Span *span = new Span[nc];
	memset((char*) goTo, 0, nc*sizeof(GoTo));

	// The initial state is the closure of the first instruction.
	findState(work, closure(work, &ins[0]) - work);

	while (toDo)
	{
		State *s = toDo;
		toDo = s->link;

		uint nGoTos = 0;
		s->rule = NULL;

		// Pass 1: classify the kernel instructions.
		for (Ins **kp = s->kernel; *kp; ++kp)
		{
			Ins *i = *kp;

			if (i->i.tag == CHAR)
			{
				for (Ins *chr = i + 1; chr < (Ins*) i->i.link; ++chr)
				{
					GoTo &slot = goTo[chr->c.value - lb];

					// Chain this instruction in front of the ones
					// already recorded for the same character; a
					// null chain means the character is new.
					chr->c.link = slot.to;
					if (!chr->c.link)
					{
						goTo[nGoTos++].ch = chr->c.value;
					}

					slot.to = chr;
				}
			}
			else if (i->i.tag == TERM)
			{
				// Keep the rule with the smallest accept number.
				RuleOp *candidate = (RuleOp*) i->i.link;
				if (!s->rule || candidate->accept < s->rule->accept)
				{
					s->rule = candidate;
				}
			}
			else if (i->i.tag == CTXT)
			{
				s->isPreCtxt = true;
			}
		}

		// Pass 2: turn every instruction chain into a target state.
		for (uint g = 0; g < nGoTos; ++g)
		{
			GoTo *go = &goTo[goTo[g].ch - lb];
			Ins **cP = work;
			Ins *i = (Ins*) go->to;

			while (i)
			{
				cP = closure(cP, i + i->c.bump);
				i = (Ins*) i->c.link;
			}

			go->to = findState(work, cP - work);
		}

		// Pass 3: merge runs of characters with the same target.
		uint nSpans = 0;
		for (uint j = 0; j < nc;)
		{
			State *to = (State*) goTo[rep[j]].to;

			do
			{
				++j;
			}
			while (j < nc && goTo[rep[j]].to == to);

			span[nSpans].ub = lb + j;
			span[nSpans].to = to;
			++nSpans;
		}
		s->go.nSpans = nSpans;

		// Clear the targets touched for this state.
		for (uint k = nGoTos; k > 0;)
		{
			--k;
			goTo[goTo[k].ch - lb].to = NULL;
		}

		s->go.span = new Span[s->go.nSpans];
		memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span));

		// The Action constructor installs the new Match as s->action.
		(void) new Match(s);
	}

	delete [] work;
	delete [] goTo;
	delete [] span;
}
|
||||
|
||||
/*
 * Delete every state on the `head` chain, leaving `head` null.
 */
DFA::~DFA()
{
	while (head)
	{
		State *doomed = head;
		head = doomed->next;
		delete doomed;
	}
}
|
||||
|
||||
/*
 * Insert state `s` into the state list at link position `a`, giving it
 * the next free label.  When `a` is the current tail link, the tail is
 * moved to the new state's `next` field.
 */
void DFA::addState(State **a, State *s)
{
	const bool appending = (a == tail);

	s->label = nStates++;
	s->next = *a;
	*a = s;

	if (appending)
	{
		tail = &s->next;
	}
}
|
||||
|
||||
/*
 * Return the state whose kernel equals the instruction set in `kernel`
 * (the first `kCount` entries), creating it and queueing it on `toDo`
 * when no such state exists yet.
 *
 * `kernel` needs room for one entry more than `kCount`, because a
 * terminating NULL is stored after the last entry.  Only CHAR, TERM and
 * CTXT instructions are kept; the others are unmarked and dropped.  All
 * kept instructions are unmarked again before returning.
 */
State *DFA::findState(Ins **kernel, uint kCount)
{
	kernel[kCount] = NULL;

	// Compact the kernel in place.
	Ins **keep = kernel;
	for (Ins **scan = kernel; *scan; ++scan)
	{
		Ins *ins = *scan;

		if (ins->i.tag == CHAR || ins->i.tag == TERM || ins->i.tag == CTXT)
		{
			*keep++ = ins;
		}
		else
		{
			unmark(ins);
		}
	}

	kCount = keep - kernel;
	kernel[kCount] = NULL;

	// An existing state matches when it has the same kernel size and
	// every one of its kernel instructions is currently marked.
	State *s = head;
	for (; s; s = s->next)
	{
		if (s->kCount != kCount)
		{
			continue;
		}

		bool allMarked = true;
		for (Ins **kp = s->kernel; *kp; ++kp)
		{
			if (!isMarked(*kp))
			{
				allMarked = false;
				break;
			}
		}

		if (allMarked)
		{
			break;
		}
	}

	if (!s)
	{
		// No match: create the state and schedule it for processing.
		s = new State;
		addState(tail, s);
		s->kCount = kCount;
		s->kernel = new Ins * [kCount + 1];
		memcpy(s->kernel, kernel, (kCount + 1)*sizeof(Ins*));
		s->link = toDo;
		toDo = s;
	}

	for (Ins **kp = kernel; *kp; ++kp)
	{
		unmark(*kp);
	}

	return s;
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
366
tools/re2c/dfa.h
366
tools/re2c/dfa.h
|
@ -1,366 +0,0 @@
|
|||
/* $Id: dfa.h 569 2006-06-05 22:14:00Z helly $ */
|
||||
#ifndef _dfa_h
|
||||
#define _dfa_h
|
||||
|
||||
#include <iosfwd>
|
||||
#include <map>
|
||||
#include "re.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
extern void prtCh(std::ostream&, uint, bool useTalx = true);
|
||||
extern void prtHex(std::ostream&, uint, bool useTalx = true);
|
||||
extern void prtChOrHex(std::ostream&, uint, bool useTalx = true);
|
||||
extern void printSpan(std::ostream&, uint, uint);
|
||||
|
||||
class DFA;
|
||||
|
||||
class State;
|
||||
|
||||
class Action
|
||||
{
|
||||
|
||||
public:
|
||||
State *state;
|
||||
|
||||
public:
|
||||
Action(State*);
|
||||
virtual ~Action();
|
||||
|
||||
virtual void emit(std::ostream&, uint, bool&) const = 0;
|
||||
virtual bool isRule() const;
|
||||
virtual bool isMatch() const;
|
||||
virtual bool isInitial() const;
|
||||
virtual bool readAhead() const;
|
||||
|
||||
#ifdef PEDANTIC
|
||||
protected:
|
||||
Action(const Action& oth)
|
||||
: state(oth.state)
|
||||
{
|
||||
}
|
||||
Action& operator = (const Action& oth)
|
||||
{
|
||||
state = oth.state;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class Match: public Action
|
||||
{
|
||||
public:
|
||||
Match(State*);
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
bool isMatch() const;
|
||||
};
|
||||
|
||||
class Enter: public Action
|
||||
{
|
||||
public:
|
||||
uint label;
|
||||
|
||||
public:
|
||||
Enter(State*, uint);
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
};
|
||||
|
||||
class Initial: public Enter
|
||||
{
|
||||
public:
|
||||
bool setMarker;
|
||||
|
||||
public:
|
||||
Initial(State*, uint, bool);
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
bool isInitial() const;
|
||||
};
|
||||
|
||||
class Save: public Match
|
||||
{
|
||||
|
||||
public:
|
||||
uint selector;
|
||||
|
||||
public:
|
||||
Save(State*, uint);
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
bool isMatch() const;
|
||||
};
|
||||
|
||||
class Move: public Action
|
||||
{
|
||||
|
||||
public:
|
||||
Move(State*);
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
};
|
||||
|
||||
class Accept: public Action
|
||||
{
|
||||
|
||||
public:
|
||||
typedef std::map<uint, State*> RuleMap;
|
||||
|
||||
uint nRules;
|
||||
uint *saves;
|
||||
State **rules;
|
||||
RuleMap mapRules;
|
||||
|
||||
public:
|
||||
Accept(State*, uint, uint*, State**);
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
void emitBinary(std::ostream &o, uint ind, uint l, uint r, bool &readCh) const;
|
||||
void genRuleMap();
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
Accept(const Accept& oth)
|
||||
: Action(oth)
|
||||
, nRules(oth.nRules)
|
||||
, saves(oth.saves)
|
||||
, rules(oth.rules)
|
||||
{
|
||||
}
|
||||
Accept& operator=(const Accept& oth)
|
||||
{
|
||||
new(this) Accept(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class Rule: public Action
|
||||
{
|
||||
|
||||
public:
|
||||
RuleOp *rule;
|
||||
|
||||
public:
|
||||
Rule(State*, RuleOp*);
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
bool isRule() const;
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
Rule (const Rule& oth)
|
||||
: Action(oth)
|
||||
, rule(oth.rule)
|
||||
{
|
||||
}
|
||||
Rule& operator=(const Rule& oth)
|
||||
{
|
||||
new(this) Rule(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class Span
|
||||
{
|
||||
|
||||
public:
|
||||
uint ub;
|
||||
State *to;
|
||||
|
||||
public:
|
||||
uint show(std::ostream&, uint) const;
|
||||
};
|
||||
|
||||
class Go
|
||||
{
|
||||
public:
|
||||
Go()
|
||||
: nSpans(0)
|
||||
, wSpans(~0u)
|
||||
, lSpans(~0u)
|
||||
, dSpans(~0u)
|
||||
, lTargets(~0u)
|
||||
, span(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
uint nSpans; // number of spans
|
||||
uint wSpans; // number of spans in wide mode
|
||||
uint lSpans; // number of low (non wide) spans
|
||||
uint dSpans; // number of decision spans (decide between g and b mode)
|
||||
uint lTargets;
|
||||
Span *span;
|
||||
|
||||
public:
|
||||
void genGoto( std::ostream&, uint ind, const State *from, const State *next, bool &readCh);
|
||||
void genBase( std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
|
||||
void genLinear(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
|
||||
void genBinary(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
|
||||
void genSwitch(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const;
|
||||
void genCpGoto(std::ostream&, uint ind, const State *from, const State *next, bool &readCh) const;
|
||||
void compact();
|
||||
void unmap(Go*, const State*);
|
||||
};
|
||||
|
||||
class State
|
||||
{
|
||||
|
||||
public:
|
||||
uint label;
|
||||
RuleOp *rule;
|
||||
State *next;
|
||||
State *link;
|
||||
uint depth; // for finding SCCs
|
||||
uint kCount;
|
||||
Ins **kernel;
|
||||
|
||||
bool isPreCtxt;
|
||||
bool isBase;
|
||||
Go go;
|
||||
Action *action;
|
||||
|
||||
public:
|
||||
State();
|
||||
~State();
|
||||
void emit(std::ostream&, uint, bool&) const;
|
||||
friend std::ostream& operator<<(std::ostream&, const State&);
|
||||
friend std::ostream& operator<<(std::ostream&, const State*);
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
State(const State& oth)
|
||||
: label(oth.label)
|
||||
, rule(oth.rule)
|
||||
, next(oth.next)
|
||||
, link(oth.link)
|
||||
, depth(oth.depth)
|
||||
, kCount(oth.kCount)
|
||||
, kernel(oth.kernel)
|
||||
, isBase(oth.isBase)
|
||||
, go(oth.go)
|
||||
, action(oth.action)
|
||||
{
|
||||
}
|
||||
State& operator = (const State& oth)
|
||||
{
|
||||
new(this) State(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class DFA
|
||||
{
|
||||
|
||||
public:
|
||||
uint lbChar;
|
||||
uint ubChar;
|
||||
uint nStates;
|
||||
State *head, **tail;
|
||||
State *toDo;
|
||||
|
||||
public:
|
||||
DFA(Ins*, uint, uint, uint, Char*);
|
||||
~DFA();
|
||||
void addState(State**, State*);
|
||||
State *findState(Ins**, uint);
|
||||
void split(State*);
|
||||
|
||||
void findSCCs();
|
||||
void findBaseState();
|
||||
void emit(std::ostream&, uint);
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const DFA&);
|
||||
friend std::ostream& operator<<(std::ostream&, const DFA*);
|
||||
|
||||
#ifdef PEDANTIC
|
||||
DFA(const DFA& oth)
|
||||
: lbChar(oth.lbChar)
|
||||
, ubChar(oth.ubChar)
|
||||
, nStates(oth.nStates)
|
||||
, head(oth.head)
|
||||
, tail(oth.tail)
|
||||
, toDo(oth.toDo)
|
||||
{
|
||||
}
|
||||
DFA& operator = (const DFA& oth)
|
||||
{
|
||||
new(this) DFA(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
inline Action::Action(State *s) : state(s)
|
||||
{
|
||||
delete s->action;
|
||||
s->action = this;
|
||||
}
|
||||
|
||||
inline Action::~Action()
|
||||
{
|
||||
}
|
||||
|
||||
inline bool Action::isRule() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool Action::isMatch() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool Action::isInitial() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool Action::readAhead() const
|
||||
{
|
||||
return !isMatch() || (state && state->next && state->next->action && !state->next->action->isRule());
|
||||
}
|
||||
|
||||
inline Match::Match(State *s) : Action(s)
|
||||
{ }
|
||||
|
||||
inline bool Match::isMatch() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
inline Enter::Enter(State *s, uint l) : Action(s), label(l)
|
||||
{ }
|
||||
|
||||
inline Initial::Initial(State *s, uint l, bool b) : Enter(s, l), setMarker(b)
|
||||
{ }
|
||||
|
||||
inline bool Initial::isInitial() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
inline Save::Save(State *s, uint i) : Match(s), selector(i)
|
||||
{ }
|
||||
|
||||
inline bool Save::isMatch() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool Rule::isRule() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream &o, const State *s)
|
||||
{
|
||||
return o << *s;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream &o, const DFA *dfa)
|
||||
{
|
||||
return o << *dfa;
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load diff
|
@ -1,48 +0,0 @@
|
|||
@Article{Bumbulis94,
|
||||
author = {Peter Bumbulis and Donald D. Cowan},
|
||||
title = {RE2C -- A More Versatile Scanner Generator},
|
||||
journal = "ACM Letters on Programming Languages and Systems",
|
||||
volume = 2,
|
||||
number = "1--4",
|
||||
year = 1994,
|
||||
abstract = {
|
||||
It is usually claimed that lexical analysis routines are still coded by
|
||||
hand, despite the widespread availability of scanner generators, for
|
||||
efficiency reasons. While efficiency is a consideration, there exist
|
||||
freely available scanner generators such as GLA \cite{Gray88} that can
|
||||
generate scanners that are faster than most hand-coded ones. However,
|
||||
most generated scanners are tailored for a particular environment, and
|
||||
retargetting these scanners to other environments, if possible, is
|
||||
usually complex enough to make a hand-coded scanner more appealing. In
|
||||
this paper we describe RE2C, a scanner generator that not only generates
|
||||
scanners which are faster (and usually smaller) than those produced by
|
||||
any other scanner generator known to the authors, including GLA, but
|
||||
also adapt easily to any environment.
|
||||
}
|
||||
}
|
||||
@Article{Gray88,
|
||||
author = {Robert W. Gray},
|
||||
title = {{$\gamma$-GLA} - {A} Generator for Lexical Analyzers That
|
||||
Programmers Can Use},
|
||||
journal = {USENIX Conference Proceedings},
|
||||
year = {1988},
|
||||
month = {June},
|
||||
pages = {147-160},
|
||||
abstract = {Writing an efficient lexical analyzer for even a simple
|
||||
language is not a trivial task, and should not be done by hand. We
|
||||
describe GLA, a tool that generates very efficient scanners. These
|
||||
scanners do not use the conventional transition matrix, but instead
|
||||
use a few 128 element vectors. Scanning time is only slightly
|
||||
greater than the absolute minimum --- the time it takes to look at
|
||||
each character in a file. The GLA language allows simple, concise
|
||||
specification of scanners. Augmenting regular expressions with
|
||||
auxiliary scanners easily handles nasty problems such as C comments
|
||||
and C literal constants. We formalize the connection between token
|
||||
scanning and token processing by associating a processor with
|
||||
appropriate patterns. A library of canned descriptions simplifies the
|
||||
specification of commonly used language pieces --- such as,
|
||||
C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully
|
||||
tuned lexical analysis support modules are provided for error
|
||||
handling, input buffering, storing identifiers in hash tables and
|
||||
manipulating denotations.}
|
||||
}
|
83
tools/re2c/examples/001_upn_calculator/README
Normal file
83
tools/re2c/examples/001_upn_calculator/README
Normal file
|
@ -0,0 +1,83 @@
|
|||
re2c lesson 001_upn_calculator, (c) M. Boerger 2006
|
||||
|
||||
This lesson gets you started with re2c. In the end you will have an easy RPN
|
||||
(reverse polish notation) calculator for use at command line.
|
||||
|
||||
You will learn about the basic interface of re2c when scanning input strings.
|
||||
How to detect the end of the input and use that to stop scanning in order to
|
||||
avoid problems.
|
||||
|
||||
Once you have successfully installed re2c you can use it to generate *.c files
|
||||
from the *.re files presented in this lesson. Actually the expected *.c files
|
||||
are already present. So you should name them *.cc or something alike or just
|
||||
give them a different name like test.c. To do so you simply change into the
|
||||
directory and execute the following command:
|
||||
|
||||
re2c calc_001.re > test.c
|
||||
|
||||
Then use your compiler to compile that code and run it. If you are using gcc
|
||||
you simply do the following:
|
||||
|
||||
gcc -o test.o test.c
|
||||
./test.o <input_file_name>
|
||||
|
||||
If you are using windows you might want to read till the end of this lesson.
|
||||
|
||||
When you want to debug the code it helps to make re2c generate working #line
|
||||
information. To do so you simply specify the output file using the -o switch
|
||||
followed by the output filename:
|
||||
|
||||
re2c -o test.c calc_001.re
|
||||
|
||||
The input files *.re each contain basic step-by-step comments that explain what is
|
||||
going on and what you can see in the examples.
|
||||
|
||||
In order to optimize the generated code we will use the -s command line switch
|
||||
of re2c. This tells re2c to generate code that uses if statements rather
|
||||
than endless switch/case expressions where appropriate. Note that the file name
|
||||
extension is actually '.s.re' to tell the test system to use the -s switch. To
|
||||
invoke re2c you do the following:
|
||||
|
||||
re2c -s -o test.c calc_006.s.re
|
||||
|
||||
Finally we use the -b switch to have the code use a decision table. The -b
|
||||
switch also contains the -s behavior.
|
||||
|
||||
re2c -b -o test.c calc_007.b.re
|
||||
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
For windows users Lynn Allan provided some additional stuff to get you started
|
||||
in the Microsoft world. This addon resides in the windows subdirectory and
|
||||
gives you something to experiment with. The code in that directory is based
|
||||
on the first step and has the following changes:
|
||||
|
||||
* vc6 .dsp/.dsw and vc7/vc8 .sln/.vcproj project files that have "Custom Build
|
||||
Steps" that can tell when main.re changes, and know how to generate main.c
|
||||
from main.re. They assume that you unpacked the zip package and have re2c
|
||||
itself built or installed in Release and Release-2005 directory respectively.
|
||||
If re2c cannot be found you need to modify the custom build step and correct
|
||||
the path to re2c.
|
||||
|
||||
* BuildAndRun.bat to do command line re2c and then cl and then run the
|
||||
executable (discontinues with message if errors).
|
||||
|
||||
* built-in cppunit-like test to confirm it worked as expected.
|
||||
|
||||
* array of test strings "fed" to scan rather than file contents to facilitate
|
||||
testing and also reduce the newbie learning curve.
|
||||
|
||||
* HiResTimer output for 10,000 loops and 100,000 loops. While this might be
|
||||
excessive for this lesson, it illustrates how to do it for subsequent lessons
|
||||
and your own stuff using windows. Also it shows that Release build is as fast
|
||||
as strncmp for this test and can probably be made significantly faster.
|
||||
|
||||
* If you want to build the other steps of this lesson using windows tools
|
||||
simply copy the *.re files into the windows directory as main.re and rebuild.
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
Sidenote: UPN is the German translation of RPN, somehow hardcoded into the
|
||||
author's brain :-)
|
84
tools/re2c/examples/001_upn_calculator/calc_001.re
Normal file
84
tools/re2c/examples/001_upn_calculator/calc_001.re
Normal file
|
@ -0,0 +1,84 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_001, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- basic interface for string reading
|
||||
|
||||
. We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL
|
||||
. YYCTYPE is the type re2c operates on or in other words the type that
|
||||
it generates code for. While it is not a big difference when we were
|
||||
using 'unsigned char' here we would need to run re2c with option -w
|
||||
to fully support types with sizeof() > 1.
|
||||
. YYCURSOR is used internally and holds the current scanner position. In
|
||||
expression handlers, the code blocks after re2c expressions, this can be
|
||||
used to identify the end of the token.
|
||||
. YYMARKER is not always being used so we set an initial value to avoid
|
||||
a compiler warning. Here we could also omit it completely.
|
||||
. YYLIMIT stores the end of the input. Unfortunately we have to use strlen()
|
||||
in this lesson. In the next example we see one way to get rid of it.
|
||||
. We use a 'for(;;)'-loop around the scanner block. We could have used a
|
||||
'while(1)'-loop instead but some compilers generate a warning for it.
|
||||
. To make the output more readable we use 're2c:indent:top' scanner
|
||||
configuration that configures re2c to prepend a single tab (the default)
|
||||
to the beginning of each output line.
|
||||
. The following lines are expressions and for each expression we output the
|
||||
token name and continue the scanner loop.
|
||||
. The second last token detects the end of our input, the terminating zero in
|
||||
our input string. In other scanners detecting the end of input may vary.
|
||||
For example binary code may contain \0 as valid input.
|
||||
. The last expression accepts any input character. It tells re2c to accept
|
||||
the opposite of the empty range. This includes numbers and our tokens but
|
||||
as re2c goes from top to bottom when evaluating the expressions this is no
|
||||
problem.
|
||||
. The first three rules show that re2c actually prioritizes the expressions
|
||||
from top to bottom. Octal numbers require a starting "0" and the actual
|
||||
number. Normal numbers start with a digit greater than 0. And zero is finally a
|
||||
special case. A single "0" is detected by the last rule of this set. And
|
||||
a valid octal number is already being detected by the first rule. This even
|
||||
includes multi "0" sequences that in octal notation also means zero.
|
||||
Another way would be to only use two rules:
|
||||
"0" [0-9]+
|
||||
"0" | ( [1-9] [0-9]* )
|
||||
A full description of re2c rule syntax can be found in the manual.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Scan the first `l` characters of `s`, printing one token name per
 * line.  Returns 0 when the terminating "\000" rule matches and 1 when
 * the catch-all rule matches.  The re2c interface macros are bound to
 * the local cursor/marker variables below.
 */
int scan(char *s, int l)
{
	char *cursor = s;
	char *marker = 0;
#define YYCTYPE         char
#define YYCURSOR        cursor
#define YYLIMIT         (s+l)
#define YYMARKER        marker
#define YYFILL(n)

	for(;;)
	{
/*!re2c
	re2c:indent:top = 2;
	"0"[0-9]+	{ printf("Oct\n");	continue; }
	[1-9][0-9]*	{ printf("Num\n");	continue; }
	"0"		{ printf("Num\n");	continue; }
	"+"		{ printf("+\n");	continue; }
	"-"		{ printf("-\n");	continue; }
	"\000"		{ printf("EOF\n");	return 0; }
	[^]		{ printf("ERR\n");	return 1; }
*/
	}
}
|
||||
|
||||
/*
 * Scan the first command line argument; without one, print a usage
 * line to stderr and return 1.
 */
int main(int argc, char **argv)
{
	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 1;
	}

	return scan(argv[1], strlen(argv[1]));
}
|
69
tools/re2c/examples/001_upn_calculator/calc_002.re
Normal file
69
tools/re2c/examples/001_upn_calculator/calc_002.re
Normal file
|
@ -0,0 +1,69 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_002, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- making use of YYFILL
|
||||
|
||||
. Here we modified the scanner to not require strlen() on the call. Instead
|
||||
we compute limit on the fly. That is whenever more input is needed we
|
||||
search for the terminating \0 in the next n chars the scanner needs.
|
||||
. If there is not enough input we quit the scanner.
|
||||
. Note that in lesson_001 YYLIMIT was a character pointer computed only once.
|
||||
Here it is of course also of type YYCTYPE but a variable that gets reevaluated
|
||||
by YYFILL().
|
||||
. To make the code smaller we take advantage of the fact that our loop has no
|
||||
break so far. This allows us to use break here and have the code that is
|
||||
used for YYFILL() not contain the printf in every occurrence. That way the
|
||||
generated code gets smaller.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Advance the input limit on behalf of YYFILL(n).
 *
 * Starting at the character after p, walk forward until either a zero byte
 * is found or the request counter n has been used up, then store the
 * position reached in *l.  The result is non-zero when the counter ended up
 * at or below zero, and zero when the terminator was hit while the counter
 * was still positive.
 */
int fill(char *p, int n, char **l)
{
    for (;;)
    {
        ++p;
        if (*p == '\0')
        {
            break;          /* terminator found; the counter is left untouched */
        }
        if (n-- == 0)
        {
            break;          /* counter exhausted (it is now -1) */
        }
    }
    *l = p;
    return n <= 0;
}
|
||||
|
||||
/*
 * Tokenize the zero-terminated expression at s, printing one line per token.
 * The input limit l is not known in advance: YYFILL() calls fill() to move
 * it forward and leaves the loop via break when fill() returns zero.
 * Returns 0 after "EOF", 1 after "ERR" and 2 after "OOD" (loop left by
 * YYFILL).  The loop body is generated by re2c from the rule block below.
 */
int scan(char *s)
{
    char *p = s;    /* current scanner position (YYCURSOR) */
    char *l = s;    /* input limit (YYLIMIT), updated by fill() */
    char *q = 0;    /* backtracking position (YYMARKER) */
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         l
#define YYMARKER        q
#define YYFILL(n)       { if (!fill(p, n, &l)) break; }

    for(;;)
    {
/*!re2c
    re2c:indent:top = 2;
    "0"[0-9]+       { printf("Oct\n");  continue; }
    [1-9][0-9]*     { printf("Num\n");  continue; }
    "0"             { printf("Num\n");  continue; }
    "+"             { printf("+\n");    continue; }
    "-"             { printf("-\n");    continue; }
    "\000"          { printf("EOF\n");  return 0; }
    [^]             { printf("ERR\n");  return 1; }
*/
    }
    printf("OOD\n"); return 2;
}
|
||||
|
||||
/*
 * Scan the first command line argument and use the result of scan() as exit
 * status.  Without an argument a usage line is written to stderr and 0 is
 * returned.
 */
int main(int argc, char **argv)
{
    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 0;
    }

    return scan(argv[1]);
}
|
61
tools/re2c/examples/001_upn_calculator/calc_003.re
Normal file
61
tools/re2c/examples/001_upn_calculator/calc_003.re
Normal file
|
@ -0,0 +1,61 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_003, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- making use of YYFILL
|
||||
|
||||
. Again provide the length of the input to generate the limit only once. Now
|
||||
we can use YYFILL() to detect the end and simply return since YYFILL() is
|
||||
only being used if the next scanner run might use more chars than YYLIMIT
|
||||
allows.
|
||||
. Note that we now use (s+l+2) instead of (s+l) as we did in lesson_001. In
|
||||
the first lesson we did not quit from YYFILL() and used a special rule to
|
||||
detect the end of input. Here we use the fact that we know the exact end
|
||||
of input and that this length does not include the terminating zero. Since
|
||||
YYLIMIT points to the first character behind the used buffer we use "+ 2".
|
||||
If we would use "+1" we could drop the "\000" rule but could no longer
|
||||
distinguish between end of input and out of data.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Tokenize the expression at s (l bytes, not counting the terminating zero),
 * printing one line per token.  YYLIMIT is placed at s+l+2, and YYFILL()
 * prints "OOD" and returns 2 directly.
 * Returns 0 after "EOF", 1 after "ERR" and 2 after "OOD".  The loop body is
 * generated by re2c from the rule block below.
 */
int scan(char *s, int l)
{
    char *p = s;    /* current scanner position (YYCURSOR) */
    char *q = 0;    /* backtracking position (YYMARKER) */
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         (s+l+2)
#define YYMARKER        q
#define YYFILL(n)       { printf("OOD\n"); return 2; }

    for(;;)
    {
/*!re2c
    re2c:indent:top = 2;
    "0"[0-9]+       { printf("Oct\n");  continue; }
    [1-9][0-9]*     { printf("Num\n");  continue; }
    "0"             { printf("Num\n");  continue; }
    "+"             { printf("+\n");    continue; }
    "-"             { printf("-\n");    continue; }
    "\000"          { printf("EOF\n");  return 0; }
    [^]             { printf("ERR\n");  return 1; }
*/
    }
    return 0;
}
|
||||
|
||||
/*
 * Scan the first command line argument and use the result of scan() as exit
 * status.  Without an argument a usage line is written to stderr and 0 is
 * returned.
 */
int main(int argc, char **argv)
{
    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 0;
    }

    return scan(argv[1], strlen(argv[1]));
}
|
78
tools/re2c/examples/001_upn_calculator/calc_004.re
Normal file
78
tools/re2c/examples/001_upn_calculator/calc_004.re
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_004, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- making use of definitions
|
||||
. We provide complex rules as definitions. We can even have definitions made
|
||||
up from other definitions. And we could also use definitions as part of
|
||||
rules and not only as full rules as shown in this lesson.
|
||||
|
||||
- showing the tokens
|
||||
. re2c does not store the beginning of a token on its own but we can easily
|
||||
do this by providing variable, in our case t, that is set to YYCURSOR on
|
||||
every loop. If we were not using a loop here the token, we could have used
|
||||
s instead of a new variable instead.
|
||||
. As we use the token for an output function that requires a terminating zero
|
||||
we copy the token. Alternatively we could store the end of the token, then
|
||||
replace it with a zero character and replace it after the token has been
|
||||
used. However that approach is not always acceptable.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Return a freshly malloc()ed, zero-terminated copy of the token [t, l).
 *
 * t points at the first character of the token and l one past its last
 * character.  The caller owns the result and must free() it.  NULL is
 * returned when the allocation fails.
 */
char * tokendup(const char *t, const char *l)
{
    size_t len = (size_t)(l - t);           /* characters in the token */
    char *r = (char*)malloc(len + 1);       /* +1 for the terminator */

    if (r)
    {
        memmove(r, t, len);
        r[len] = '\0';  /* last allocated byte, not one past the buffer */
    }
    return r;
}
|
||||
|
||||
/*
 * Tokenize the expression at s (l bytes, not counting the terminating zero).
 * The start of each token is remembered in t so that number tokens can be
 * copied with tokendup() and printed together with their text.
 * Returns 0 after "EOF", 1 after "ERR" and 2 after "OOD" (printed by
 * YYFILL).  The loop body is generated by re2c from the rule block below.
 */
int scan(char *s, int l)
{
    char *p = s;    /* current scanner position (YYCURSOR) */
    char *q = 0;    /* backtracking position (YYMARKER) */
    char *t;        /* start of the token being scanned */
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         (s+l+2)
#define YYMARKER        q
#define YYFILL(n)       { printf("OOD\n"); return 2; }

    for(;;)
    {
        t = p;
/*!re2c
    re2c:indent:top = 2;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;

    OCT         { t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; }
    INT         { t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; }
    "+"         { printf("+\n");    continue; }
    "-"         { printf("-\n");    continue; }
    "\000"      { printf("EOF\n");  return 0; }
    [^]         { printf("ERR\n");  return 1; }
*/
    }
    return 0;
}
|
||||
|
||||
/*
 * Scan the first command line argument and use the result of scan() as exit
 * status.  Without an argument a usage line is written to stderr and 0 is
 * returned.
 */
int main(int argc, char **argv)
{
    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 0;
    }

    return scan(argv[1], strlen(argv[1]));
}
|
144
tools/re2c/examples/001_upn_calculator/calc_005.re
Normal file
144
tools/re2c/examples/001_upn_calculator/calc_005.re
Normal file
|
@ -0,0 +1,144 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_005, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- turning this lesson into an easy calculator
|
||||
. We are going to write an UPN calculator so we need an additional rule to
|
||||
ignore white space.
|
||||
. Then we need to store the scanned input somewhere and do our math on it.
|
||||
. Also we need to scan all arguments since the main c code gets the input
|
||||
split up into chunks.
|
||||
. In contrast to what we did before we now add a variable res that holds the
|
||||
scanner state. We initialize that variable to 0 and quit the loop when it
|
||||
is non zero. This will also be our return value so that we can use it in
|
||||
function main to generate error information.
|
||||
. To support operating systems where ' and " get passed in program arguments
|
||||
we check for them being first and last input character. If so we correct
|
||||
input pointer and input length. Since now our scanner might not see a
|
||||
terminating zero we change YYLIMIT again and drop the special zero rule.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define DEBUG(stmt) stmt

int  stack[4];      /* operand stack of the calculator */
int  depth = 0;     /* number of values currently on the stack */

/*
 * Convert the digits in [t, l) using the given radix and push the value.
 *
 * Returns 0 on success, or 3 when the stack is already full; in that case
 * nothing is pushed.
 */
int push_num(const char *t, const char *l, int radix)
{
    int num = 0;

    /* compare against the element count, not the size of the array in bytes */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - '0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}
|
||||
|
||||
int stack_add()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] + stack[depth];
|
||||
return 0;
|
||||
}
|
||||
|
||||
int stack_sub()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] - stack[depth];
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Scan the l-byte expression at s and feed it to the calculator stack.
 *
 * The loop runs while the status is 0; each rule stores the result of its
 * stack operation in the status and an unknown character sets it to 1.
 * YYFILL() returns directly: 0 when exactly one value is left on the stack,
 * 2 otherwise.  The loop body is generated by re2c from the rule block below.
 */
int scan(char *s, int l)
{
    char *cursor = s;   /* current scanner position (YYCURSOR) */
    char *marker = 0;   /* backtracking position (YYMARKER) */
    char *tok;          /* start of the token being scanned */
    int status = 0;

#define YYFILL(n)       { return depth == 1 ? 0 : 2; }
#define YYMARKER        marker
#define YYLIMIT         (s+l+1)
#define YYCURSOR        cursor
#define YYCTYPE         char

    while (!status)
    {
        tok = cursor;
/*!re2c
    re2c:indent:top = 2;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(tok, cursor, 8);    continue; }
    INT     { status = push_num(tok, cursor, 10);   continue; }
    "+"     { status = stack_add();     continue; }
    "-"     { status = stack_sub();     continue; }
    [^]     { status = 1;               continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = argv[argp];
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
++inp;
|
||||
len -=2;
|
||||
}
|
||||
res = scan(inp, len);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
162
tools/re2c/examples/001_upn_calculator/calc_006.s.re
Normal file
162
tools/re2c/examples/001_upn_calculator/calc_006.s.re
Normal file
|
@ -0,0 +1,162 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_006, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- avoiding YYFILL()
|
||||
. We use the inplace configuration re2c:yyfill to suppress generation of
|
||||
YYFILL() blocks. This of course means we no longer have to provide the
|
||||
macro.
|
||||
. We also drop the YYMARKER stuff since we know that re2c does not generate
|
||||
it for this example.
|
||||
. Since re2c does no longer check for out of data situations we must do this.
|
||||
For that reason we first reintroduce our zero rule and second we need to
|
||||
ensure that the scanner does not take more than one bytes in one go.
|
||||
|
||||
In the example suppose "0" is passed. The scanner reads the first "0" and
|
||||
then is in an undecided state. The scanner can earliest decide on the next
|
||||
char what the token is. In case of a zero the input ends and it was a
|
||||
number, 0 to be precise. In case of a digit it is an octal number and the
|
||||
next character needs to be read. In case of any other character the scanner
|
||||
will detect an error with the any rule [^].
|
||||
|
||||
Now the above shows that the scanner may read two characters directly. But
|
||||
only if the first is a "0". So we could easily check that if the first char
|
||||
is "0" and the next char is a digit then yet another character is present.
|
||||
But we require our input to be zero terminated. And that means we do not
|
||||
have to check anything for this scanner.
|
||||
|
||||
However with other rule sets re2c might read more than one character in a
|
||||
row. In those cases it is normally hard to impossible to avoid YYFILL.
|
||||
|
||||
- optimizing the generated code by using -s command line switch of re2c
|
||||
. This tells re2c to generate code that uses if statements rather
|
||||
then endless switch/case expressions where appropriate. Note that the
|
||||
generated code now requires the input to be unsigned char rather than char
|
||||
due to the way comparisons are generated.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define DEBUG(stmt) stmt

int  stack[4];      /* operand stack of the calculator */
int  depth = 0;     /* number of values currently on the stack */

/*
 * Convert the digits in [t, l) using the given radix and push the value.
 *
 * Returns 0 on success, or 3 when the stack is already full; in that case
 * nothing is pushed.
 */
int push_num(const unsigned char *t, const unsigned char *l, int radix)
{
    int num = 0;

    /* compare against the element count, not the size of the array in bytes */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - (unsigned char)'0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}
|
||||
|
||||
int stack_add()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] + stack[depth];
|
||||
DEBUG(printf("+\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int stack_sub()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] - stack[depth];
|
||||
DEBUG(printf("-\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Scan the zero-terminated expression at s and feed it to the calculator
 * stack.  YYFILL is disabled for this scanner.
 *
 * The loop runs while the status is 0; each rule stores the result of its
 * stack operation in the status and an unknown character sets it to 1.  The
 * terminating zero ends the loop with status 0 when exactly one value is
 * left on the stack and 2 otherwise.  The loop body is generated by re2c
 * from the rule block below.
 */
int scan(char *s)
{
    unsigned char *cursor = (unsigned char*)s;  /* scanner position (YYCURSOR) */
    unsigned char *tok;                         /* start of the current token */
    int status = 0;

#define YYCURSOR        cursor
#define YYCTYPE         unsigned char

    while (!status)
    {
        tok = cursor;
/*!re2c
    re2c:indent:top    = 2;
    re2c:yyfill:enable = 0;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(tok, cursor, 8);    continue; }
    INT     { status = push_num(tok, cursor, 10);   continue; }
    "+"     { status = stack_add();     continue; }
    "-"     { status = stack_sub();     continue; }
    "\000"  { status = depth == 1 ? 0 : 2;  break; }
    [^]     { status = 1;               continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = strdup(argv[argp]);
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
inp[len - 1] = '\0';
|
||||
++inp;
|
||||
}
|
||||
res = scan(inp);
|
||||
free(inp);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
135
tools/re2c/examples/001_upn_calculator/calc_007.b.re
Normal file
135
tools/re2c/examples/001_upn_calculator/calc_007.b.re
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_007, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- optimizing the generated code by using -b command line switch of re2c
|
||||
. This tells re2c to generate code that uses a decision table. The -b switch
|
||||
also contains the -s behavior. And -b also requires the input to be
|
||||
unsigned chars.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define DEBUG(stmt) stmt

int  stack[4];      /* operand stack of the calculator */
int  depth = 0;     /* number of values currently on the stack */

/*
 * Convert the digits in [t, l) using the given radix and push the value.
 *
 * Returns 0 on success, or 3 when the stack is already full; in that case
 * nothing is pushed.
 */
int push_num(const unsigned char *t, const unsigned char *l, int radix)
{
    int num = 0;

    /* compare against the element count, not the size of the array in bytes */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - (unsigned char)'0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}
|
||||
|
||||
int stack_add()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] + stack[depth];
|
||||
DEBUG(printf("+\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int stack_sub()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] - stack[depth];
|
||||
DEBUG(printf("+\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Scan the zero-terminated expression at s and feed it to the calculator
 * stack.  YYFILL is disabled for this scanner.
 *
 * The loop runs while the status is 0; each rule stores the result of its
 * stack operation in the status and an unknown character sets it to 1.  The
 * terminating zero ends the loop with status 0 when exactly one value is
 * left on the stack and 2 otherwise.  The loop body is generated by re2c
 * from the rule block below.
 */
int scan(char *s)
{
    unsigned char *cursor = (unsigned char*)s;  /* scanner position (YYCURSOR) */
    unsigned char *tok;                         /* start of the current token */
    int status = 0;

#define YYCURSOR        cursor
#define YYCTYPE         unsigned char

    while (!status)
    {
        tok = cursor;
/*!re2c
    re2c:indent:top    = 2;
    re2c:yyfill:enable = 0;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(tok, cursor, 8);    continue; }
    INT     { status = push_num(tok, cursor, 10);   continue; }
    "+"     { status = stack_add();     continue; }
    "-"     { status = stack_sub();     continue; }
    "\000"  { status = depth == 1 ? 0 : 2;  break; }
    [^]     { status = 1;               continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = strdup(argv[argp]);
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
inp[len - 1] = '\0';
|
||||
++inp;
|
||||
}
|
||||
res = scan(inp);
|
||||
free(inp);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
158
tools/re2c/examples/001_upn_calculator/calc_008.b.re
Normal file
158
tools/re2c/examples/001_upn_calculator/calc_008.b.re
Normal file
|
@ -0,0 +1,158 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_008, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- using -b with signed character input
|
||||
. Since the code is being generated with -b switch re2c requires the internal
|
||||
character variable yych to use an unsigned character type. For that reason
|
||||
the previous lessons had a conversion at the beginning of their scan()
|
||||
function. Other re2c generated code often have the scanners work completely
|
||||
on unsigned input. Thus requesting a conversion.
|
||||
|
||||
To avoid the conversion on input, re2c allows to do the conversion when
|
||||
reading the internal yych variable. To enable that conversion you need to
|
||||
use the inplace configuration 're2c:yych:conversion' and set it to 1. This
|
||||
will change the generated code to insert conversions to YYCTYPE whenever
|
||||
yych is being read.
|
||||
|
||||
- More inplace configurations for better/nicer code
|
||||
. re2c allows to overwrite the generation of any define, label or variable
|
||||
used in the generated code. For example we overwrite the 'yych' variable
|
||||
name to 'curr' using inplace configuration 're2c:variable:yych = curr;'.
|
||||
|
||||
. We further more use inplace configurations instead of defines. This allows
|
||||
to use correct conversions to 'unsigned char' instead of having to convert
|
||||
to 'YYCTYPE' when placing 're2c:define:YYCTYPE = "unsigned char";' in front
|
||||
of 're2c:yych:conversion'. Note that we have to use quotes for the
|
||||
first setting as it contains a space.
|
||||
|
||||
. Last but not least we use 're2c:labelprefix = scan' to change the prefix
|
||||
of generated labels.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define DEBUG(stmt) stmt

int  stack[4];      /* operand stack of the calculator */
int  depth = 0;     /* number of values currently on the stack */

/*
 * Convert the digits in [t, l) using the given radix and push the value.
 *
 * Returns 0 on success, or 3 when the stack is already full; in that case
 * nothing is pushed.
 */
int push_num(const char *t, const char *l, int radix)
{
    int num = 0;

    /* compare against the element count, not the size of the array in bytes */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - '0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}
|
||||
|
||||
int stack_add()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] + stack[depth];
|
||||
DEBUG(printf("+\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int stack_sub()
|
||||
{
|
||||
if (depth < 2) return 4;
|
||||
|
||||
--depth;
|
||||
stack[depth-1] = stack[depth-1] - stack[depth];
|
||||
DEBUG(printf("+\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Scan the zero-terminated expression at p and feed it to the calculator
 * stack.  The scanner is configured through inplace configurations in the
 * rule block instead of #defines: p itself is the cursor and YYFILL is
 * disabled.
 *
 * The loop runs while the status is 0; each rule stores the result of its
 * stack operation in the status and an unknown character sets it to 1.  The
 * terminating zero ends the loop with status 0 when exactly one value is
 * left on the stack and 2 otherwise.  The loop body is generated by re2c
 * from the rule block below.
 */
int scan(char *p)
{
    int status = 0;
    char *tok;      /* start of the token being scanned */

    while (!status)
    {
        tok = p;
/*!re2c
    re2c:define:YYCTYPE  = "unsigned char";
    re2c:define:YYCURSOR = p;
    re2c:variable:yych   = curr;
    re2c:indent:top      = 2;
    re2c:yyfill:enable   = 0;
    re2c:yych:conversion = 1;
    re2c:labelprefix     = scan;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(tok, p, 8);     continue; }
    INT     { status = push_num(tok, p, 10);    continue; }
    "+"     { status = stack_add();     continue; }
    "-"     { status = stack_sub();     continue; }
    "\000"  { status = depth == 1 ? 0 : 2;  break; }
    [^]     { status = 1;               continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = strdup(argv[argp]);
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
inp[len - 1] = '\0';
|
||||
++inp;
|
||||
}
|
||||
res = scan(inp);
|
||||
free(inp);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
54
tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h
Normal file
54
tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h
Normal file
|
@ -0,0 +1,54 @@
|
|||
/**
 * @file    HiResTimer.h
 * @brief   Minimal stopwatch built on the Win32 performance counter, plus
 *          helpers to change and restore the priority class of the process.
 * @note    All state and all functions have internal linkage, so every
 *          source file that includes this header gets its own timer and the
 *          header can be included from more than one source file without
 *          duplicate symbol errors.
 */

#ifndef _HI_RES_TIMER_H_
#define _HI_RES_TIMER_H_

#ifdef WIN32
#include <windows.h>   // probably already done in stdafx.h
static LARGE_INTEGER   start;               // counter value stored by HrtStart()
static LARGE_INTEGER   stop;                // counter value read by HrtElapsedMillis()
static LARGE_INTEGER   freq;                // counter frequency queried by HrtInit()
static LONGLONG        elapsedCounts;       // stop - start; same type as LARGE_INTEGER.QuadPart
static double          elapsedMillis;
static double          elapsedMicros;
static HANDLE          processHandle;
static DWORD           prevPriorityClass;   // priority class to restore in HrtResetPriority()

/** @brief Query the process handle, its priority class and the counter frequency. */
static void HrtInit()
{
    processHandle = GetCurrentProcess();
    prevPriorityClass = GetPriorityClass(processHandle);
    QueryPerformanceFrequency(&freq);
}

/** @brief Store the current counter value as the start of a measurement. */
static void HrtStart()
{
    QueryPerformanceCounter(&start);
}

/**
 * @brief Remember the current priority class and switch to @p priority.
 * @note  The result of SetPriorityClass() is not checked.
 */
static void HrtSetPriority(DWORD priority)
{
    prevPriorityClass = GetPriorityClass(processHandle);
    (void)SetPriorityClass(processHandle, priority);
}

/**
 * @brief Switch back to the priority class remembered earlier.
 * @note  The result of SetPriorityClass() is not checked.
 */
static void HrtResetPriority(void)
{
    (void)SetPriorityClass(processHandle, prevPriorityClass);
}

/**
 * @brief  Read the counter and convert the counts since HrtStart().
 * @return Elapsed time in milliseconds.
 */
static double HrtElapsedMillis()
{
    QueryPerformanceCounter(&stop);
    elapsedCounts = (stop.QuadPart - start.QuadPart);
    elapsedMillis = ((elapsedCounts * 1000.0) / freq.QuadPart);
    return elapsedMillis;
}

#endif
#endif
|
291
tools/re2c/examples/001_upn_calculator/windows/main.b.re
Normal file
291
tools/re2c/examples/001_upn_calculator/windows/main.b.re
Normal file
|
@ -0,0 +1,291 @@
|
|||
/* re2c lesson 001_upn_calculator, main.b.re, (c) M. Boerger, L. Allan 2006 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- basic interface for string reading
|
||||
|
||||
. We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL
|
||||
. YYCTYPE is the type re2c operates on or in other words the type that
|
||||
it generates code for. While it is not a big difference when we were
|
||||
using 'unsigned char' here we would need to run re2c with option -w
|
||||
to fully support types with sizeof() > 1.
|
||||
. YYCURSOR is used internally and holds the current scanner position. In
|
||||
expression handlers, the code blocks after re2c expressions, this can be
|
||||
used to identify the end of the token.
|
||||
. YYMARKER is not always being used so we set an initial value to avoid
|
||||
a compiler warning.
|
||||
. YYLIMIT stores the end of the input. Unfortunately we have to use strlen()
|
||||
in this lesson. In the next example we see one way to get rid of it.
|
||||
. We use a 'for(;;)'-loop around the scanner block. We could have used a
|
||||
'while(1)'-loop instead but some compilers generate a warning for it.
|
||||
. To make the output more readable we use 're2c:indent:top' scanner
|
||||
configuration that configures re2c to prepend a single tab (the default)
|
||||
to the beginning of each output line.
|
||||
. The following lines are expressions and for each expression we output the
|
||||
token name and continue the scanner loop.
|
||||
. The second last token detects the end of our input, the terminating zero in
|
||||
our input string. In other scanners detecting the end of input may vary.
|
||||
For example binary code may contain \0 as valid input.
|
||||
. The last expression accepts any input character. It tells re2c to accept
|
||||
the opposite of the empty range. This includes numbers and our tokens but
|
||||
as re2c goes from top to bottom when evaluating the expressions this is no
|
||||
problem.
|
||||
. The first three rules show that re2c actually prioritizes the expressions
|
||||
from top to bottom. Octal numbers require a starting "0" and the actual
|
||||
number. Normal numbers start with a digit greater 0. And zero is finally a
|
||||
special case. A single "0" is detected by the last rule of this set. And
|
||||
valid octal number is already being detected by the first rule. This even
|
||||
includes multi "0" sequences that in octal notation also means zero.
|
||||
Another way would be to only use two rules:
|
||||
"0" [0-9]+
|
||||
"0" | ( [1-9] [0-9]* )
|
||||
A full description of re2c rule syntax can be found in the manual.
|
||||
*/
|
||||
|
||||
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
|
||||
|
||||
#if _MSC_VER > 1200
|
||||
#define WINVER 0x0400 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later.
|
||||
#endif // Prevents warning from vc7.1 complaining about redefinition
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <windows.h>
|
||||
#include "HiResTimer.h"
|
||||
|
||||
static char gTestBuf[1000] = "";
|
||||
|
||||
/**
|
||||
* @brief Setup HiResolution timer and confirm it is working ok
|
||||
*/
|
||||
void InitHiResTimerAndVerifyWorking(void)
|
||||
{
|
||||
double elapsed;
|
||||
HrtInit();
|
||||
HrtSetPriority(ABOVE_NORMAL_PRIORITY_CLASS);
|
||||
HrtStart();
|
||||
Sleep(100);
|
||||
elapsed = HrtElapsedMillis();
|
||||
if ((elapsed < 90) || (elapsed > 110)) {
|
||||
printf("HiResTimer misbehaving: %f\n", elapsed);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Scan for numbers in different formats without producing any output
 *
 * Variant of scan() whose actions only continue or return.  Returns 0 when
 * the terminating zero is matched and 1 for any other unmatched character.
 * The loop body is generated by re2c from the rule block below.
 */
int ScanFullSpeed(char *pzStrToScan, size_t lenStrToScan)
{
    unsigned char *pzBacktrackInfo = 0;
    unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan;
#define YYFILL(n)
#define YYMARKER        pzBacktrackInfo
#define YYLIMIT         (pzStrToScan+lenStrToScan)
#define YYCURSOR        pzCurScanPos
#define YYCTYPE         unsigned char

    for (;;)
    {
/*!re2c
    re2c:indent:top = 2;
    [1-9][0-9]*     { continue; }
    [0][0-9]+       { continue; }
    "+"             { continue; }
    "-"             { continue; }
    "\000"          { return 0; }
    [^]             { return 1; }
*/
    }
}
|
||||
|
||||
/**
 * @brief Scan for numbers in different formats and report every token
 *
 * Prints one line per token and appends the token name to gTestBuf.
 * Returns 0 after printing "EOF" when the terminating zero is matched and 1
 * after reporting "ERR" for any other unmatched character.  The loop body is
 * generated by re2c from the rule block below.
 */
int scan(char *pzStrToScan, size_t lenStrToScan)
{
    unsigned char *pzBacktrackInfo = 0;
    unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan;
#define YYFILL(n)
#define YYMARKER        pzBacktrackInfo
#define YYLIMIT         (pzStrToScan+lenStrToScan)
#define YYCURSOR        pzCurScanPos
#define YYCTYPE         unsigned char

    for (;;)
    {
/*!re2c
    re2c:indent:top = 2;
    [1-9][0-9]*     { printf("Num\n"); strcat(gTestBuf, "Num "); continue; }
    [0][0-9]+       { printf("Oct\n"); strcat(gTestBuf, "Oct "); continue; }
    "+"             { printf("+\n");   strcat(gTestBuf, "+ ");   continue; }
    "-"             { printf("-\n");   strcat(gTestBuf, "- ");   continue; }
    "\000"          { printf("EOF\n");                           return 0; }
    [^]             { printf("ERR\n"); strcat(gTestBuf, "ERR "); return 1; }
*/
    }
}
|
||||
|
||||
/**
 * @brief Time one pass of strncmp() calls and print its statistics
 *
 * Helper of DoTimingsOfStrnCmp(): starts the timer, compares the first five
 * characters of testStr with "Hello" totLoops times, prints a '*' whenever
 * the running match count has none of the bits outside progressAnd set, and
 * finally prints the elapsed time and the counters.
 *
 * @param testStr        string that is compared
 * @param totLoops       number of comparisons in this pass
 * @param progressAnd    bit mask that controls how often '*' is printed
 * @param totFoundCount  match count carried over from earlier passes
 * @return the match count including the matches of this pass
 */
static int TimeStrnCmpPass(const char *testStr, int totLoops, int progressAnd, int totFoundCount)
{
    int foundCount = 0;
    int loop;
    int rc;
    double elapsed;

    HrtStart();
    for (loop = 0; loop < totLoops; ++loop) {
        foundCount = 0;
        rc = strncmp(testStr, "Hello", 5);
        if (rc == 0) {
            foundCount++;
            totFoundCount++;
            if ((totFoundCount & progressAnd) == totFoundCount) {
                printf("*");
            }
        }
    }
    elapsed = HrtElapsedMillis();
    printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
    printf("FoundCount each loop: %d\n", foundCount);
    printf("TotalFoundCount for all loops: %d\n", totFoundCount);

    return totFoundCount;
}

/**
 * @brief Show high resolution elapsed time for 10,000 and 100,000 loops
 *
 * Runs two timed strncmp() passes; the total match count is carried over
 * from the first pass into the second.
 */
void DoTimingsOfStrnCmp(void)
{
    char testStr[] = "Hello, world";
    const int progressAnd = 0xFFFFF000;
    int totLoops = 10000;
    int totFoundCount = 0;

    printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
    totFoundCount = TimeStrnCmpPass(testStr, totLoops, progressAnd, totFoundCount);

    totLoops = 100000;
    totFoundCount = TimeStrnCmpPass(testStr, totLoops, progressAnd, totFoundCount);
}
|
||||
|
||||
/**
|
||||
* @brief Show high resolution elapsed time for 10,000 and 100,000 loops
|
||||
*/
|
||||
void DoTimingsOfRe2c(void)
|
||||
{
|
||||
char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" };
|
||||
const int testCount = sizeof(testStrings) / sizeof(testStrings[0]);
|
||||
int i;
|
||||
int totLoops = 10000 / testCount; // Doing more than one per loop
|
||||
int totFoundCount = 0;
|
||||
int foundCount = 0;
|
||||
int loop;
|
||||
int rc;
|
||||
const int progressAnd = 0xFFFFF000;
|
||||
double elapsed;
|
||||
|
||||
printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
|
||||
|
||||
HrtStart();
|
||||
for (loop = 0; loop < totLoops; ++loop) {
|
||||
foundCount = 0;
|
||||
strcpy(gTestBuf, "");
|
||||
for (i = 0; i < testCount; ++i) {
|
||||
char* pzCurStr = testStrings[i];
|
||||
size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string?
|
||||
rc = ScanFullSpeed(pzCurStr, len);
|
||||
if (rc == 0) {
|
||||
foundCount++;
|
||||
totFoundCount++;
|
||||
if ((totFoundCount & progressAnd) == totFoundCount) {
|
||||
printf("*");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elapsed = HrtElapsedMillis();
|
||||
printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
|
||||
printf("FoundCount each loop: %d\n", foundCount);
|
||||
printf("TotalFoundCount for all loops: %d\n", totFoundCount);
|
||||
|
||||
totLoops = 100000 / testCount;
|
||||
printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
|
||||
|
||||
HrtStart();
|
||||
for (loop = 0; loop < totLoops; ++loop) {
|
||||
foundCount = 0;
|
||||
strcpy(gTestBuf, "");
|
||||
for (i = 0; i < testCount; ++i) {
|
||||
char* pzCurStr = testStrings[i];
|
||||
size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string?
|
||||
rc = ScanFullSpeed(pzCurStr, len);
|
||||
if (rc == 0) {
|
||||
foundCount++;
|
||||
totFoundCount++;
|
||||
if ((totFoundCount & progressAnd) == totFoundCount) {
|
||||
printf("*");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elapsed = HrtElapsedMillis();
|
||||
printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
|
||||
printf("FoundCount each loop: %d\n", foundCount);
|
||||
printf("TotalFoundCount for all loops: %d\n", totFoundCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Entry point for console app
|
||||
*/
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char testStr_A[] = "123";
|
||||
char* testStr_B = "456";
|
||||
char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" };
|
||||
const int testCount = sizeof(testStrings) / sizeof(testStrings[0]);
|
||||
int i;
|
||||
|
||||
int rc = scan(testStr_A, 3);
|
||||
printf("rc: %d\n", rc);
|
||||
|
||||
rc = scan(testStr_B, 3);
|
||||
printf("rc: %d\n", rc);
|
||||
|
||||
rc = scan("789", 3);
|
||||
printf("rc: %d\n", rc);
|
||||
|
||||
strcpy(gTestBuf, "");
|
||||
for (i = 0; i < testCount; ++i) {
|
||||
char* pzCurStr = testStrings[i];
|
||||
size_t len = strlen(pzCurStr);
|
||||
scan(pzCurStr, len);
|
||||
}
|
||||
printf("%s\n", gTestBuf);
|
||||
rc = strcmp(gTestBuf, "Num Num + Num Oct - Oct ERR Num ERR ");
|
||||
if (rc == 0) {
|
||||
printf("Success\n");
|
||||
}
|
||||
else {
|
||||
printf("Failure\n");
|
||||
}
|
||||
assert(0 == rc); // Doesn't work with Release build
|
||||
|
||||
InitHiResTimerAndVerifyWorking();
|
||||
|
||||
DoTimingsOfStrnCmp();
|
||||
|
||||
DoTimingsOfRe2c();
|
||||
|
||||
return 0;
|
||||
}
|
21
tools/re2c/examples/002_strip_comments/README
Normal file
21
tools/re2c/examples/002_strip_comments/README
Normal file
|
@ -0,0 +1,21 @@
|
|||
re2c lesson 002_strip_comments, (c) M. Boerger 2006
|
||||
|
||||
In this lesson you will learn how to use multiple scanner blocks and how to
|
||||
read the input from a file instead of a zero terminated string. In the end you
|
||||
will have a scanner that filters comments out of c source files but keeps re2c
|
||||
comments.
|
||||
|
||||
The first scanner can be generated with:
|
||||
|
||||
re2c -s -o t.c strip_001.s.re
|
||||
|
||||
In the second step we will learn about YYMARKER that stores backtracking
|
||||
information.
|
||||
|
||||
re2c -s -o t.c strip_002.s.re
|
||||
|
||||
The third step brings trailing contexts that are stored in YYCTXMARKER. We also
|
||||
change to use -b instead of -s option since the scanner gets more and more
|
||||
complex.
|
||||
|
||||
re2c -b -o t.c strip_003.b.re
|
147
tools/re2c/examples/002_strip_comments/strip_001.s.re
Normal file
147
tools/re2c/examples/002_strip_comments/strip_001.s.re
Normal file
|
@ -0,0 +1,147 @@
|
|||
/* re2c lesson 002_strip_comments, strip_001.s, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- basic interface for file reading
|
||||
. This scanner will read chunks of input from a file. The easiest way would
|
||||
be to read the whole file into a memory buffer and use that as a zero
|
||||
terminated string.
|
||||
. Instead we want to read input chunks of a reasonable size as they are needed
|
||||
by the scanner. Thus we basically need YYFILL(n) to call fread(n).
|
||||
. Before we provide a buffer that we constantly reallocate we instead use
|
||||
one buffer that we get from the stack or global memory just once. When we
|
||||
reach the end of the buffer we simply move the beginning of our input
|
||||
that is somewhere in our buffer to the beginning of our buffer and then
|
||||
append the next chunk of input to the correct end inside our buffer.
|
||||
. As re2c scanners might read more than one character we need to ensure our
|
||||
buffer is long enough. We can use re2c to inform about the maximum size
|
||||
by placing a "!max:re2c" comment somewhere. This gets translated to a
|
||||
"#define YYMAXFILL <n>" line where <n> is the maximum length value. This
|
||||
define can be used as precompiler condition.
|
||||
|
||||
- multiple scanner blocks
|
||||
. We use a main scanner block that outputs every input character unless the
|
||||
input is two /s or a / followed by a *. In the latter two cases we switch
|
||||
to a special c++ comment and a comment block respectively.
|
||||
. Both special blocks simply detect their end and ignore any other character.
|
||||
. The c++ block is a bit special. Since the terminating new line needs to
|
||||
be output and that can either be a new line or a carriage return followed
|
||||
by a new line.
|
||||
. In order to ensure that we do not read behind our buffer we reset the token
|
||||
pointer to the cursor on every scanner run.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!max:re2c */
|
||||
#define BSIZE 128
|
||||
|
||||
#if BSIZE < YYMAXFILL
|
||||
# error BSIZE must be greater YYMAXFILL
|
||||
#endif
|
||||
|
||||
#define YYCTYPE unsigned char
|
||||
#define YYCURSOR s.cur
|
||||
#define YYLIMIT s.lim
|
||||
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
|
||||
|
||||
typedef struct Scanner
|
||||
{
|
||||
FILE *fp;
|
||||
unsigned char *cur, *tok, *lim, *eof;
|
||||
unsigned char buffer[BSIZE];
|
||||
} Scanner;
|
||||
|
||||
/*
 * Refill the scanner buffer from s->fp.
 *
 * len == 0 marks the initial call: all pointers are reset to the start of the
 * buffer before reading. On later calls the bytes in front of s->tok are
 * discarded, the rest is moved to the start of the buffer and the freed space
 * is filled from the file.
 *
 * Returns -1 when scanning may continue. Returns 0 when the end of the file
 * was already reached and fewer than len bytes remain after s->cur. Returns 1
 * when the buffer is full and nothing can be discarded, so no progress is
 * possible. The YYFILL macro in this file stops scanning on any result >= 0.
 */
int fill(Scanner *s, int len)
{
    if (!len)
    {
        s->cur = s->tok = s->lim = s->buffer;
        s->eof = 0;
    }
    if (!s->eof)
    {
        size_t got, room;
        int shift = s->tok - s->buffer;

        if (shift > 0)
        {
            /* Source and destination may overlap, so memcpy is not allowed here. */
            memmove(s->buffer, s->tok, s->lim - s->tok);
            s->tok -= shift;
            s->cur -= shift;
            s->lim -= shift;
        }
        /* Read exactly as much as fits behind the data that is still buffered. */
        room = (size_t)(BSIZE - (s->lim - s->buffer));
        if (room == 0)
        {
            return 1; /* current token fills the whole buffer */
        }
        got = fread(s->lim, 1, room, s->fp);
        if (got != room)
        {
            /* Short read: remember where the input ends. */
            s->eof = &s->lim[got];
        }
        s->lim += got;
    }
    else if (s->cur + len > s->eof)
    {
        return 0; /* not enough input data */
    }
    return -1;
}
|
||||
|
||||
int scan(FILE *fp)
|
||||
{
|
||||
int res = 0;
|
||||
Scanner s;
|
||||
|
||||
if (!fp)
|
||||
{
|
||||
return 1; /* no file was opened */
|
||||
}
|
||||
|
||||
s.fp = fp;
|
||||
|
||||
fill(&s, 0);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
re2c:indent:top = 2;
|
||||
|
||||
NL = "\r"? "\n" ;
|
||||
ANY = [^] ;
|
||||
|
||||
"/" "/" { goto cppcomment; }
|
||||
"/" "*" { goto comment; }
|
||||
ANY { fputc(*s.tok, stdout); continue; }
|
||||
*/
|
||||
comment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
"*" "/" { continue; }
|
||||
ANY { goto comment; }
|
||||
*/
|
||||
cppcomment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { fwrite(s.tok, 1, s.cur - s.tok, stdout); continue; }
|
||||
ANY { goto cppcomment; }
|
||||
*/
|
||||
}
|
||||
|
||||
if (fp != stdin)
|
||||
{
|
||||
fclose(fp); /* close only if not stdin */
|
||||
}
|
||||
return res; /* return result */
|
||||
}
|
||||
|
||||
/*
 * Program entry: scan the file named by the first argument, or stdin when the
 * argument is "-". Without an argument a usage line is printed and 1 is
 * returned. A failed fopen() is passed on as NULL and reported by scan().
 */
int main(int argc, char **argv)
{
    FILE *input;

    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 1;
    }

    if (strcmp(argv[1], "-") == 0)
    {
        input = stdin;
    }
    else
    {
        input = fopen(argv[1], "r");
    }
    return scan(input);
}
|
162
tools/re2c/examples/002_strip_comments/strip_002.s.re
Normal file
162
tools/re2c/examples/002_strip_comments/strip_002.s.re
Normal file
|
@ -0,0 +1,162 @@
|
|||
/* re2c lesson 002_strip_comments, strip_002.s, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- complexity
|
||||
. When a comment is preceded by a new line and followed by whitespace and a
|
||||
new line then we can drop the trailing whitespace and new line.
|
||||
. In addition to what we strip out already, what about two consecutive comment
|
||||
blocks? When two comments are only separated by whitespace we want to drop
|
||||
both. In other words when detecting the end of a comment block we need to
|
||||
check whether it is followed by only whitespace and then a new comment in
|
||||
which case we continue ignoring the input. If it is followed only by white
|
||||
space and a new line we strip out the new white space and new line. In any
|
||||
other case we start outputting all that follows.
|
||||
But we cannot simply use the following two rules:
|
||||
"*" "/" WS* "/" "*" { continue; }
|
||||
"*" "/" WS* NL { continue; }
|
||||
The main problem is that WS* can get bigger than our buffer, so we need a
|
||||
new scanner.
|
||||
. Meanwhile our scanner gets a bit more complex and we have to add two more
|
||||
things. First the scanner code now uses a YYMARKER to store backtracking
|
||||
information.
|
||||
|
||||
- backtracking information
|
||||
. When the scanner has two rules that can have the same beginning but a
|
||||
different ending then it needs to store the position that identifies the
|
||||
common part. This is called backtracking. As mentioned above re2c expects
|
||||
you to provide compiler define YYMARKER and a pointer variable.
|
||||
. When shifting buffer contents as done in our fill function the marker needs
|
||||
to be corrected, too.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!max:re2c */
|
||||
#define BSIZE 128
|
||||
|
||||
#if BSIZE < YYMAXFILL
|
||||
# error BSIZE must be greater YYMAXFILL
|
||||
#endif
|
||||
|
||||
#define YYCTYPE unsigned char
|
||||
#define YYCURSOR s.cur
|
||||
#define YYLIMIT s.lim
|
||||
#define YYMARKER s.mrk
|
||||
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
|
||||
|
||||
typedef struct Scanner
|
||||
{
|
||||
FILE *fp;
|
||||
unsigned char *cur, *tok, *lim, *eof, *mrk;
|
||||
unsigned char buffer[BSIZE];
|
||||
} Scanner;
|
||||
|
||||
/*
 * Refill the scanner buffer from s->fp.
 *
 * len == 0 marks the initial call: all pointers, including the backtracking
 * marker s->mrk, are reset to the start of the buffer before reading. On
 * later calls the bytes in front of s->tok are discarded, the rest is moved to
 * the start of the buffer (all pointers are corrected by the same distance)
 * and the freed space is filled from the file.
 *
 * Returns -1 when scanning may continue. Returns 0 when the end of the file
 * was already reached and fewer than len bytes remain after s->cur. Returns 1
 * when the buffer is full and nothing can be discarded, so no progress is
 * possible. The YYFILL macro in this file stops scanning on any result >= 0.
 */
int fill(Scanner *s, int len)
{
    if (!len)
    {
        s->cur = s->tok = s->lim = s->mrk = s->buffer;
        s->eof = 0;
    }
    if (!s->eof)
    {
        size_t got, room;
        int shift = s->tok - s->buffer;

        if (shift > 0)
        {
            /* Source and destination may overlap, so memcpy is not allowed here. */
            memmove(s->buffer, s->tok, s->lim - s->tok);
            s->tok -= shift;
            s->cur -= shift;
            s->lim -= shift;
            s->mrk -= shift;
        }
        /* Read exactly as much as fits behind the data that is still buffered. */
        room = (size_t)(BSIZE - (s->lim - s->buffer));
        if (room == 0)
        {
            return 1; /* current token fills the whole buffer */
        }
        got = fread(s->lim, 1, room, s->fp);
        if (got != room)
        {
            /* Short read: remember where the input ends. */
            s->eof = &s->lim[got];
        }
        s->lim += got;
    }
    else if (s->cur + len > s->eof)
    {
        return 0; /* not enough input data */
    }
    return -1;
}
|
||||
|
||||
/* Write the bytes between s->tok and s->cur to stdout unchanged. */
void echo(Scanner *s)
{
    const unsigned char *start = s->tok;

    fwrite(start, 1, s->cur - start, stdout);
}
|
||||
|
||||
int scan(FILE *fp)
|
||||
{
|
||||
int res = 0;
|
||||
Scanner s;
|
||||
|
||||
if (!fp)
|
||||
{
|
||||
return 1; /* no file was opened */
|
||||
}
|
||||
|
||||
s.fp = fp;
|
||||
|
||||
fill(&s, 0);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
re2c:indent:top = 2;
|
||||
|
||||
NL = "\r"? "\n" ;
|
||||
WS = [\r\n\t ] ;
|
||||
ANY = [^] ;
|
||||
|
||||
"/" "/" { goto cppcomment; }
|
||||
"/" "*" { goto comment; }
|
||||
ANY { fputc(*s.tok, stdout); continue; }
|
||||
*/
|
||||
comment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
"*" "/" { goto commentws; }
|
||||
ANY { goto comment; }
|
||||
*/
|
||||
commentws:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { echo(&s); continue; }
|
||||
WS { goto commentws; }
|
||||
ANY { echo(&s); continue; }
|
||||
*/
|
||||
cppcomment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { echo(&s); continue; }
|
||||
ANY { goto cppcomment; }
|
||||
*/
|
||||
}
|
||||
|
||||
if (fp != stdin)
|
||||
{
|
||||
fclose(fp); /* close only if not stdin */
|
||||
}
|
||||
return res; /* return result */
|
||||
}
|
||||
|
||||
/*
 * Program entry: with no argument print a usage line and return 1. Otherwise
 * scan stdin when the argument is "-", else the named file. A failed fopen()
 * yields NULL, which scan() reports through its return value.
 */
int main(int argc, char **argv)
{
    const char *source;

    if (!(argc > 1))
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 1;
    }

    source = argv[1];
    if (!strcmp(source, "-"))
    {
        return scan(stdin);
    }
    return scan(fopen(source, "r"));
}
|
179
tools/re2c/examples/002_strip_comments/strip_003.b.re
Normal file
179
tools/re2c/examples/002_strip_comments/strip_003.b.re
Normal file
|
@ -0,0 +1,179 @@
|
|||
/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- more complexity
|
||||
. Right now we strip out trailing white space and new lines after a comment
|
||||
block. This can be a problem when the comment block was not preceded by
|
||||
a new line.
|
||||
. The solution is to use trailing contexts.
|
||||
|
||||
- trailing contexts
|
||||
. Re2c allows to check for a portion of input and only recognize it when it
|
||||
is followed by another portion. This is called a trailing context.
|
||||
. The trailing context is not part of the identified input. That means that
|
||||
it follows exactly at the cursor. A consequence is that the scanner has
|
||||
already read more input and on the next run you need to restore the beginning
|
||||
of input, in our case s.tok, from the cursor, here s.cur, rather than
|
||||
restoring to the beginning of the buffer. This way the scanner can reuse
|
||||
the portion it has already read.
|
||||
. The position of the trailing context is stored in YYCTXMARKER for which
|
||||
a pointer variable needs to be provided.
|
||||
. As with YYMARKER the corresponding variable needs to be corrected if we
|
||||
shift in some buffer.
|
||||
. Still this is not all we need to solve the problem. What is left is that
|
||||
the information whether a trailing context was detected has to
|
||||
be stored somewhere. This is done by the new variable nlcomment.
|
||||
|
||||
- formatting
|
||||
. Until now we only used single line expression code and we always had the
|
||||
opening { on the same line as the rule itself. If we have multiline rule
|
||||
code and care for formatting we can no longer rely on re2c. Now we have
|
||||
to indent the rule code ourself. Also we need to take care of the opening
|
||||
{. If we keep it on the same line as the rule then re2c will indent it
|
||||
correctly and the emitted #line informations will be correct. If we place
|
||||
it on the next line then the #line directive will also point to that line
|
||||
and not to the rule.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!max:re2c */
|
||||
#define BSIZE 128
|
||||
|
||||
#if BSIZE < YYMAXFILL
|
||||
# error BSIZE must be greater YYMAXFILL
|
||||
#endif
|
||||
|
||||
#define YYCTYPE unsigned char
|
||||
#define YYCURSOR s.cur
|
||||
#define YYLIMIT s.lim
|
||||
#define YYMARKER s.mrk
|
||||
#define YYCTXMARKER s.ctx
|
||||
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
|
||||
|
||||
typedef struct Scanner
|
||||
{
|
||||
FILE *fp;
|
||||
unsigned char *cur, *tok, *lim, *eof, *ctx, *mrk;
|
||||
unsigned char buffer[BSIZE];
|
||||
} Scanner;
|
||||
|
||||
/*
 * Refill the scanner buffer from s->fp.
 *
 * len == 0 marks the initial call: all pointers, including the backtracking
 * marker s->mrk and the trailing-context marker s->ctx, are reset to the
 * start of the buffer before reading. On later calls the bytes in front of
 * s->tok are discarded, the rest is moved to the start of the buffer (all
 * pointers are corrected by the same distance) and the freed space is filled
 * from the file.
 *
 * Returns -1 when scanning may continue. Returns 0 when the end of the file
 * was already reached and fewer than len bytes remain after s->cur. Returns 1
 * when the buffer is full and nothing can be discarded, so no progress is
 * possible. The YYFILL macro in this file stops scanning on any result >= 0.
 */
int fill(Scanner *s, int len)
{
    if (!len)
    {
        /* s->ctx is adjusted below on every shift, so it needs a defined value too. */
        s->cur = s->tok = s->lim = s->mrk = s->ctx = s->buffer;
        s->eof = 0;
    }
    if (!s->eof)
    {
        size_t got, room;
        int shift = s->tok - s->buffer;

        if (shift > 0)
        {
            /* Source and destination may overlap, so memcpy is not allowed here. */
            memmove(s->buffer, s->tok, s->lim - s->tok);
            s->tok -= shift;
            s->cur -= shift;
            s->lim -= shift;
            s->mrk -= shift;
            s->ctx -= shift;
        }
        /* Read exactly as much as fits behind the data that is still buffered. */
        room = (size_t)(BSIZE - (s->lim - s->buffer));
        if (room == 0)
        {
            return 1; /* current token fills the whole buffer */
        }
        got = fread(s->lim, 1, room, s->fp);
        if (got != room)
        {
            /* Short read: remember where the input ends. */
            s->eof = &s->lim[got];
        }
        s->lim += got;
    }
    else if (s->cur + len > s->eof)
    {
        return 0; /* not enough input data */
    }
    return -1;
}
|
||||
|
||||
/* Copy the current token, i.e. the range [s->tok, s->cur), to stdout. */
void echo(Scanner *s)
{
    size_t tokenLen = s->cur - s->tok;

    fwrite(s->tok, 1, tokenLen, stdout);
}
|
||||
|
||||
int scan(FILE *fp)
|
||||
{
|
||||
int res = 0;
|
||||
int nlcomment = 0;
|
||||
Scanner s;
|
||||
|
||||
if (!fp)
|
||||
{
|
||||
return 1; /* no file was opened */
|
||||
}
|
||||
|
||||
s.fp = fp;
|
||||
|
||||
fill(&s, 0);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
re2c:indent:top = 2;
|
||||
|
||||
NL = "\r"? "\n" ;
|
||||
WS = [\r\n\t ] ;
|
||||
ANY = [^] ;
|
||||
|
||||
"/" "/" { goto cppcomment; }
|
||||
NL / "/""*" { echo(&s); nlcomment = 1; continue; }
|
||||
"/" "*" { goto comment; }
|
||||
ANY { fputc(*s.tok, stdout); continue; }
|
||||
*/
|
||||
comment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
"*" "/" { goto commentws; }
|
||||
ANY { goto comment; }
|
||||
*/
|
||||
commentws:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL? "/" "*" { goto comment; }
|
||||
NL {
|
||||
if (!nlcomment)
|
||||
{
|
||||
echo(&s);
|
||||
}
|
||||
nlcomment = 0;
|
||||
continue;
|
||||
}
|
||||
WS { goto commentws; }
|
||||
ANY { echo(&s); nlcomment = 0; continue; }
|
||||
*/
|
||||
cppcomment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { echo(&s); continue; }
|
||||
ANY { goto cppcomment; }
|
||||
*/
|
||||
}
|
||||
|
||||
if (fp != stdin)
|
||||
{
|
||||
fclose(fp); /* close only if not stdin */
|
||||
}
|
||||
return res; /* return result */
|
||||
}
|
||||
|
||||
/*
 * Program entry: the first argument names the input, "-" selecting stdin.
 * Prints a usage line and returns 1 when no argument is given; otherwise
 * returns whatever scan() returns (scan() itself handles a NULL stream).
 */
int main(int argc, char **argv)
{
    if (argc < 2)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 1;
    }
    else
    {
        int useStdin = (strcmp(argv[1], "-") == 0);
        FILE *stream = useStdin ? stdin : fopen(argv[1], "r");

        return scan(stream);
    }
}
|
|
@ -1,26 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef MAP_NORESERVE
|
||||
#define MAP_NORESERVE 0
|
||||
#endif
|
||||
|
||||
volatile char ch;
|
||||
|
||||
main(){
|
||||
struct stat statbuf;
|
||||
uchar *buf;
|
||||
fstat(0, &statbuf);
|
||||
buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE,
|
||||
0, 0);
|
||||
if(buf != (uchar*)(-1)){
|
||||
uchar *cur, *lim = &buf[statbuf.st_size];
|
||||
for(cur = buf; buf != lim; ++cur){
|
||||
ch = *cur;
|
||||
}
|
||||
munmap(buf, statbuf.st_size);
|
||||
}
|
||||
}
|
|
@ -1,267 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <string.h>
|
||||
|
||||
#define ADDEQ 257
|
||||
#define ANDAND 258
|
||||
#define ANDEQ 259
|
||||
#define ARRAY 260
|
||||
#define ASM 261
|
||||
#define AUTO 262
|
||||
#define BREAK 263
|
||||
#define CASE 264
|
||||
#define CHAR 265
|
||||
#define CONST 266
|
||||
#define CONTINUE 267
|
||||
#define DECR 268
|
||||
#define DEFAULT 269
|
||||
#define DEREF 270
|
||||
#define DIVEQ 271
|
||||
#define DO 272
|
||||
#define DOUBLE 273
|
||||
#define ELLIPSIS 274
|
||||
#define ELSE 275
|
||||
#define ENUM 276
|
||||
#define EQL 277
|
||||
#define EXTERN 278
|
||||
#define FCON 279
|
||||
#define FLOAT 280
|
||||
#define FOR 281
|
||||
#define FUNCTION 282
|
||||
#define GEQ 283
|
||||
#define GOTO 284
|
||||
#define ICON 285
|
||||
#define ID 286
|
||||
#define IF 287
|
||||
#define INCR 288
|
||||
#define INT 289
|
||||
#define LEQ 290
|
||||
#define LONG 291
|
||||
#define LSHIFT 292
|
||||
#define LSHIFTEQ 293
|
||||
#define MODEQ 294
|
||||
#define MULEQ 295
|
||||
#define NEQ 296
|
||||
#define OREQ 297
|
||||
#define OROR 298
|
||||
#define POINTER 299
|
||||
#define REGISTER 300
|
||||
#define RETURN 301
|
||||
#define RSHIFT 302
|
||||
#define RSHIFTEQ 303
|
||||
#define SCON 304
|
||||
#define SHORT 305
|
||||
#define SIGNED 306
|
||||
#define SIZEOF 307
|
||||
#define STATIC 308
|
||||
#define STRUCT 309
|
||||
#define SUBEQ 310
|
||||
#define SWITCH 311
|
||||
#define TYPEDEF 312
|
||||
#define UNION 313
|
||||
#define UNSIGNED 314
|
||||
#define VOID 315
|
||||
#define VOLATILE 316
|
||||
#define WHILE 317
|
||||
#define XOREQ 318
|
||||
#define EOI 319
|
||||
|
||||
typedef unsigned int unint;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
#define YYCTYPE uchar
|
||||
#define YYCURSOR cursor
|
||||
#define YYLIMIT s->lim
|
||||
#define YYMARKER s->ptr
|
||||
#define YYFILL(n) {cursor = fill(s, cursor);}
|
||||
|
||||
#define RET(i) {s->cur = cursor; return i;}
|
||||
|
||||
typedef struct Scanner {
|
||||
uchar *tok, *ptr, *cur, *pos, *lim, *eof;
|
||||
unint line;
|
||||
} Scanner;
|
||||
|
||||
uchar *fill(Scanner *s, uchar *cursor){
|
||||
if(!s->eof){
|
||||
unint cnt = s->lim - s->tok;
|
||||
uchar *buf = malloc((cnt + 1)*sizeof(uchar));
|
||||
memcpy(buf, s->tok, cnt);
|
||||
cursor = &buf[cursor - s->tok];
|
||||
s->pos = &buf[s->pos - s->tok];
|
||||
s->ptr = &buf[s->ptr - s->tok];
|
||||
s->lim = &buf[cnt];
|
||||
s->eof = s->lim; *(s->eof)++ = '\n';
|
||||
s->tok = buf;
|
||||
}
|
||||
return cursor;
|
||||
}
|
||||
|
||||
int scan(Scanner *s){
|
||||
uchar *cursor = s->cur;
|
||||
std:
|
||||
s->tok = cursor;
|
||||
/*!re2c
|
||||
any = [\000-\377];
|
||||
O = [0-7];
|
||||
D = [0-9];
|
||||
L = [a-zA-Z_];
|
||||
H = [a-fA-F0-9];
|
||||
E = [Ee] [+-]? D+;
|
||||
FS = [fFlL];
|
||||
IS = [uUlL]*;
|
||||
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
|
||||
*/
|
||||
|
||||
/*!re2c
|
||||
"/*" { goto comment; }
|
||||
|
||||
"auto" { RET(AUTO); }
|
||||
"break" { RET(BREAK); }
|
||||
"case" { RET(CASE); }
|
||||
"char" { RET(CHAR); }
|
||||
"const" { RET(CONST); }
|
||||
"continue" { RET(CONTINUE); }
|
||||
"default" { RET(DEFAULT); }
|
||||
"do" { RET(DO); }
|
||||
"double" { RET(DOUBLE); }
|
||||
"else" { RET(ELSE); }
|
||||
"enum" { RET(ENUM); }
|
||||
"extern" { RET(EXTERN); }
|
||||
"float" { RET(FLOAT); }
|
||||
"for" { RET(FOR); }
|
||||
"goto" { RET(GOTO); }
|
||||
"if" { RET(IF); }
|
||||
"int" { RET(INT); }
|
||||
"long" { RET(LONG); }
|
||||
"register" { RET(REGISTER); }
|
||||
"return" { RET(RETURN); }
|
||||
"short" { RET(SHORT); }
|
||||
"signed" { RET(SIGNED); }
|
||||
"sizeof" { RET(SIZEOF); }
|
||||
"static" { RET(STATIC); }
|
||||
"struct" { RET(STRUCT); }
|
||||
"switch" { RET(SWITCH); }
|
||||
"typedef" { RET(TYPEDEF); }
|
||||
"union" { RET(UNION); }
|
||||
"unsigned" { RET(UNSIGNED); }
|
||||
"void" { RET(VOID); }
|
||||
"volatile" { RET(VOLATILE); }
|
||||
"while" { RET(WHILE); }
|
||||
|
||||
L (L|D)* { RET(ID); }
|
||||
|
||||
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
|
||||
(['] (ESC|any\[\n\\'])* ['])
|
||||
{ RET(ICON); }
|
||||
|
||||
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
|
||||
{ RET(FCON); }
|
||||
|
||||
(["] (ESC|any\[\n\\"])* ["])
|
||||
{ RET(SCON); }
|
||||
|
||||
"..." { RET(ELLIPSIS); }
|
||||
">>=" { RET(RSHIFTEQ); }
|
||||
"<<=" { RET(LSHIFTEQ); }
|
||||
"+=" { RET(ADDEQ); }
|
||||
"-=" { RET(SUBEQ); }
|
||||
"*=" { RET(MULEQ); }
|
||||
"/=" { RET(DIVEQ); }
|
||||
"%=" { RET(MODEQ); }
|
||||
"&=" { RET(ANDEQ); }
|
||||
"^=" { RET(XOREQ); }
|
||||
"|=" { RET(OREQ); }
|
||||
">>" { RET(RSHIFT); }
|
||||
"<<" { RET(LSHIFT); }
|
||||
"++" { RET(INCR); }
|
||||
"--" { RET(DECR); }
|
||||
"->" { RET(DEREF); }
|
||||
"&&" { RET(ANDAND); }
|
||||
"||" { RET(OROR); }
|
||||
"<=" { RET(LEQ); }
|
||||
">=" { RET(GEQ); }
|
||||
"==" { RET(EQL); }
|
||||
"!=" { RET(NEQ); }
|
||||
";" { RET(';'); }
|
||||
"{" { RET('{'); }
|
||||
"}" { RET('}'); }
|
||||
"," { RET(','); }
|
||||
":" { RET(':'); }
|
||||
"=" { RET('='); }
|
||||
"(" { RET('('); }
|
||||
")" { RET(')'); }
|
||||
"[" { RET('['); }
|
||||
"]" { RET(']'); }
|
||||
"." { RET('.'); }
|
||||
"&" { RET('&'); }
|
||||
"!" { RET('!'); }
|
||||
"~" { RET('~'); }
|
||||
"-" { RET('-'); }
|
||||
"+" { RET('+'); }
|
||||
"*" { RET('*'); }
|
||||
"/" { RET('/'); }
|
||||
"%" { RET('%'); }
|
||||
"<" { RET('<'); }
|
||||
">" { RET('>'); }
|
||||
"^" { RET('^'); }
|
||||
"|" { RET('|'); }
|
||||
"?" { RET('?'); }
|
||||
|
||||
|
||||
[ \t\v\f]+ { goto std; }
|
||||
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->pos = cursor; s->line++;
|
||||
goto std;
|
||||
}
|
||||
|
||||
any
|
||||
{
|
||||
printf("unexpected character: %c\n", *s->tok);
|
||||
goto std;
|
||||
}
|
||||
*/
|
||||
|
||||
comment:
|
||||
/*!re2c
|
||||
"*/" { goto std; }
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->tok = s->pos = cursor; s->line++;
|
||||
goto comment;
|
||||
}
|
||||
any { goto comment; }
|
||||
*/
|
||||
}
|
||||
|
||||
#ifndef MAP_NORESERVE
|
||||
#define MAP_NORESERVE 0
|
||||
#endif
|
||||
|
||||
main(){
|
||||
Scanner in;
|
||||
struct stat statbuf;
|
||||
uchar *buf;
|
||||
fstat(0, &statbuf);
|
||||
buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE,
|
||||
0, 0);
|
||||
if(buf != (uchar*)(-1)){
|
||||
int t;
|
||||
in.lim = &(in.cur = buf)[statbuf.st_size];
|
||||
in.pos = NULL;
|
||||
in.eof = NULL;
|
||||
while((t = scan(&in)) != EOI){
|
||||
/*
|
||||
printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
|
||||
printf("%d\n", t);
|
||||
*/
|
||||
}
|
||||
munmap(buf, statbuf.st_size);
|
||||
}
|
||||
}
|
|
@ -1,239 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define ADDEQ 257
|
||||
#define ANDAND 258
|
||||
#define ANDEQ 259
|
||||
#define ARRAY 260
|
||||
#define ASM 261
|
||||
#define AUTO 262
|
||||
#define BREAK 263
|
||||
#define CASE 264
|
||||
#define CHAR 265
|
||||
#define CONST 266
|
||||
#define CONTINUE 267
|
||||
#define DECR 268
|
||||
#define DEFAULT 269
|
||||
#define DEREF 270
|
||||
#define DIVEQ 271
|
||||
#define DO 272
|
||||
#define DOUBLE 273
|
||||
#define ELLIPSIS 274
|
||||
#define ELSE 275
|
||||
#define ENUM 276
|
||||
#define EQL 277
|
||||
#define EXTERN 278
|
||||
#define FCON 279
|
||||
#define FLOAT 280
|
||||
#define FOR 281
|
||||
#define FUNCTION 282
|
||||
#define GEQ 283
|
||||
#define GOTO 284
|
||||
#define ICON 285
|
||||
#define ID 286
|
||||
#define IF 287
|
||||
#define INCR 288
|
||||
#define INT 289
|
||||
#define LEQ 290
|
||||
#define LONG 291
|
||||
#define LSHIFT 292
|
||||
#define LSHIFTEQ 293
|
||||
#define MODEQ 294
|
||||
#define MULEQ 295
|
||||
#define NEQ 296
|
||||
#define OREQ 297
|
||||
#define OROR 298
|
||||
#define POINTER 299
|
||||
#define REGISTER 300
|
||||
#define RETURN 301
|
||||
#define RSHIFT 302
|
||||
#define RSHIFTEQ 303
|
||||
#define SCON 304
|
||||
#define SHORT 305
|
||||
#define SIGNED 306
|
||||
#define SIZEOF 307
|
||||
#define STATIC 308
|
||||
#define STRUCT 309
|
||||
#define SUBEQ 310
|
||||
#define SWITCH 311
|
||||
#define TYPEDEF 312
|
||||
#define UNION 313
|
||||
#define UNSIGNED 314
|
||||
#define VOID 315
|
||||
#define VOLATILE 316
|
||||
#define WHILE 317
|
||||
#define XOREQ 318
|
||||
#define EOI 319
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
#define BSIZE 8192
|
||||
|
||||
#define YYCTYPE uchar
|
||||
#define YYCURSOR cursor
|
||||
#define YYLIMIT s->lim
|
||||
#define YYMARKER s->ptr
|
||||
#define YYFILL(n) {cursor = fill(s, cursor);}
|
||||
|
||||
#define RET(i) {s->cur = cursor; return i;}
|
||||
|
||||
typedef struct Scanner {
|
||||
int fd;
|
||||
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
|
||||
uint line;
|
||||
} Scanner;
|
||||
|
||||
/*
 * Refill the scanner buffer from s->fd and return the adjusted cursor.
 *
 * Bytes in front of s->tok are discarded and the rest is moved to the bottom
 * of the buffer. If fewer than BSIZE bytes are free behind s->lim, a larger
 * buffer is allocated and all pointers are rebased onto it. Then up to BSIZE
 * bytes are read. A short read (or a read error, treated as zero bytes) marks
 * the end of input: a '\n' sentinel is stored and s->eof points just past it.
 *
 * Once s->eof is set the function does nothing and returns cursor unchanged.
 */
uchar *fill(Scanner *s, uchar *cursor){
    if(!s->eof){
        uint cnt = s->tok - s->bot;
        int got;
        if(cnt){
            /* Source and destination may overlap, so memcpy is not allowed here. */
            memmove(s->bot, s->tok, s->lim - s->tok);
            s->tok = s->bot;
            s->ptr -= cnt;
            cursor -= cnt;
            s->pos -= cnt;
            s->lim -= cnt;
        }
        if((s->top - s->lim) < BSIZE){
            uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
            memcpy(buf, s->tok, s->lim - s->tok);
            s->tok = buf;
            s->ptr = &buf[s->ptr - s->bot];
            cursor = &buf[cursor - s->bot];
            s->pos = &buf[s->pos - s->bot];
            s->lim = &buf[s->lim - s->bot];
            s->top = &s->lim[BSIZE];
            free(s->bot);
            s->bot = buf;
        }
        got = read(s->fd, (char*) s->lim, BSIZE);
        if(got < 0){
            got = 0; /* read error: never use -1 as a length or offset */
        }
        cnt = got;
        if(cnt != BSIZE){
            s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
        }
        s->lim += cnt;
    }
    return cursor;
}
|
||||
|
||||
int scan(Scanner *s){
|
||||
uchar *cursor = s->cur;
|
||||
std:
|
||||
s->tok = cursor;
|
||||
/*!re2c
|
||||
any = [\000-\377];
|
||||
O = [0-7];
|
||||
D = [0-9];
|
||||
L = [a-zA-Z_];
|
||||
H = [a-fA-F0-9];
|
||||
E = [Ee] [+-]? D+;
|
||||
FS = [fFlL];
|
||||
IS = [uUlL]*;
|
||||
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
|
||||
*/
|
||||
|
||||
/*!re2c
|
||||
"/*" { goto comment; }
|
||||
|
||||
L (L|D)* { RET(ID); }
|
||||
|
||||
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
|
||||
(['] (ESC|any\[\n\\'])* ['])
|
||||
{ RET(ICON); }
|
||||
|
||||
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
|
||||
{ RET(FCON); }
|
||||
|
||||
(["] (ESC|any\[\n\\"])* ["])
|
||||
{ RET(SCON); }
|
||||
|
||||
"..." { RET(ELLIPSIS); }
|
||||
">>=" { RET(RSHIFTEQ); }
|
||||
"<<=" { RET(LSHIFTEQ); }
|
||||
"+=" { RET(ADDEQ); }
|
||||
"-=" { RET(SUBEQ); }
|
||||
"*=" { RET(MULEQ); }
|
||||
"/=" { RET(DIVEQ); }
|
||||
"%=" { RET(MODEQ); }
|
||||
"&=" { RET(ANDEQ); }
|
||||
"^=" { RET(XOREQ); }
|
||||
"|=" { RET(OREQ); }
|
||||
">>" { RET(RSHIFT); }
|
||||
"<<" { RET(LSHIFT); }
|
||||
"++" { RET(INCR); }
|
||||
"--" { RET(DECR); }
|
||||
"->" { RET(DEREF); }
|
||||
"&&" { RET(ANDAND); }
|
||||
"||" { RET(OROR); }
|
||||
"<=" { RET(LEQ); }
|
||||
">=" { RET(GEQ); }
|
||||
"==" { RET(EQL); }
|
||||
"!=" { RET(NEQ); }
|
||||
";" { RET(';'); }
|
||||
"{" { RET('{'); }
|
||||
"}" { RET('}'); }
|
||||
"," { RET(','); }
|
||||
":" { RET(':'); }
|
||||
"=" { RET('='); }
|
||||
"(" { RET('('); }
|
||||
")" { RET(')'); }
|
||||
"[" { RET('['); }
|
||||
"]" { RET(']'); }
|
||||
"." { RET('.'); }
|
||||
"&" { RET('&'); }
|
||||
"!" { RET('!'); }
|
||||
"~" { RET('~'); }
|
||||
"-" { RET('-'); }
|
||||
"+" { RET('+'); }
|
||||
"*" { RET('*'); }
|
||||
"/" { RET('/'); }
|
||||
"%" { RET('%'); }
|
||||
"<" { RET('<'); }
|
||||
">" { RET('>'); }
|
||||
"^" { RET('^'); }
|
||||
"|" { RET('|'); }
|
||||
"?" { RET('?'); }
|
||||
|
||||
|
||||
[ \t\v\f]+ { goto std; }
|
||||
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->pos = cursor; s->line++;
|
||||
goto std;
|
||||
}
|
||||
|
||||
any
|
||||
{
|
||||
printf("unexpected character: %c\n", *s->tok);
|
||||
goto std;
|
||||
}
|
||||
*/
|
||||
|
||||
comment:
|
||||
/*!re2c
|
||||
"*/" { goto std; }
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->tok = s->pos = cursor; s->line++;
|
||||
goto comment;
|
||||
}
|
||||
any { goto comment; }
|
||||
*/
|
||||
}
|
||||
|
||||
main(){
|
||||
Scanner in;
|
||||
int t;
|
||||
memset((char*) &in, 0, sizeof(in));
|
||||
in.fd = 0;
|
||||
while((t = scan(&in)) != EOI){
|
||||
/*
|
||||
printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
|
||||
printf("%d\n", t);
|
||||
*/
|
||||
}
|
||||
close(in.fd);
|
||||
}
|
|
@ -1,258 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define ADDEQ 257
|
||||
#define ANDAND 258
|
||||
#define ANDEQ 259
|
||||
#define ARRAY 260
|
||||
#define ASM 261
|
||||
#define AUTO 262
|
||||
#define BREAK 263
|
||||
#define CASE 264
|
||||
#define CHAR 265
|
||||
#define CONST 266
|
||||
#define CONTINUE 267
|
||||
#define DECR 268
|
||||
#define DEFAULT 269
|
||||
#define DEREF 270
|
||||
#define DIVEQ 271
|
||||
#define DO 272
|
||||
#define DOUBLE 273
|
||||
#define ELLIPSIS 274
|
||||
#define ELSE 275
|
||||
#define ENUM 276
|
||||
#define EQL 277
|
||||
#define EXTERN 278
|
||||
#define FCON 279
|
||||
#define FLOAT 280
|
||||
#define FOR 281
|
||||
#define FUNCTION 282
|
||||
#define GEQ 283
|
||||
#define GOTO 284
|
||||
#define ICON 285
|
||||
#define ID 286
|
||||
#define IF 287
|
||||
#define INCR 288
|
||||
#define INT 289
|
||||
#define LEQ 290
|
||||
#define LONG 291
|
||||
#define LSHIFT 292
|
||||
#define LSHIFTEQ 293
|
||||
#define MODEQ 294
|
||||
#define MULEQ 295
|
||||
#define NEQ 296
|
||||
#define OREQ 297
|
||||
#define OROR 298
|
||||
#define POINTER 299
|
||||
#define REGISTER 300
|
||||
#define RETURN 301
|
||||
#define RSHIFT 302
|
||||
#define RSHIFTEQ 303
|
||||
#define SCON 304
|
||||
#define SHORT 305
|
||||
#define SIGNED 306
|
||||
#define SIZEOF 307
|
||||
#define STATIC 308
|
||||
#define STRUCT 309
|
||||
#define SUBEQ 310
|
||||
#define SWITCH 311
|
||||
#define TYPEDEF 312
|
||||
#define UNION 313
|
||||
#define UNSIGNED 314
|
||||
#define VOID 315
|
||||
#define VOLATILE 316
|
||||
#define WHILE 317
|
||||
#define XOREQ 318
|
||||
#define EOI 319
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
#define BSIZE 8192
|
||||
|
||||
#define YYCTYPE uchar
|
||||
#define YYCURSOR cursor
|
||||
#define YYLIMIT s->lim
|
||||
#define YYMARKER s->ptr
|
||||
#define YYFILL(n) {cursor = fill(s, cursor);}
|
||||
|
||||
#define RET(i) {s->cur = cursor; return i;}
|
||||
|
||||
typedef struct Scanner {
|
||||
int fd;
|
||||
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
|
||||
uint line;
|
||||
} Scanner;
|
||||
|
||||
/*
 * Refill the scanner buffer and return the (possibly relocated) cursor.
 *
 * Does nothing once s->eof has been set. Otherwise:
 *   1. slides the unconsumed bytes [s->tok, s->lim) down to s->bot and
 *      rebases ptr/pos/lim and the cursor by the same distance;
 *   2. if fewer than BSIZE bytes of room remain after s->lim, moves the
 *      data into a freshly allocated, larger buffer and frees the old one;
 *   3. reads up to BSIZE bytes from s->fd; a short read marks end of input
 *      by appending a '\n' sentinel and pointing s->eof just past it.
 *
 * s       scanner state; its buffer pointers are updated in place.
 * cursor  the caller's current scan position inside the buffer.
 * returns the cursor translated to the buffer's new layout.
 */
uchar *fill(Scanner *s, uchar *cursor){
	if(!s->eof){
		uint cnt = s->tok - s->bot;
		long got;
		if(cnt){
			/* Source and destination lie in the same buffer and overlap
			 * whenever the live region is longer than the gap in front of
			 * it, so memmove is required (memcpy would be undefined). */
			memmove(s->bot, s->tok, s->lim - s->tok);
			s->tok = s->bot;
			s->ptr -= cnt;
			cursor -= cnt;
			s->pos -= cnt;
			s->lim -= cnt;
		}
		if((s->top - s->lim) < BSIZE){
			/* NOTE(review): malloc failure is not handled, as in the original. */
			uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
			memcpy(buf, s->tok, s->lim - s->tok);
			s->tok = buf;
			s->ptr = &buf[s->ptr - s->bot];
			cursor = &buf[cursor - s->bot];
			s->pos = &buf[s->pos - s->bot];
			s->lim = &buf[s->lim - s->bot];
			s->top = &s->lim[BSIZE];
			free(s->bot);
			s->bot = buf;
		}
		/* read() returns -1 on error; storing that straight into the
		 * unsigned count would turn it into a huge offset below. Treat an
		 * error like a zero-length read, i.e. as end of input. */
		got = read(s->fd, (char*) s->lim, BSIZE);
		if(got < 0){
			got = 0;
		}
		cnt = (uint) got;
		if(cnt != BSIZE){
			s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
		}
		s->lim += cnt;
	}
	return cursor;
}
|
||||
|
||||
int scan(Scanner *s){
|
||||
uchar *cursor = s->cur;
|
||||
std:
|
||||
s->tok = cursor;
|
||||
/*!re2c
|
||||
any = [\000-\377];
|
||||
O = [0-7];
|
||||
D = [0-9];
|
||||
L = [a-zA-Z_];
|
||||
I = L|D;
|
||||
H = [a-fA-F0-9];
|
||||
E = [Ee] [+-]? D+;
|
||||
FS = [fFlL];
|
||||
IS = [uUlL]*;
|
||||
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
|
||||
X = any\[*/];
|
||||
*/
|
||||
|
||||
/*!re2c
|
||||
"/*" { goto comment; }
|
||||
|
||||
|
||||
L { RET(ID); }
|
||||
L I { RET(ID); }
|
||||
L I I { RET(ID); }
|
||||
L I I I { RET(ID); }
|
||||
L I I I I { RET(ID); }
|
||||
L I I I I I { RET(ID); }
|
||||
L I I I I I I { RET(ID); }
|
||||
L I I I I I I I { RET(ID); }
|
||||
L I* { RET(ID); }
|
||||
|
||||
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
|
||||
(['] (ESC|any\[\n\\'])* ['])
|
||||
{ RET(ICON); }
|
||||
|
||||
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
|
||||
{ RET(FCON); }
|
||||
|
||||
(["] (ESC|any\[\n\\"])* ["])
|
||||
{ RET(SCON); }
|
||||
|
||||
"..." { RET(ELLIPSIS); }
|
||||
">>=" { RET(RSHIFTEQ); }
|
||||
"<<=" { RET(LSHIFTEQ); }
|
||||
"+=" { RET(ADDEQ); }
|
||||
"-=" { RET(SUBEQ); }
|
||||
"*=" { RET(MULEQ); }
|
||||
"/=" { RET(DIVEQ); }
|
||||
"%=" { RET(MODEQ); }
|
||||
"&=" { RET(ANDEQ); }
|
||||
"^=" { RET(XOREQ); }
|
||||
"|=" { RET(OREQ); }
|
||||
">>" { RET(RSHIFT); }
|
||||
"<<" { RET(LSHIFT); }
|
||||
"++" { RET(INCR); }
|
||||
"--" { RET(DECR); }
|
||||
"->" { RET(DEREF); }
|
||||
"&&" { RET(ANDAND); }
|
||||
"||" { RET(OROR); }
|
||||
"<=" { RET(LEQ); }
|
||||
">=" { RET(GEQ); }
|
||||
"==" { RET(EQL); }
|
||||
"!=" { RET(NEQ); }
|
||||
";" { RET(';'); }
|
||||
"{" { RET('{'); }
|
||||
"}" { RET('}'); }
|
||||
"," { RET(','); }
|
||||
":" { RET(':'); }
|
||||
"=" { RET('='); }
|
||||
"(" { RET('('); }
|
||||
")" { RET(')'); }
|
||||
"[" { RET('['); }
|
||||
"]" { RET(']'); }
|
||||
"." { RET('.'); }
|
||||
"&" { RET('&'); }
|
||||
"!" { RET('!'); }
|
||||
"~" { RET('~'); }
|
||||
"-" { RET('-'); }
|
||||
"+" { RET('+'); }
|
||||
"*" { RET('*'); }
|
||||
"/" { RET('/'); }
|
||||
"%" { RET('%'); }
|
||||
"<" { RET('<'); }
|
||||
">" { RET('>'); }
|
||||
"^" { RET('^'); }
|
||||
"|" { RET('|'); }
|
||||
"?" { RET('?'); }
|
||||
|
||||
|
||||
[ \t\v\f]+ { goto std; }
|
||||
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->pos = cursor; s->line++;
|
||||
goto std;
|
||||
}
|
||||
|
||||
any
|
||||
{
|
||||
printf("unexpected character: %c\n", *s->tok);
|
||||
goto std;
|
||||
}
|
||||
*/
|
||||
|
||||
comment:
|
||||
/*!re2c
|
||||
"*/" { goto std; }
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->tok = s->pos = cursor; s->line++;
|
||||
goto comment;
|
||||
}
|
||||
X { goto comment; }
|
||||
X X { goto comment; }
|
||||
X X X { goto comment; }
|
||||
X X X X { goto comment; }
|
||||
X X X X X { goto comment; }
|
||||
X X X X X X { goto comment; }
|
||||
X X X X X X X { goto comment; }
|
||||
X X X X X X X X { goto comment; }
|
||||
any { goto comment; }
|
||||
*/
|
||||
}
|
||||
|
||||
main(){
|
||||
Scanner in;
|
||||
int t;
|
||||
memset((char*) &in, 0, sizeof(in));
|
||||
in.fd = 0;
|
||||
while((t = scan(&in)) != EOI){
|
||||
/*
|
||||
printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
|
||||
printf("%d\n", t);
|
||||
*/
|
||||
}
|
||||
close(in.fd);
|
||||
}
|
35
tools/re2c/examples/input_custom/fixed.re
Normal file
35
tools/re2c/examples/input_custom/fixed.re
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Build with "--input custom" re2c switch.
|
||||
//
|
||||
// This is an example of handling fixed-length buffer with "--input custom":
|
||||
// on each YYPEEK we check for the end of input, thus YYFILL generation
|
||||
// can be safely suppressed.
|
||||
//
|
||||
// Note that YYLIMIT points not to terminating NULL, but to the previous
|
||||
// character: we emulate the case when input has no terminating NULL.
|
||||
//
|
||||
// For a real-life example see https://github.com/sopyer/mjson
|
||||
// or mjson.re from re2c test collection.
|
||||
|
||||
bool lex (const char * cursor, const char * const limit)
|
||||
{
|
||||
const char * marker;
|
||||
const char * ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() (cursor >= limit ? 0 : *cursor)
|
||||
# define YYSKIP() ++cursor
|
||||
# define YYBACKUP() marker = cursor
|
||||
# define YYBACKUPCTX() ctxmarker = cursor
|
||||
# define YYRESTORE() cursor = marker
|
||||
# define YYRESTORECTX() cursor = ctxmarker
|
||||
/*!re2c
|
||||
re2c:yyfill:enable = 0;
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
char buffer [] = "int buffer [1024]";
|
||||
return !lex (buffer, buffer + sizeof (buffer) - 1);
|
||||
}
|
20
tools/re2c/examples/input_custom/simple/README
Normal file
20
tools/re2c/examples/input_custom/simple/README
Normal file
|
@ -0,0 +1,20 @@
|
|||
Build with "--input custom" re2c switch.
|
||||
|
||||
These are three examples of "--input custom" usage:
|
||||
|
||||
- input_custom_default.re:
|
||||
implements default re2c input model (pointers to plain buffer)
|
||||
|
||||
- input_custom_fgetc:
|
||||
implements C-style file input (using <stdio.h>)
|
||||
|
||||
- input_custom_istringstream:
|
||||
implements std::istringstream input
|
||||
|
||||
Note that these examples are very simple and don't need
|
||||
to implement YYFILL; the only reason they don't use
|
||||
"re2c:yyfill:enable = 0;" is to keep YYLESSTHAN and YYLIMIT
|
||||
(for the sake of example).
|
||||
|
||||
In real-life programs one will need to care for correct
|
||||
end-of-input handling.
|
24
tools/re2c/examples/input_custom/simple/default.re
Normal file
24
tools/re2c/examples/input_custom/simple/default.re
Normal file
|
@ -0,0 +1,24 @@
|
|||
bool lex (const char * cursor, const char * const limit)
|
||||
{
|
||||
const char * marker;
|
||||
const char * ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() *cursor
|
||||
# define YYSKIP() ++cursor
|
||||
# define YYBACKUP() marker = cursor
|
||||
# define YYBACKUPCTX() ctxmarker = cursor
|
||||
# define YYRESTORE() cursor = marker
|
||||
# define YYRESTORECTX() cursor = ctxmarker
|
||||
# define YYLESSTHAN(n) limit - cursor < n
|
||||
# define YYFILL(n) {}
|
||||
/*!re2c
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
char buffer [] = "int buffer [1024]";
|
||||
return !lex (buffer, buffer + sizeof (buffer));
|
||||
}
|
43
tools/re2c/examples/input_custom/simple/fgetc.re
Normal file
43
tools/re2c/examples/input_custom/simple/fgetc.re
Normal file
|
@ -0,0 +1,43 @@
|
|||
#include <stdio.h>
|
||||
|
||||
// Return the next character of `f` without consuming it.
//
// The result of fgetc is kept as an int until after the push-back: narrowing
// it to char first turns byte 0xFF into -1 on signed-char platforms, which
// equals EOF, so ungetc would refuse it and the byte would be lost.
// At end of input nothing is pushed back and the returned value is EOF
// narrowed to char.
char peek (FILE * f)
{
    const int c = fgetc (f);
    if (c != EOF)
    {
        ungetc (c, f);
    }
    return static_cast<char> (c);
}
|
||||
|
||||
bool lex (FILE * f, const long limit)
|
||||
{
|
||||
long marker;
|
||||
long ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() peek (f)
|
||||
# define YYSKIP() fgetc (f)
|
||||
# define YYBACKUP() marker = ftell (f)
|
||||
# define YYBACKUPCTX() ctxmarker = ftell (f)
|
||||
# define YYRESTORE() fseek (f, marker, SEEK_SET)
|
||||
# define YYRESTORECTX() fseek (f, ctxmarker, SEEK_SET)
|
||||
# define YYLESSTHAN(n) limit - ftell (f) < n
|
||||
# define YYFILL(n) {}
|
||||
/*!re2c
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
const char buffer [] = "int buffer [1024]";
|
||||
const char fn [] = "input.txt";
|
||||
|
||||
FILE * f = fopen (fn, "w");
|
||||
fwrite (buffer, 1, sizeof (buffer), f);
|
||||
fclose (f);
|
||||
|
||||
f = fopen (fn, "rb");
|
||||
int result = !lex (f, sizeof (buffer));
|
||||
fclose (f);
|
||||
|
||||
return result;
|
||||
}
|
27
tools/re2c/examples/input_custom/simple/istringstream.re
Normal file
27
tools/re2c/examples/input_custom/simple/istringstream.re
Normal file
|
@ -0,0 +1,27 @@
|
|||
#include <sstream>
|
||||
|
||||
bool lex (std::istringstream & is, const std::streampos limit)
|
||||
{
|
||||
std::streampos marker;
|
||||
std::streampos ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() is.peek ()
|
||||
# define YYSKIP() is.ignore ()
|
||||
# define YYBACKUP() marker = is.tellg ()
|
||||
# define YYBACKUPCTX() ctxmarker = is.tellg ()
|
||||
# define YYRESTORE() is.seekg (marker)
|
||||
# define YYRESTORECTX() is.seekg (ctxmarker)
|
||||
# define YYLESSTHAN(n) limit - is.tellg () < n
|
||||
# define YYFILL(n) {}
|
||||
/*!re2c
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
const char buffer [] = "int buffer [1024]";
|
||||
std::istringstream is (buffer);
|
||||
return !lex (is, sizeof (buffer));
|
||||
}
|
|
@ -11,13 +11,14 @@ typedef unsigned char uchar;
|
|||
#define YYCURSOR cursor
|
||||
#define YYLIMIT s->lim
|
||||
#define YYMARKER s->ptr
|
||||
#define YYCTXMARKER s->ctx
|
||||
#define YYFILL {cursor = fill(s, cursor);}
|
||||
|
||||
#define RETURN(i) {s->cur = cursor; return i;}
|
||||
|
||||
typedef struct Scanner {
|
||||
int fd;
|
||||
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
|
||||
uchar *bot, *tok, *ptr, *ctx, *cur, *pos, *lim, *top, *eof;
|
||||
uint line;
|
||||
} Scanner;
|
||||
|
|
@ -226,14 +226,14 @@ public:
|
|||
|
||||
start:
|
||||
|
||||
/*!re2c
|
||||
|
||||
/*!re2c
|
||||
re2c:startlabel = 1;
|
||||
eol = "\n";
|
||||
eof = "\000";
|
||||
digit = [0-9];
|
||||
integer = digit+;
|
||||
alpha = [A-Za-z_];
|
||||
any = [\000-\0377];
|
||||
any = [\000-\377];
|
||||
space = [ \h\t\v\f\r];
|
||||
|
||||
"if" { SEND(kIf); }
|
|
@ -1,44 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define RET(n) printf("%d\n", n); return n
|
||||
|
||||
int scan(char *s, int l){
|
||||
char *p = s;
|
||||
char *q;
|
||||
#define YYCTYPE char
|
||||
#define YYCURSOR p
|
||||
#define YYLIMIT (s+l)
|
||||
#define YYMARKER q
|
||||
#define YYFILL(n)
|
||||
/*!re2c
|
||||
'a'{1}"\n" {RET(1);}
|
||||
'a'{2,3}"\n" {RET(2);}
|
||||
'a'{6}"\n" {RET(4);}
|
||||
'a'{4,}"\n" {RET(3);}
|
||||
[^aq]|"\n" {RET(0);}
|
||||
*/
|
||||
}
|
||||
|
||||
#define do_scan(str) scan(str, strlen(str))
|
||||
|
||||
main()
|
||||
{
|
||||
do_scan("a\n");
|
||||
do_scan("aa\n");
|
||||
do_scan("aaa\n");
|
||||
do_scan("aaaa\n");
|
||||
do_scan("q");
|
||||
do_scan("a");
|
||||
do_scan("A\n");
|
||||
do_scan("AA\n");
|
||||
do_scan("aAa\n");
|
||||
do_scan("AaaA\n");
|
||||
do_scan("Q");
|
||||
do_scan("AaaAa\n");
|
||||
do_scan("AaaAaA\n");
|
||||
do_scan("A");
|
||||
do_scan("\n");
|
||||
do_scan("0");
|
||||
}
|
|
@ -1 +0,0 @@
|
|||
Replacement modules for an existing REXX interpreter. Not standalone.
|
|
@ -1,41 +0,0 @@
|
|||
/*
 * Refill the scanner buffer of the file-level scanner state `s` and return
 * the (possibly relocated) cursor.
 *
 * Steps, in order:
 *   - advance s->pos by the distance scanned since the last mark (s->mrk);
 *   - if consumed bytes precede s->tok, compact the buffer: with s->eot set,
 *     the token [tok, eot) and the unread tail [cursor, lim) are packed
 *     back to back at s->bot; otherwise the whole [tok, lim) region is slid
 *     down and cursor/lim are rebased;
 *   - if fewer than 512 bytes of room remain after s->lim, move the data
 *     into a larger allocation and free the old one;
 *   - record the new mark and, when ScanCBIO.file is set, read up to 512
 *     bytes, zero-padding a short read so that lim always advances by 512.
 */
uchar *ScanFill(uchar *cursor){
	unsigned cnt = s->tok - s->bot;
	s->pos += cursor - s->mrk;
	if(cnt){
		if(s->eot){
			unsigned len = s->eot - s->tok;
			/* All copies in this branch stay inside one buffer and may
			 * overlap, so memmove is required (memcpy would be undefined). */
			memmove(s->bot, s->tok, len);
			s->eot = &s->bot[len];
			if((len = s->lim - cursor) != 0)
				memmove(s->eot, cursor, len);
			cursor = s->eot;
			s->lim = &cursor[len];
		} else {
			memmove(s->bot, s->tok, s->lim - s->tok);
			cursor -= cnt;
			s->lim -= cnt;
		}
		s->tok = s->bot;
		s->ptr -= cnt;
	}
	if((s->top - s->lim) < 512){
		/* NOTE(review): malloc failure is not handled, as in the original. */
		uchar *buf = (uchar*) malloc(((s->lim - s->bot) + 512)*sizeof(uchar));
		memcpy(buf, s->bot, s->lim - s->bot);
		s->tok = buf;
		s->ptr = &buf[s->ptr - s->bot];
		if(s->eot)
			s->eot = &buf[s->eot - s->bot];
		cursor = &buf[cursor - s->bot];
		s->lim = &buf[s->lim - s->bot];
		s->top = &s->lim[512];
		free(s->bot);
		s->bot = buf;
	}
	s->mrk = cursor;
	if(ScanCBIO.file){
		/* read() returns -1 on error; in the unsigned count that would
		 * become a huge offset/length for the memset below. Treat an error
		 * as a zero-length read so the whole 512-byte slot is zero-filled. */
		long got = read(ScanCBIO.u.f.fd, (char*) s->lim, 512);
		if(got < 0)
			got = 0;
		cnt = (unsigned) got;
		if(cnt != 512)
			memset(&s->lim[cnt], 0, 512 - cnt);
		s->lim += 512;
	}
	return cursor;
}
|
|
@ -1,7 +0,0 @@
|
|||
/*!re2c
|
||||
"print" {return PRINT;}
|
||||
[a-z]+ {return ID;}
|
||||
[0-9]+ {return DEC;}
|
||||
"0x" [0-9a-f]+ {return HEX;}
|
||||
[\000-\377] {return ERR;}
|
||||
*/
|
|
@ -1,13 +0,0 @@
|
|||
#define NULL ((char*) 0)
|
||||
char *scan(char *p){
|
||||
char *q;
|
||||
#define YYCTYPE char
|
||||
#define YYCURSOR p
|
||||
#define YYLIMIT p
|
||||
#define YYMARKER q
|
||||
#define YYFILL(n)
|
||||
/*!re2c
|
||||
[0-9]+ {return YYCURSOR;}
|
||||
[\000-\377] {return NULL;}
|
||||
*/
|
||||
}
|
|
@ -1,73 +0,0 @@
|
|||
/* $Id: globals.h 713 2007-04-29 15:33:47Z helly $ */
|
||||
#ifndef _globals_h
|
||||
#define _globals_h
|
||||
|
||||
#include "basics.h"
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include "stream_lc.h"
|
||||
#include "code_names.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
extern file_info sourceFileInfo;
|
||||
extern file_info outputFileInfo;
|
||||
|
||||
extern bool bFlag;
|
||||
extern bool dFlag;
|
||||
extern bool eFlag;
|
||||
extern bool fFlag;
|
||||
extern bool gFlag;
|
||||
extern bool iFlag;
|
||||
extern bool sFlag;
|
||||
extern bool uFlag;
|
||||
extern bool wFlag;
|
||||
|
||||
extern bool bNoGenerationDate;
|
||||
|
||||
extern bool bSinglePass;
|
||||
extern bool bFirstPass;
|
||||
extern bool bLastPass;
|
||||
|
||||
extern bool bUsedYYAccept;
|
||||
extern bool bUsedYYMaxFill;
|
||||
extern bool bUsedYYMarker;
|
||||
|
||||
extern bool bUseStartLabel;
|
||||
extern std::string startLabelName;
|
||||
extern std::string labelPrefix;
|
||||
extern std::string yychConversion;
|
||||
extern uint maxFill;
|
||||
extern uint next_label;
|
||||
extern uint cGotoThreshold;
|
||||
|
||||
/* configurations */
|
||||
extern uint topIndent;
|
||||
extern std::string indString;
|
||||
extern bool yybmHexTable;
|
||||
extern bool bUseStateAbort;
|
||||
extern bool bUseStateNext;
|
||||
extern bool bWroteGetState;
|
||||
extern bool bUseYYFill;
|
||||
extern bool bUseYYFillParam;
|
||||
|
||||
extern uint asc2ebc[256];
|
||||
extern uint ebc2asc[256];
|
||||
|
||||
extern uint *xlat, *talx;
|
||||
|
||||
extern uint next_fill_index;
|
||||
extern uint last_fill_index;
|
||||
extern std::set<uint> vUsedLabels;
|
||||
extern re2c::CodeNames mapCodeName;
|
||||
|
||||
extern uint nRealChars;
|
||||
|
||||
extern char octCh(uint c);
|
||||
extern char hexCh(uint c);
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
|
@ -1,56 +0,0 @@
|
|||
/* $Id: ins.h 535 2006-05-25 13:36:14Z helly $ */
|
||||
#ifndef _ins_h
|
||||
#define _ins_h
|
||||
|
||||
#include "basics.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
typedef unsigned short Char;
|
||||
|
||||
const uint CHAR = 0;
|
||||
const uint GOTO = 1;
|
||||
const uint FORK = 2;
|
||||
const uint TERM = 3;
|
||||
const uint CTXT = 4;
|
||||
|
||||
union Ins {
|
||||
|
||||
struct
|
||||
{
|
||||
byte tag;
|
||||
byte marked;
|
||||
void *link;
|
||||
}
|
||||
|
||||
i;
|
||||
|
||||
struct
|
||||
{
|
||||
ushort value;
|
||||
ushort bump;
|
||||
void *link;
|
||||
}
|
||||
|
||||
c;
|
||||
};
|
||||
|
||||
inline bool isMarked(Ins *i)
|
||||
{
|
||||
return i->i.marked != 0;
|
||||
}
|
||||
|
||||
inline void mark(Ins *i)
|
||||
{
|
||||
i->i.marked = true;
|
||||
}
|
||||
|
||||
inline void unmark(Ins *i)
|
||||
{
|
||||
i->i.marked = false;
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
|
@ -1,351 +0,0 @@
|
|||
/* $Id: main.cc 691 2007-04-22 15:07:39Z helly $ */
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#elif defined(_WIN32)
|
||||
#include "config_w32.h"
|
||||
#endif
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "globals.h"
|
||||
#include "parser.h"
|
||||
#include "dfa.h"
|
||||
#include "mbo_getopt.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
file_info sourceFileInfo;
|
||||
file_info outputFileInfo;
|
||||
|
||||
bool bFlag = false;
|
||||
bool dFlag = false;
|
||||
bool eFlag = false;
|
||||
bool fFlag = false;
|
||||
bool gFlag = false;
|
||||
bool iFlag = false;
|
||||
bool sFlag = false;
|
||||
bool uFlag = false;
|
||||
bool wFlag = false;
|
||||
|
||||
bool bNoGenerationDate = false;
|
||||
|
||||
bool bSinglePass = false;
|
||||
bool bFirstPass = true;
|
||||
bool bLastPass = false;
|
||||
|
||||
bool bUsedYYAccept = false;
|
||||
bool bUsedYYMaxFill = false;
|
||||
bool bUsedYYMarker = true;
|
||||
|
||||
bool bUseStartLabel = false;
|
||||
bool bUseStateNext = false;
|
||||
bool bUseYYFill = true;
|
||||
bool bUseYYFillParam = true;
|
||||
|
||||
std::string startLabelName;
|
||||
std::string labelPrefix("yy");
|
||||
std::string yychConversion("");
|
||||
uint maxFill = 1;
|
||||
uint next_label = 0;
|
||||
uint cGotoThreshold = 9;
|
||||
|
||||
uint topIndent = 0;
|
||||
std::string indString("\t");
|
||||
bool yybmHexTable = false;
|
||||
bool bUseStateAbort = false;
|
||||
bool bWroteGetState = false;
|
||||
|
||||
uint nRealChars = 256;
|
||||
|
||||
uint next_fill_index = 0;
|
||||
uint last_fill_index = 0;
|
||||
std::set<uint> vUsedLabels;
|
||||
re2c::CodeNames mapCodeName;
|
||||
|
||||
free_list<RegExp*> RegExp::vFreeList;
|
||||
free_list<Range*> Range::vFreeList;
|
||||
|
||||
using namespace std;
|
||||
|
||||
static char *opt_arg = NULL;
|
||||
static int opt_ind = 1;
|
||||
|
||||
static const mbo_opt_struct OPTIONS[] =
|
||||
{
|
||||
mbo_opt_struct('?', 0, "help"),
|
||||
mbo_opt_struct('b', 0, "bit-vectors"),
|
||||
mbo_opt_struct('d', 0, "debug-output"),
|
||||
mbo_opt_struct('e', 0, "ecb"),
|
||||
mbo_opt_struct('f', 0, "storable-state"),
|
||||
mbo_opt_struct('g', 0, "computed-gotos"),
|
||||
mbo_opt_struct('h', 0, "help"),
|
||||
mbo_opt_struct('i', 0, "no-debug-info"),
|
||||
mbo_opt_struct('o', 1, "output"),
|
||||
mbo_opt_struct('s', 0, "nested-ifs"),
|
||||
mbo_opt_struct('u', 0, "unicode"),
|
||||
mbo_opt_struct('v', 0, "version"),
|
||||
mbo_opt_struct('V', 0, "vernum"),
|
||||
mbo_opt_struct('w', 0, "wide-chars"),
|
||||
mbo_opt_struct('1', 0, "single-pass"),
|
||||
mbo_opt_struct(10, 0, "no-generation-date"),
|
||||
mbo_opt_struct('-', 0, NULL) /* end of args */
|
||||
};
|
||||
|
||||
static void usage()
|
||||
{
|
||||
cerr << "usage: re2c [-bdefghisvVw1] [-o file] file\n"
|
||||
"\n"
|
||||
"-? -h --help Display this info.\n"
|
||||
"\n"
|
||||
"-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
|
||||
" coax better code out of the compiler. Most useful for\n"
|
||||
" specifications with more than a few keywords (e.g. for\n"
|
||||
" most programming languages).\n"
|
||||
"\n"
|
||||
"-d --debug-output Creates a parser that dumps information during\n"
|
||||
" about the current position and in which state the\n"
|
||||
" parser is.\n"
|
||||
"\n"
|
||||
"-e --ecb Cross-compile from an ASCII platform to\n"
|
||||
" an EBCDIC one.\n"
|
||||
"\n"
|
||||
"-f --storable-state Generate a scanner that supports storable states.\n"
|
||||
"\n"
|
||||
"-g --computed-gotos Implies -b. Generate computed goto code (only useable\n"
|
||||
" with gcc).\n"
|
||||
"\n"
|
||||
"-i --no-debug-info Do not generate '#line' info (usefull for versioning).\n"
|
||||
"\n"
|
||||
"-o --output=output Specify the output file instead of stdout\n"
|
||||
" This cannot be used together with -e switch.\n"
|
||||
"\n"
|
||||
"-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
|
||||
" need this assist to generate better code.\n"
|
||||
"\n"
|
||||
"-u --unicode Implies -w but supports the full Unicode character set.\n"
|
||||
"\n"
|
||||
"-v --version Show version information.\n"
|
||||
"\n"
|
||||
"-V --vernum Show version as one number.\n"
|
||||
"\n"
|
||||
"-w --wide-chars Create a parser that supports wide chars (UCS-2). This\n"
|
||||
" implies -s and cannot be used together with -e switch.\n"
|
||||
"\n"
|
||||
"-1 --single-pass Force single pass generation, this cannot be combined\n"
|
||||
" with -f and disables YYMAXFILL generation prior to last\n"
|
||||
" re2c block.\n"
|
||||
"\n"
|
||||
"--no-generation-date Suppress date output in the generated output so that it\n"
|
||||
" only shows the re2c version.\n"
|
||||
;
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
using namespace re2c;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int c;
|
||||
const char *sourceFileName = 0;
|
||||
const char *outputFileName = 0;
|
||||
|
||||
if (argc == 1)
|
||||
{
|
||||
usage();
|
||||
return 2;
|
||||
}
|
||||
|
||||
while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0)) != -1)
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
|
||||
case 'b':
|
||||
bFlag = true;
|
||||
sFlag = true;
|
||||
break;
|
||||
|
||||
case 'e':
|
||||
xlat = asc2ebc;
|
||||
talx = ebc2asc;
|
||||
eFlag = true;
|
||||
break;
|
||||
|
||||
case 'd':
|
||||
dFlag = true;
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
fFlag = true;
|
||||
break;
|
||||
|
||||
case 'g':
|
||||
gFlag = true;
|
||||
bFlag = true;
|
||||
sFlag = true;
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
iFlag = true;
|
||||
break;
|
||||
|
||||
case 'o':
|
||||
outputFileName = opt_arg;
|
||||
break;
|
||||
|
||||
case 's':
|
||||
sFlag = true;
|
||||
break;
|
||||
|
||||
case '1':
|
||||
bSinglePass = true;
|
||||
break;
|
||||
|
||||
case 'v':
|
||||
cout << "re2c " << PACKAGE_VERSION << "\n";
|
||||
return 2;
|
||||
|
||||
case 'V': {
|
||||
string vernum(PACKAGE_VERSION);
|
||||
|
||||
if (vernum[1] == '.')
|
||||
{
|
||||
vernum.insert(0, "0");
|
||||
}
|
||||
vernum.erase(2, 1);
|
||||
if (vernum[3] == '.')
|
||||
{
|
||||
vernum.insert(2, "0");
|
||||
}
|
||||
vernum.erase(4, 1);
|
||||
if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9')
|
||||
{
|
||||
vernum.insert(4, "0");
|
||||
}
|
||||
vernum.resize(6);
|
||||
cout << vernum << endl;
|
||||
return 2;
|
||||
}
|
||||
|
||||
case 'w':
|
||||
nRealChars = (1<<16); /* 0x10000 */
|
||||
sFlag = true;
|
||||
wFlag = true;
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
nRealChars = 0x110000; /* 17 times w-Flag */
|
||||
sFlag = true;
|
||||
uFlag = true;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
case '?':
|
||||
default:
|
||||
usage();
|
||||
return 2;
|
||||
|
||||
case 10:
|
||||
bNoGenerationDate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ((bFlag || fFlag) && bSinglePass) {
|
||||
std::cerr << "re2c: error: Cannot combine -1 and -b or -f switch\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (wFlag && eFlag)
|
||||
{
|
||||
std::cerr << "re2c: error: Cannot combine -e with -w or -u switch\n";
|
||||
return 2;
|
||||
}
|
||||
if (wFlag && uFlag)
|
||||
{
|
||||
std::cerr << "re2c: error: Cannot combine -u with -w switch\n";
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (uFlag)
|
||||
{
|
||||
wFlag = true;
|
||||
}
|
||||
|
||||
if (argc == opt_ind + 1)
|
||||
{
|
||||
sourceFileName = argv[opt_ind];
|
||||
}
|
||||
else
|
||||
{
|
||||
usage();
|
||||
return 2;
|
||||
}
|
||||
|
||||
// set up the source stream
|
||||
re2c::ifstream_lc source;
|
||||
|
||||
if (sourceFileName[0] == '-' && sourceFileName[1] == '\0')
|
||||
{
|
||||
if (fFlag)
|
||||
{
|
||||
std::cerr << "re2c: error: multiple /*!re2c stdin is not acceptable when -f is specified\n";
|
||||
return 1;
|
||||
}
|
||||
sourceFileName = "<stdin>";
|
||||
source.open(stdin);
|
||||
}
|
||||
else if (!source.open(sourceFileName).is_open())
|
||||
{
|
||||
cerr << "re2c: error: cannot open " << sourceFileName << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
// set up the output stream
|
||||
re2c::ofstream_lc output;
|
||||
|
||||
if (outputFileName == 0 || (sourceFileName[0] == '-' && sourceFileName[1] == '\0'))
|
||||
{
|
||||
outputFileName = "<stdout>";
|
||||
output.open(stdout);
|
||||
}
|
||||
else if (!output.open(outputFileName).is_open())
|
||||
{
|
||||
cerr << "re2c: error: cannot open " << outputFileName << "\n";
|
||||
return 1;
|
||||
}
|
||||
Scanner scanner(sourceFileName, source, output);
|
||||
sourceFileInfo = file_info(sourceFileName, &scanner);
|
||||
outputFileInfo = file_info(outputFileName, &output);
|
||||
|
||||
if (!bSinglePass)
|
||||
{
|
||||
bUsedYYMarker = false;
|
||||
|
||||
re2c::ifstream_lc null_source;
|
||||
|
||||
if (!null_source.open(sourceFileName).is_open())
|
||||
{
|
||||
cerr << "re2c: error: cannot re-open " << sourceFileName << "\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
null_stream null_dev;
|
||||
Scanner null_scanner(sourceFileName, null_source, null_dev);
|
||||
parse(null_scanner, null_dev);
|
||||
next_label = 0;
|
||||
next_fill_index = 0;
|
||||
bWroteGetState = false;
|
||||
bUsedYYMaxFill = false;
|
||||
bFirstPass = false;
|
||||
}
|
||||
|
||||
bLastPass = true;
|
||||
parse(scanner, output);
|
||||
return 0;
|
||||
}
|
|
@ -1,210 +0,0 @@
|
|||
/*
|
||||
Author: Marcus Boerger <helly@users.sourceforge.net>
|
||||
*/
|
||||
|
||||
/* $Id: mbo_getopt.cc 698 2007-04-23 21:06:56Z helly $ */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include "mbo_getopt.h"
|
||||
#define OPTERRCOLON (1)
|
||||
#define OPTERRNF (2)
|
||||
#define OPTERRARG (3)
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static int mbo_opt_error(int, char * const *argv, int oint, int optchr, int err, int show_err)
|
||||
{
|
||||
if (show_err)
|
||||
{
|
||||
fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr + 1);
|
||||
|
||||
switch (err)
|
||||
{
|
||||
|
||||
case OPTERRCOLON:
|
||||
fprintf(stderr, ": in flags\n");
|
||||
break;
|
||||
|
||||
case OPTERRNF:
|
||||
fprintf(stderr, "option not found %c\n", argv[oint][optchr]);
|
||||
break;
|
||||
|
||||
case OPTERRARG:
|
||||
fprintf(stderr, "no argument for option %c\n", argv[oint][optchr]);
|
||||
break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "unknown\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ('?');
|
||||
}
|
||||
|
||||
int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct *opts, char **optarg, int *optind, int show_err)
|
||||
{
|
||||
static int optchr = 0;
|
||||
static int dash = 0; /* have already seen the - */
|
||||
int arg_start = 2;
|
||||
|
||||
int opts_idx = -1;
|
||||
|
||||
if (*optind >= argc)
|
||||
{
|
||||
return (EOF);
|
||||
}
|
||||
|
||||
if (!dash)
|
||||
{
|
||||
if ((argv[*optind][0] != '-'))
|
||||
{
|
||||
return (EOF);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!argv[*optind][1])
|
||||
{
|
||||
/*
|
||||
* use to specify stdin. Need to let pgm process this and
|
||||
* the following args
|
||||
*/
|
||||
return (EOF);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-'))
|
||||
{
|
||||
/* '--' indicates end of args if not followed by a known long option name */
|
||||
if (argv[*optind][2] == '\0') {
|
||||
(*optind)++;
|
||||
return(EOF);
|
||||
}
|
||||
|
||||
while (1)
|
||||
{
|
||||
opts_idx++;
|
||||
|
||||
if (opts[opts_idx].opt_char == '-')
|
||||
{
|
||||
(*optind)++;
|
||||
return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err));
|
||||
}
|
||||
else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
optchr = 0;
|
||||
dash = 0;
|
||||
arg_start = 2 + strlen(opts[opts_idx].opt_name);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (!dash)
|
||||
{
|
||||
dash = 1;
|
||||
optchr = 1;
|
||||
}
|
||||
|
||||
/* Check if the guy tries to do a -: kind of flag */
|
||||
if (argv[*optind][optchr] == ':')
|
||||
{
|
||||
dash = 0;
|
||||
(*optind)++;
|
||||
return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRCOLON, show_err));
|
||||
}
|
||||
arg_start = 1 + optchr;
|
||||
}
|
||||
|
||||
if (opts_idx < 0)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
opts_idx++;
|
||||
|
||||
if (opts[opts_idx].opt_char == '-')
|
||||
{
|
||||
int errind = *optind;
|
||||
int errchr = optchr;
|
||||
|
||||
if (!argv[*optind][optchr + 1])
|
||||
{
|
||||
dash = 0;
|
||||
(*optind)++;
|
||||
}
|
||||
else
|
||||
{
|
||||
optchr++;
|
||||
arg_start++;
|
||||
}
|
||||
|
||||
return (mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err));
|
||||
}
|
||||
else if (argv[*optind][optchr] == opts[opts_idx].opt_char)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (opts[opts_idx].need_param)
|
||||
{
|
||||
/* Check for cases where the value of the argument
|
||||
is in the form -<arg> <val> or in the form -<arg><val> */
|
||||
dash = 0;
|
||||
|
||||
if (!argv[*optind][arg_start])
|
||||
{
|
||||
(*optind)++;
|
||||
|
||||
if (*optind == argc)
|
||||
{
|
||||
return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err));
|
||||
}
|
||||
|
||||
*optarg = argv[(*optind)++];
|
||||
}
|
||||
else
|
||||
{
|
||||
*optarg = &argv[*optind][arg_start];
|
||||
(*optind)++;
|
||||
}
|
||||
|
||||
return opts[opts_idx].opt_char;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (arg_start >= 2 && !((argv[*optind][0] == '-') && (argv[*optind][1] == '-')))
|
||||
{
|
||||
if (!argv[*optind][optchr + 1])
|
||||
{
|
||||
dash = 0;
|
||||
(*optind)++;
|
||||
}
|
||||
else
|
||||
{
|
||||
optchr++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
(*optind)++;
|
||||
}
|
||||
|
||||
return opts[opts_idx].opt_char;
|
||||
}
|
||||
|
||||
assert(0);
|
||||
return (0); /* never reached */
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
Author: Marcus Boerger <helly@users.sourceforge.net>
|
||||
*/
|
||||
|
||||
/* $Id: mbo_getopt.h 539 2006-05-25 13:37:38Z helly $ */
|
||||
|
||||
/* Define structure for one recognized option (both single char and long name).
|
||||
 * If opt_char is '-' this is the last option.
|
||||
*/
|
||||
|
||||
#ifndef RE2C_MBO_GETOPT_H_INCLUDE_GUARD_
|
||||
#define RE2C_MBO_GETOPT_H_INCLUDE_GUARD_
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
/* One entry of the option table scanned by mbo_getopt(). */
struct mbo_opt_struct
{
    const char   opt_char;   /* short option character; '-' marks the end of the table */
    const int    need_param; /* non-zero when the option takes a value */
    const char * opt_name;   /* long option name (without the leading "--"), may be NULL */

    mbo_opt_struct(char short_char, int takes_param, const char * long_name)
        : opt_char(short_char)
        , need_param(takes_param)
        , opt_name(long_name)
    {
    }
};
|
||||
|
||||
int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct *opts, char **optarg, int *optind, int show_err);
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
||||
|
1807
tools/re2c/parser.cc
1807
tools/re2c/parser.cc
File diff suppressed because it is too large
Load diff
|
@ -1,56 +0,0 @@
|
|||
/* $Id: parser.h 565 2006-06-05 22:07:13Z helly $ */
|
||||
#ifndef _parser_h
|
||||
#define _parser_h
|
||||
|
||||
#include "scanner.h"
|
||||
#include "re.h"
|
||||
#include <iosfwd>
|
||||
#include <map>
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class Symbol
|
||||
{
|
||||
public:
|
||||
|
||||
RegExp* re;
|
||||
|
||||
static Symbol *find(const SubStr&);
|
||||
static void ClearTable();
|
||||
|
||||
typedef std::map<std::string, Symbol*> SymbolTable;
|
||||
|
||||
protected:
|
||||
|
||||
Symbol(const SubStr& str)
|
||||
: re(NULL)
|
||||
, name(str)
|
||||
{
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
static SymbolTable symbol_table;
|
||||
|
||||
Str name;
|
||||
|
||||
#if PEDANTIC
|
||||
Symbol(const Symbol& oth)
|
||||
: re(oth.re)
|
||||
, name(oth.name)
|
||||
{
|
||||
}
|
||||
Symbol& operator = (const Symbol& oth)
|
||||
{
|
||||
new(this) Symbol(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
void parse(Scanner&, std::ostream&);
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
|
@ -1,220 +0,0 @@
|
|||
%{
|
||||
|
||||
/* $Id: parser.y 674 2007-04-16 21:39:11Z helly $ */
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "globals.h"
|
||||
#include "parser.h"
|
||||
#include "basics.h"
|
||||
|
||||
#define YYMALLOC malloc
|
||||
#define YYFREE free
|
||||
|
||||
using namespace re2c;
|
||||
|
||||
extern "C"
|
||||
{
|
||||
int yylex();
|
||||
void yyerror(const char*);
|
||||
}
|
||||
|
||||
static re2c::uint accept;
|
||||
static RegExp *spec;
|
||||
static Scanner *in = NULL;
|
||||
|
||||
/* Bison version 1.875 emits a definition that is not working
|
||||
 * with several g++ versions. Hence we disable it here.
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
#define __attribute__(x)
|
||||
#endif
|
||||
|
||||
/* strdup() isn't standard C, so if we don't have it, we'll create our
|
||||
* own version
|
||||
*/
|
||||
#if !defined(HAVE_STRDUP)
|
||||
static char* strdup(const char* s)
|
||||
{
|
||||
char* rv = (char*)malloc(strlen(s) + 1);
|
||||
if (rv == NULL)
|
||||
return NULL;
|
||||
strcpy(rv, s);
|
||||
return rv;
|
||||
}
|
||||
#endif
|
||||
|
||||
%}
|
||||
|
||||
%start spec
|
||||
|
||||
%union {
|
||||
re2c::Symbol *symbol;
|
||||
re2c::RegExp *regexp;
|
||||
re2c::Token *token;
|
||||
char op;
|
||||
int number;
|
||||
re2c::ExtOp extop;
|
||||
re2c::Str *str;
|
||||
};
|
||||
|
||||
%token CLOSESIZE CLOSE ID CODE RANGE STRING
|
||||
%token CONFIG VALUE NUMBER
|
||||
|
||||
%type <op> CLOSE
|
||||
%type <op> close
|
||||
%type <extop> CLOSESIZE
|
||||
%type <symbol> ID
|
||||
%type <token> CODE
|
||||
%type <regexp> RANGE STRING
|
||||
%type <regexp> rule look expr diff term factor primary
|
||||
%type <str> CONFIG VALUE
|
||||
%type <number> NUMBER
|
||||
|
||||
%%
|
||||
|
||||
/* A specification is a sequence of rules and declarations. Every rule is
 * merged into the alternation kept in the file-static `spec`. */
spec:
        /* empty */
        {
            accept = 0;
            spec = NULL;
        }
    |   spec rule
        {
            if (spec)
                spec = mkAlt(spec, $2);
            else
                spec = $2;
        }
    |   spec decl
    ;

/* Named definitions and inplace configurations. */
decl:
        ID '=' expr ';'
        {
            if ($1->re)
                in->fatal("sym already defined");
            $1->re = $3;
        }
    |   ID '=' expr '/'
        {
            in->fatal("trailing contexts are not allowed in named definitions");
        }
    |   CONFIG '=' VALUE ';'
        {
            in->config(*$1, *$3);
            delete $1;
            delete $3;
        }
    |   CONFIG '=' NUMBER ';'
        {
            in->config(*$1, $3);
            delete $1;
        }
    ;

/* expression, optional trailing context, code; rules are numbered in order */
rule:
        expr look CODE
        {
            $$ = new RuleOp($1, $2, $3, accept++);
        }
    ;

/* Trailing context; a NullOp stands in when there is none. */
look:
        /* empty */
        {
            $$ = new NullOp;
        }
    |   '/' expr
        {
            $$ = $2;
        }
    ;

expr:
        diff
        {
            $$ = $1;
        }
    |   expr '|' diff
        {
            $$ = mkAlt($1, $3);
        }
    ;

/* mkDiff() yields NULL when the difference cannot be built. */
diff:
        term
        {
            $$ = $1;
        }
    |   diff '\\' term
        {
            $$ = mkDiff($1, $3);
            if (!$$)
                in->fatal("can only difference char sets");
        }
    ;

term:
        factor
        {
            $$ = $1;
        }
    |   term factor
        {
            $$ = new CatOp($1, $2);
        }
    ;

factor:
        primary
        {
            $$ = $1;
        }
    |   primary close
        {
            switch ($2)
            {
            case '*':
                $$ = mkAlt(new CloseOp($1), new NullOp());
                break;
            case '+':
                $$ = new CloseOp($1);
                break;
            case '?':
                $$ = mkAlt($1, new NullOp());
                break;
            }
        }
    |   primary CLOSESIZE
        {
            $$ = new CloseVOp($1, $2.minsize, $2.maxsize);
        }
    ;

/* A run of closure operators: identical operators collapse to that
 * operator, any mix becomes '*'. */
close:
        CLOSE
        {
            $$ = $1;
        }
    |   close CLOSE
        {
            if ($1 == $2)
                $$ = $1;
            else
                $$ = '*';
        }
    ;

primary:
        ID
        {
            if (!$1->re)
                in->fatal("can't find symbol");
            $$ = $1->re;
        }
    |   RANGE
        {
            $$ = $1;
        }
    |   STRING
        {
            $$ = $1;
        }
    |   '(' expr ')'
        {
            $$ = $2;
        }
    ;
|
||||
|
||||
%%
|
||||
|
||||
extern "C" {
|
||||
void yyerror(const char* s)
|
||||
{
|
||||
in->fatal(s);
|
||||
}
|
||||
|
||||
int yylex(){
|
||||
return in ? in->scan() : 0;
|
||||
}
|
||||
} // end extern "C"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
/*
 * Translate the input read through scanner `i` and write the result to `o`.
 *
 * Writes the "Generated by re2c" banner (with the date unless
 * bNoGenerationDate is set), then alternates between echoing input and
 * parsing one specification block, emitting code for every block that
 * produced a specification. The global free lists and the symbol table
 * are released before returning.
 */
void parse(Scanner& i, std::ostream& o)
{
    in = &i; /* scanner used by yylex()/yyerror() while parsing */

    o << "/* Generated by re2c " PACKAGE_VERSION;

    if (!bNoGenerationDate)
    {
        time_t now = time(NULL);

        o << " on ";
        /* only the first 24 characters of the ctime() text are written */
        o.write(ctime(&now), 24);
    }

    o << " */\n";
    o << sourceFileInfo;

    while (i.echo())
    {
        yyparse();

        if (spec != NULL)
        {
            genCode(o, topIndent, spec);
        }

        o << sourceFileInfo;
    }

    RegExp::vFreeList.clear();
    Range::vFreeList.clear();
    Symbol::ClearTable();
    in = NULL;
}
|
||||
|
||||
} // end namespace re2c
|
496
tools/re2c/re.h
496
tools/re2c/re.h
|
@ -1,496 +0,0 @@
|
|||
/* $Id: re.h 775 2007-07-10 19:33:17Z helly $ */
|
||||
#ifndef _re_h
|
||||
#define _re_h
|
||||
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include "token.h"
|
||||
#include "ins.h"
|
||||
#include "globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
/*
 * Set of pointers that deletes every remaining element in clear() and
 * therefore also on destruction.
 *
 * While clear() is running, erase() does nothing. This lets an element's
 * destructor call erase() on the list without modifying the set that is
 * being iterated.
 */
template<class T>
class free_list: protected std::set<T>
{
    typedef std::set<T> base_type;

public:
    typedef typename base_type::iterator iterator;
    typedef typename base_type::size_type size_type;
    typedef typename base_type::key_type key_type;

    free_list(): in_clear(false)
    {
    }

    using base_type::insert;

    /* Removes key and returns the number of elements removed; always 0
       while clear() is in progress. */
    size_type erase(const key_type& key)
    {
        if (in_clear)
        {
            return 0;
        }
        return base_type::erase(key);
    }

    /* Deletes every stored pointer, then empties the set. */
    void clear()
    {
        in_clear = true;

        for (iterator cur = this->begin(); cur != this->end(); ++cur)
        {
            delete *cur;
        }
        base_type::clear();

        in_clear = false;
    }

    ~free_list()
    {
        clear();
    }

protected:
    bool in_clear; /* true while clear() is deleting the elements */
};
|
||||
|
||||
/* Closure operator together with its minimum and maximum repetition counts. */
struct extop
{
    char op;
    int minsize;
    int maxsize;
};

typedef extop ExtOp;
|
||||
|
||||
struct CharPtn
|
||||
{
|
||||
uint card;
|
||||
CharPtn *fix;
|
||||
CharPtn *nxt;
|
||||
};
|
||||
|
||||
typedef CharPtn *CharPtr;
|
||||
|
||||
struct CharSet
|
||||
{
|
||||
CharSet();
|
||||
~CharSet();
|
||||
|
||||
CharPtn *fix;
|
||||
CharPtn *freeHead, **freeTail;
|
||||
CharPtr *rep;
|
||||
CharPtn *ptn;
|
||||
};
|
||||
|
||||
class Range
|
||||
{
|
||||
|
||||
public:
|
||||
Range *next;
|
||||
uint lb, ub; // [lb,ub)
|
||||
|
||||
static free_list<Range*> vFreeList;
|
||||
|
||||
public:
|
||||
Range(uint l, uint u) : next(NULL), lb(l), ub(u)
|
||||
{
|
||||
vFreeList.insert(this);
|
||||
}
|
||||
|
||||
Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub)
|
||||
{
|
||||
vFreeList.insert(this);
|
||||
}
|
||||
|
||||
~Range()
|
||||
{
|
||||
vFreeList.erase(this);
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const Range&);
|
||||
friend std::ostream& operator<<(std::ostream&, const Range*);
|
||||
};
|
||||
|
||||
/* Prints the range r points to; a NULL pointer prints nothing. */
inline std::ostream& operator<<(std::ostream &o, const Range *r)
{
    if (r)
    {
        o << *r;
    }
    return o;
}
|
||||
|
||||
class RegExp
|
||||
{
|
||||
|
||||
public:
|
||||
uint size;
|
||||
|
||||
static free_list<RegExp*> vFreeList;
|
||||
|
||||
public:
|
||||
RegExp() : size(0)
|
||||
{
|
||||
vFreeList.insert(this);
|
||||
}
|
||||
|
||||
virtual ~RegExp()
|
||||
{
|
||||
vFreeList.erase(this);
|
||||
}
|
||||
|
||||
virtual const char *typeOf() = 0;
|
||||
RegExp *isA(const char *t)
|
||||
{
|
||||
return typeOf() == t ? this : NULL;
|
||||
}
|
||||
|
||||
virtual void split(CharSet&) = 0;
|
||||
virtual void calcSize(Char*) = 0;
|
||||
virtual uint fixedLength();
|
||||
virtual void compile(Char*, Ins*) = 0;
|
||||
virtual void display(std::ostream&) const = 0;
|
||||
friend std::ostream& operator<<(std::ostream&, const RegExp&);
|
||||
friend std::ostream& operator<<(std::ostream&, const RegExp*);
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream &o, const RegExp &re)
|
||||
{
|
||||
re.display(o);
|
||||
return o;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream &o, const RegExp *re)
|
||||
{
|
||||
return o << *re;
|
||||
}
|
||||
|
||||
class NullOp: public RegExp
|
||||
{
|
||||
|
||||
public:
|
||||
static const char *type;
|
||||
|
||||
public:
|
||||
const char *typeOf()
|
||||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
void split(CharSet&);
|
||||
void calcSize(Char*);
|
||||
uint fixedLength();
|
||||
void compile(Char*, Ins*);
|
||||
void display(std::ostream &o) const
|
||||
{
|
||||
o << "_";
|
||||
}
|
||||
};
|
||||
|
||||
class MatchOp: public RegExp
|
||||
{
|
||||
|
||||
public:
|
||||
static const char *type;
|
||||
Range *match;
|
||||
|
||||
public:
|
||||
MatchOp(Range *m) : match(m)
|
||||
{
|
||||
}
|
||||
|
||||
const char *typeOf()
|
||||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
void split(CharSet&);
|
||||
void calcSize(Char*);
|
||||
uint fixedLength();
|
||||
void compile(Char*, Ins*);
|
||||
void display(std::ostream&) const;
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
MatchOp(const MatchOp& oth)
|
||||
: RegExp(oth)
|
||||
, match(oth.match)
|
||||
{
|
||||
}
|
||||
|
||||
MatchOp& operator = (const MatchOp& oth)
|
||||
{
|
||||
new(this) MatchOp(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class RuleOp: public RegExp
|
||||
{
|
||||
public:
|
||||
static const char *type;
|
||||
|
||||
private:
|
||||
RegExp *exp;
|
||||
|
||||
public:
|
||||
RegExp *ctx;
|
||||
Ins *ins;
|
||||
uint accept;
|
||||
Token *code;
|
||||
uint line;
|
||||
|
||||
public:
|
||||
RuleOp(RegExp*, RegExp*, Token*, uint);
|
||||
|
||||
~RuleOp()
|
||||
{
|
||||
delete code;
|
||||
}
|
||||
|
||||
const char *typeOf()
|
||||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
void split(CharSet&);
|
||||
void calcSize(Char*);
|
||||
void compile(Char*, Ins*);
|
||||
void display(std::ostream &o) const
|
||||
{
|
||||
o << exp << "/" << ctx << ";";
|
||||
}
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
RuleOp(const RuleOp& oth)
|
||||
: RegExp(oth)
|
||||
, exp(oth.exp)
|
||||
, ctx(oth.ctx)
|
||||
, ins(oth.ins)
|
||||
, accept(oth.accept)
|
||||
, code(oth.code)
|
||||
, line(oth.line)
|
||||
{
|
||||
}
|
||||
RuleOp& operator = (const RuleOp& oth)
|
||||
{
|
||||
new(this) RuleOp(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class RuleLine: public line_number
|
||||
{
|
||||
public:
|
||||
|
||||
RuleLine(const RuleOp& _op)
|
||||
: op(_op)
|
||||
{
|
||||
}
|
||||
|
||||
uint get_line() const
|
||||
{
|
||||
return op.code->line;
|
||||
}
|
||||
|
||||
const RuleOp& op;
|
||||
};
|
||||
|
||||
RegExp *mkAlt(RegExp*, RegExp*);
|
||||
|
||||
class AltOp: public RegExp
|
||||
{
|
||||
|
||||
private:
|
||||
RegExp *exp1, *exp2;
|
||||
|
||||
public:
|
||||
static const char *type;
|
||||
|
||||
public:
|
||||
AltOp(RegExp *e1, RegExp *e2)
|
||||
: exp1(e1)
|
||||
, exp2(e2)
|
||||
{
|
||||
}
|
||||
|
||||
const char *typeOf()
|
||||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
void split(CharSet&);
|
||||
void calcSize(Char*);
|
||||
uint fixedLength();
|
||||
void compile(Char*, Ins*);
|
||||
void display(std::ostream &o) const
|
||||
{
|
||||
o << exp1 << "|" << exp2;
|
||||
}
|
||||
|
||||
friend RegExp *mkAlt(RegExp*, RegExp*);
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
AltOp(const AltOp& oth)
|
||||
: RegExp(oth)
|
||||
, exp1(oth.exp1)
|
||||
, exp2(oth.exp2)
|
||||
{
|
||||
}
|
||||
AltOp& operator = (const AltOp& oth)
|
||||
{
|
||||
new(this) AltOp(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class CatOp: public RegExp
|
||||
{
|
||||
|
||||
private:
|
||||
RegExp *exp1, *exp2;
|
||||
|
||||
public:
|
||||
static const char *type;
|
||||
|
||||
public:
|
||||
CatOp(RegExp *e1, RegExp *e2)
|
||||
: exp1(e1)
|
||||
, exp2(e2)
|
||||
{
|
||||
}
|
||||
|
||||
const char *typeOf()
|
||||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
void split(CharSet&);
|
||||
void calcSize(Char*);
|
||||
uint fixedLength();
|
||||
void compile(Char*, Ins*);
|
||||
void display(std::ostream &o) const
|
||||
{
|
||||
o << exp1 << exp2;
|
||||
}
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
CatOp(const CatOp& oth)
|
||||
: RegExp(oth)
|
||||
, exp1(oth.exp1)
|
||||
, exp2(oth.exp2)
|
||||
{
|
||||
}
|
||||
CatOp& operator = (const CatOp& oth)
|
||||
{
|
||||
new(this) CatOp(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class CloseOp: public RegExp
|
||||
{
|
||||
|
||||
private:
|
||||
RegExp *exp;
|
||||
|
||||
public:
|
||||
static const char *type;
|
||||
|
||||
public:
|
||||
CloseOp(RegExp *e)
|
||||
: exp(e)
|
||||
{
|
||||
}
|
||||
|
||||
const char *typeOf()
|
||||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
void split(CharSet&);
|
||||
void calcSize(Char*);
|
||||
void compile(Char*, Ins*);
|
||||
void display(std::ostream &o) const
|
||||
{
|
||||
o << exp << "+";
|
||||
}
|
||||
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
CloseOp(const CloseOp& oth)
|
||||
: RegExp(oth)
|
||||
, exp(oth.exp)
|
||||
{
|
||||
}
|
||||
CloseOp& operator = (const CloseOp& oth)
|
||||
{
|
||||
new(this) CloseOp(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
class CloseVOp: public RegExp
|
||||
{
|
||||
|
||||
private:
|
||||
RegExp *exp;
|
||||
int min;
|
||||
int max;
|
||||
|
||||
public:
|
||||
static const char *type;
|
||||
|
||||
public:
|
||||
CloseVOp(RegExp *e, int lb, int ub)
|
||||
: exp(e)
|
||||
, min(lb)
|
||||
, max(ub)
|
||||
{
|
||||
}
|
||||
|
||||
const char *typeOf()
|
||||
{
|
||||
return type;
|
||||
}
|
||||
|
||||
void split(CharSet&);
|
||||
void calcSize(Char*);
|
||||
void compile(Char*, Ins*);
|
||||
void display(std::ostream &o) const
|
||||
{
|
||||
o << exp << "+";
|
||||
}
|
||||
#ifdef PEDANTIC
|
||||
private:
|
||||
CloseVOp(const CloseVOp& oth)
|
||||
: RegExp(oth)
|
||||
, exp(oth.exp)
|
||||
, min(oth.min)
|
||||
, max(oth.max)
|
||||
{
|
||||
}
|
||||
CloseVOp& operator = (const CloseVOp& oth)
|
||||
{
|
||||
new(this) CloseVOp(oth);
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
extern void genCode(std::ostream&, RegExp*);
|
||||
extern void genCode(std::ostream&, uint, RegExp*);
|
||||
extern void genGetState(std::ostream&, uint&, uint);
|
||||
extern RegExp *mkDiff(RegExp*, RegExp*);
|
||||
extern RegExp *mkAlt(RegExp*, RegExp*);
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
|
@ -1,597 +0,0 @@
|
|||
.\"
|
||||
.\" $Id: re2c.1.in 663 2007-04-01 11:22:15Z helly $
|
||||
.\"
|
||||
.TH RE2C 1 "22 April 2005" "Version 0.12.3"
|
||||
.ds re \fBre2c\fP
|
||||
.ds le \fBlex\fP
|
||||
.ds rx regular expression
|
||||
.ds lx \fIl\fP-expression
|
||||
.SH NAME
|
||||
re2c \- convert regular expressions to C/C++
|
||||
|
||||
.SH SYNOPSIS
|
||||
\*(re [\fB-bdefghisuvVw1\fP] [\fB-o output\fP] file
|
||||
|
||||
.SH DESCRIPTION
|
||||
\*(re is a preprocessor that generates C-based recognizers from regular
|
||||
expressions.
|
||||
The input to \*(re consists of C/C++ source interleaved with
|
||||
comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain
|
||||
scanner specifications.
|
||||
In the output these comments are replaced with code that, when
|
||||
executed, will find the next input token and then execute
|
||||
some user-supplied token-specific code.
|
||||
|
||||
For example, given the following code
|
||||
|
||||
.in +3
|
||||
.nf
|
||||
char *scan(char *p)
|
||||
{
|
||||
/*!re2c
|
||||
re2c:define:YYCTYPE = "unsigned char";
|
||||
re2c:define:YYCURSOR = p;
|
||||
re2c:yyfill:enable = 0;
|
||||
re2c:yych:conversion = 1;
|
||||
re2c:indent:top = 1;
|
||||
[0-9]+ {return p;}
|
||||
[\000-\377] {return (char*)0;}
|
||||
*/
|
||||
}
|
||||
.fi
|
||||
.in -3
|
||||
|
||||
\*(re -is will generate
|
||||
|
||||
.in +3
|
||||
.nf
|
||||
/* Generated by re2c on Sat Apr 16 11:40:58 1994 */
|
||||
char *scan(char *p)
|
||||
{
|
||||
{
|
||||
unsigned char yych;
|
||||
|
||||
yych = (unsigned char)*p;
|
||||
if(yych <= '/') goto yy4;
|
||||
if(yych >= ':') goto yy4;
|
||||
++p;
|
||||
yych = (unsigned char)*p;
|
||||
goto yy7;
|
||||
yy3:
|
||||
{return p;}
|
||||
yy4:
|
||||
++p;
|
||||
yych = (unsigned char)*p;
|
||||
{return (char*)0;}
|
||||
yy6:
|
||||
++p;
|
||||
yych = (unsigned char)*p;
|
||||
yy7:
|
||||
if(yych <= '/') goto yy3;
|
||||
if(yych <= '9') goto yy6;
|
||||
goto yy3;
|
||||
}
|
||||
|
||||
}
|
||||
.fi
|
||||
.in -3
|
||||
|
||||
You can place one \fC/*!max:re2c */\fP comment that will output a "#define
|
||||
\fCYYMAXFILL\fP <n>" line that holds the maximum number of characters
|
||||
required to parse the input. That is the maximum value \fCYYFILL\fP(n)
|
||||
will receive. If -1 is in effect then YYMAXFILL can only be triggered once
|
||||
after the last \fC/*!re2c */\fP.
|
||||
|
||||
You can also use \fC/*!ignore:re2c */\fP blocks that allow you to document the
|
||||
scanner code and will not be part of the output.
|
||||
|
||||
.SH OPTIONS
|
||||
\*(re provides the following options:
|
||||
.TP
|
||||
\fB-?\fP
|
||||
\fB-h\fP
|
||||
Invoke a short help.
|
||||
.TP
|
||||
\fB-b\fP
|
||||
Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better
|
||||
code out of the compiler. Most useful for specifications with more than a
|
||||
few keywords (e.g. for most programming languages).
|
||||
.TP
|
||||
\fB-d\fP
|
||||
Creates a parser that dumps information about the current position and in
|
||||
which state the parser is while parsing the input. This is useful to debug
|
||||
parser issues and states. If you use this switch you need to define a macro
|
||||
\fIYYDEBUG\fP that is called like a function with two parameters:
|
||||
\fIvoid YYDEBUG(int state, char current)\fP. The first parameter receives the
|
||||
state or -1 and the second parameter receives the input at the current cursor.
|
||||
.TP
|
||||
\fB-e\fP
|
||||
Cross-compile from an ASCII platform to an EBCDIC one.
|
||||
.TP
|
||||
\fB-f\fP
|
||||
Generate a scanner with support for storable state.
|
||||
For details see below at \fBSCANNER WITH STORABLE STATES\fP.
|
||||
.TP
|
||||
\fB-g\fP
|
||||
Generate a scanner that utilizes GCC's computed goto feature. That is \*(re
|
||||
generates jump tables whenever a decision is of a certain complexity (e.g. a
|
||||
lot of if conditions are otherwise necessary). This is only useable with GCC
|
||||
and produces output that cannot be compiled with any other compiler. Note that
|
||||
this implies -b and that the complexity threshold can be configured using the
|
||||
inplace configuration "cgoto:threshold".
|
||||
.TP
|
||||
\fB-i\fP
|
||||
Do not output #line information. This is useful when you want to use a CMS tool
|
||||
with the \*(re output which you might want if you do not require your users to
|
||||
have \*(re themselves when building from your source.
.TP
|
||||
\fB-o output\fP
|
||||
Specify the output file.
|
||||
.TP
|
||||
\fB-s\fP
|
||||
Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this
|
||||
assist to generate better code.
|
||||
.TP
|
||||
\fB-u\fP
|
||||
Generate a parser that supports Unicode chars (UTF-32). This means the
|
||||
generated code can deal with any valid Unicode character up to 0x10FFFF. When
|
||||
UTF-8 or UTF-16 needs to be supported you need to convert the incoming stream
|
||||
to UTF-32 upon input yourself.
|
||||
.TP
|
||||
\fB-v\fP
|
||||
Show version information.
|
||||
.TP
|
||||
\fB-V\fP
|
||||
Show the version as a number XXYYZZ.
|
||||
.TP
|
||||
\fB-w\fP
|
||||
Create a parser that supports wide chars (UCS-2). This implies \fB-s\fP and
|
||||
cannot be used together with \fB-e\fP switch.
|
||||
.TP
|
||||
\fB-1\fP
|
||||
Force single pass generation, this cannot be combined with -f and disables
|
||||
YYMAXFILL generation prior to last \*(re block.
|
||||
.TP
|
||||
\fB--no-generation-date\fP
|
||||
Suppress date output in the generated output so that it only shows the re2c
|
||||
version.
|
||||
.SH "INTERFACE CODE"
|
||||
Unlike other scanner generators, \*(re does not generate complete scanners:
|
||||
the user must supply some interface code.
|
||||
In particular, the user must define the following macros or use the
|
||||
corresponding inplace configurations:
|
||||
.TP
|
||||
\fCYYCTYPE\fP
|
||||
Type used to hold an input symbol.
|
||||
Usually \fCchar\fP or \fCunsigned char\fP.
|
||||
.TP
|
||||
\fCYYCURSOR\fP
|
||||
\*(lx of type \fC*YYCTYPE\fP that points to the current input symbol.
|
||||
The generated code advances \fCYYCURSOR\fP as symbols are matched.
|
||||
On entry, \fCYYCURSOR\fP is assumed to point to the first character of the
|
||||
current token. On exit, \fCYYCURSOR\fP will point to the first character of
|
||||
the following token.
|
||||
.TP
|
||||
\fCYYLIMIT\fP
|
||||
Expression of type \fC*YYCTYPE\fP that marks the end of the buffer
|
||||
(\fCYYLIMIT[-1]\fP is the last character in the buffer).
|
||||
The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP
|
||||
to determine when the buffer needs (re)filling.
|
||||
.TP
|
||||
\fCYYMARKER\fP
|
||||
\*(lx of type \fC*YYCTYPE\fP.
|
||||
The generated code saves backtracking information in \fCYYMARKER\fP. Some easy
|
||||
scanners might not use this.
|
||||
.TP
|
||||
\fCYYCTXMARKER\fP
|
||||
\*(lx of type \fC*YYCTYPE\fP.
|
||||
The generated code saves trailing context backtracking information in \fCYYCTXMARKER\fP.
|
||||
The user only needs to define this macro if a scanner specification uses trailing
|
||||
context in one or more of its regular expressions.
|
||||
.TP
|
||||
\fCYYFILL\fP(\fIn\fP\fC\fP)
|
||||
The generated code "calls" \fCYYFILL\fP(n) when the buffer needs
|
||||
(re)filling: at least \fIn\fP additional characters should
|
||||
be provided. \fCYYFILL\fP(n) should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP,
|
||||
\fCYYMARKER\fP and \fCYYCTXMARKER\fP as needed. Note that for typical
|
||||
programming languages \fIn\fP will be the length of the longest keyword plus one.
|
||||
The user can place a comment of the form \fC/*!max:re2c */\fP once to insert
|
||||
a \fCYYMAXFILL\fP(n) definition that is set to the maximum length value. If -1
|
||||
switch is used then \fCYYMAXFILL\fP can be triggered only once after the
|
||||
last \fC/*!re2c */\fP
|
||||
block.
|
||||
.TP
|
||||
\fCYYGETSTATE\fP()
|
||||
The user only needs to define this macro if the \fB-f\fP flag was specified.
|
||||
In that case, the generated code "calls" \fCYYGETSTATE\fP() at the very beginning
|
||||
of the scanner in order to obtain the saved state. \fCYYGETSTATE\fP() must return a signed
|
||||
integer. The value must be either -1, indicating that the scanner is entered for the
|
||||
first time, or a value previously saved by \fCYYSETSTATE\fP(s). In the second case, the
|
||||
scanner will resume operations right after where the last \fCYYFILL\fP(n) was called.
|
||||
.TP
|
||||
\fCYYSETSTATE(\fP\fIs\fP\fC)\fP
|
||||
The user only needs to define this macro if the \fB-f\fP flag was specified.
|
||||
In that case, the generated code "calls" \fCYYSETSTATE\fP just before calling
|
||||
\fCYYFILL\fP(n). The parameter to \fCYYSETSTATE\fP is a signed integer that uniquely
|
||||
identifies the specific instance of \fCYYFILL\fP(n) that is about to be called.
|
||||
Should the user wish to save the state of the scanner and have \fCYYFILL\fP(n) return
|
||||
to the caller, all he has to do is store that unique identifier in a variable.
|
||||
Later, when the scanner is called again, it will call \fCYYGETSTATE()\fP and
|
||||
resume execution right where it left off. The generated code will contain
|
||||
both \fCYYSETSTATE\fP(s) and \fCYYGETSTATE\fP even if \fCYYFILL\fP(n) is being
|
||||
disabled.
|
||||
.TP
|
||||
\fCYYDEBUG(\fP\fIstate\fP,\fIcurrent\fC)\fP
|
||||
This is only needed if the \fB-d\fP flag was specified. It allows you to easily debug
|
||||
the generated parser by calling a user defined function for every state. The function
|
||||
should have the following signature: \fIvoid YYDEBUG(int state, char current)\fP.
|
||||
The first parameter receives the state or -1 and the second parameter receives the
|
||||
input at the current cursor.
|
||||
.TP
|
||||
\fCYYMAXFILL\fP
|
||||
This will be automatically defined by \fC/*!max:re2c */\fP blocks as explained above.
|
||||
|
||||
.SH "SCANNER WITH STORABLE STATES"
|
||||
When the \fB-f\fP flag is specified, \*(re generates a scanner that
|
||||
can store its current state, return to the caller, and later resume
|
||||
operations exactly where it left off.
|
||||
|
||||
The default operation of \*(re is a "pull" model, where the scanner asks
|
||||
for extra input whenever it needs it. However, this mode of operation
|
||||
assumes that the scanner is the "owner" of the parsing loop, and that may
|
||||
not always be convenient.
|
||||
|
||||
Typically, if there is a preprocessor ahead of the scanner in the stream,
|
||||
or for that matter any other procedural source of data, the scanner cannot
|
||||
"ask" for more data unless both scanner and source live in separate threads.
|
||||
|
||||
The \fB-f\fP flag is useful for just this situation : it lets users design
|
||||
scanners that work in a "push" model, i.e. where data is fed to the scanner
|
||||
chunk by chunk. When the scanner runs out of data to consume, it just stores
|
||||
its state, and returns to the caller. When more input data is fed to the scanner,
|
||||
it resumes operations exactly where it left off.
|
||||
|
||||
When using the -f option \*(re does not accept stdin because it has to do the
|
||||
full generation process twice which means it has to read the input twice. That
|
||||
means \*(re would fail in case it cannot open the input twice or reading the
|
||||
input for the first time influences the second read attempt.
|
||||
|
||||
Changes needed compared to the "pull" model.
|
||||
|
||||
1. User has to supply macros YYSETSTATE(state) and YYGETSTATE()
|
||||
|
||||
2. The \fB-f\fP option inhibits declaration of \fIyych\fP and
|
||||
\fIyyaccept\fP. So the user has to declare these. Also the user has
|
||||
to save and restore these. In the example \fIexamples/push.re\fP these
|
||||
are declared as fields of the (C++) class of which the scanner is a
|
||||
method, so they do not need to be saved/restored explicitly. For C
|
||||
they could e.g. be made macros that select fields from a structure
|
||||
passed in as parameter. Alternatively, they could be declared as local
|
||||
variables, saved with YYFILL(n) when it decides to return and restored
|
||||
at entry to the function. Also, it could be more efficient to save the
|
||||
state from YYFILL(n) because YYSETSTATE(state) is called
|
||||
unconditionally. YYFILL(n) however does not get \fIstate\fP as
|
||||
parameter, so we would have to store state in a local variable by
|
||||
YYSETSTATE(state).
|
||||
|
||||
3. Modify YYFILL(n) to return (from the function calling it) if more
|
||||
input is needed.
|
||||
|
||||
4. Modify caller to recognise "more input is needed" and respond
|
||||
appropriately.
|
||||
|
||||
5. The generated code will contain a switch block that is used to restore
|
||||
the last state by jumping behind the corresponding YYFILL(n) call. This code is
|
||||
automatically generated in the epilog of the first "\fC/*!re2c */\fP" block.
|
||||
It is possible to trigger generation of the YYGETSTATE() block earlier by
|
||||
placing a "\fC/*!getstate:re2c */\fP" comment. This is especially useful when
|
||||
the scanner code should be wrapped inside a loop.
|
||||
|
||||
Please see examples/push.re for push-model scanner. The generated code can be
|
||||
tweaked using inplace configurations "\fBstate:abort\fP" and "\fBstate:nextlabel\fP".
|
||||
|
||||
.SH "SCANNER SPECIFICATIONS"
|
||||
Each scanner specification consists of a set of \fIrules\fP, \fInamed
|
||||
definitions\fP and \fIconfigurations\fP.
|
||||
.LP
|
||||
\fIRules\fP consist of a regular expression along with a block of C/C++ code that
|
||||
is to be executed when the associated \fIregular expression\fP is matched.
|
||||
.P
|
||||
.RS
|
||||
\fIregular expression\fP \fC{\fP \fIC/C++ code\fP \fC}\fP
|
||||
.RE
|
||||
.LP
|
||||
Named definitions are of the form:
|
||||
.P
|
||||
.RS
|
||||
\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP
|
||||
.RE
|
||||
.LP
|
||||
Configurations look like named definitions whose names start
|
||||
with "\fBre2c:\fP":
|
||||
.P
|
||||
.RS
|
||||
\fCre2c:\fP\fIname\fP \fC=\fP \fIvalue\fP\fC;\fP
|
||||
.RE
|
||||
.RS
|
||||
\fCre2c:\fP\fIname\fP \fC=\fP \fB"\fP\fIvalue\fP\fB"\fP\fC;\fP
|
||||
.RE
|
||||
|
||||
.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS"
|
||||
.TP
|
||||
\fC"foo"\fP
|
||||
the literal string \fCfoo\fP.
|
||||
ANSI-C escape sequences can be used.
|
||||
.TP
|
||||
\fC'foo'\fP
|
||||
the literal string \fCfoo\fP (characters [a-zA-Z] treated case-insensitive).
|
||||
ANSI-C escape sequences can be used.
|
||||
.TP
|
||||
\fC[xyz]\fP
|
||||
a "character class"; in this case,
|
||||
the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'.
|
||||
.TP
|
||||
\fC[abj-oZ]\fP
|
||||
a "character class" with a range in it;
|
||||
matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP',
|
||||
or a '\fCZ\fP'.
|
||||
.TP
|
||||
\fC[^\fIclass\fP\fC]\fP
|
||||
an inverted "character class".
|
||||
.TP
|
||||
\fIr\fP\fC\e\fP\fIs\fP
|
||||
match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions
|
||||
which can be expressed as character classes.
|
||||
.TP
|
||||
\fIr\fP\fC*\fP
|
||||
zero or more \fIr\fP's, where \fIr\fP is any regular expression
|
||||
.TP
|
||||
\fC\fIr\fP\fC+\fP
|
||||
one or more \fIr\fP's
|
||||
.TP
|
||||
\fC\fIr\fP\fC?\fP
|
||||
zero or one \fIr\fP's (that is, "an optional \fIr\fP")
|
||||
.TP
|
||||
name
|
||||
the expansion of the "named definition" (see above)
|
||||
.TP
|
||||
\fC(\fP\fIr\fP\fC)\fP
|
||||
an \fIr\fP; parentheses are used to override precedence
|
||||
(see below)
|
||||
.TP
|
||||
\fIrs\fP
|
||||
an \fIr\fP followed by an \fIs\fP ("concatenation")
|
||||
.TP
|
||||
\fIr\fP\fC|\fP\fIs\fP
|
||||
either an \fIr\fP or an \fIs\fP
|
||||
.TP
|
||||
\fIr\fP\fC/\fP\fIs\fP
|
||||
an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of
|
||||
the matched text. This type of \*(rx is called "trailing context". A trailing
|
||||
context can only be the end of a rule and not part of a named definition.
|
||||
.TP
|
||||
\fIr\fP\fC{\fP\fIn\fP\fC}\fP
|
||||
matches \fIr\fP exactly \fIn\fP times.
|
||||
.TP
|
||||
\fIr\fP\fC{\fP\fIn\fP\fC,}\fP
|
||||
matches \fIr\fP at least \fIn\fP times.
|
||||
.TP
|
||||
\fIr\fP\fC{\fP\fIn\fP\fC,\fP\fIm\fP\fC}\fP
|
||||
matches \fIr\fP at least \fIn\fP but not more than \fIm\fP times.
|
||||
.TP
|
||||
\fC.\fP
|
||||
match any character except newline (\\n).
|
||||
.TP
|
||||
\fIdef\fP
|
||||
matches named definition as specified by \fIdef\fP.
|
||||
.LP
|
||||
Character classes and string literals may contain octal or hexadecimal
|
||||
character definitions and the following set of escape sequences (\fB\\n\fP,
|
||||
\fB\\t\fP, \fB\\v\fP, \fB\\b\fP, \fB\\r\fP, \fB\\f\fP, \fB\\a\fP, \fB\\\\\fP).
|
||||
An octal character is defined by a backslash followed by its three octal digits
|
||||
and a hexadecimal character is defined by backslash, a lower cased '\fBx\fP'
|
||||
and its two hexadecimal digits or a backslash, an upper cased \fBX\fP and its
|
||||
four hexadecimal digits.
|
||||
.LP
|
||||
\*(re furthermore supports the C/C++ Unicode notation. That is a backslash followed
|
||||
by either a lowercased \fBu\fP and its four hexadecimal digits or an uppercased
|
||||
\fBU\fP and its eight hexadecimal digits. However only in \fB-u\fP mode the
|
||||
generated code can deal with any valid Unicode character up to 0x10FFFF.
|
||||
.LP
|
||||
Since characters greater than \fB\\X00FF\fP are not allowed in non-Unicode mode, the
|
||||
only portable "\fBany\fP" rules are \fB(.|"\\n")\fP and \fB[^]\fP.
|
||||
.LP
|
||||
The regular expressions listed above are grouped according to
|
||||
precedence, from highest precedence at the top to lowest at the bottom.
|
||||
Those grouped together have equal precedence.
|
||||
|
||||
.SH "INPLACE CONFIGURATION"
|
||||
.LP
|
||||
It is possible to configure code generation inside \*(re blocks. The following
|
||||
lists the available configurations:
|
||||
.TP
|
||||
\fIre2c:indent:top\fP \fB=\fP 0 \fB;\fP
|
||||
Specifies the minimum amount of indentation to use. Requires a numeric value
|
||||
greater than or equal to zero.
|
||||
.TP
|
||||
\fIre2c:indent:string\fP \fB=\fP "\\t" \fB;\fP
|
||||
Specifies the string to use for indentation. Requires a string that should
|
||||
contain only whitespace unless you need this for external tools. The easiest
|
||||
way to specify spaces is to enclose them in single or double quotes. If you do
|
||||
not want any indentation at all you can simply set this to \fB""\fP.
|
||||
.TP
|
||||
\fIre2c:yybm:hex\fP \fB=\fP 0 \fB;\fP
|
||||
If set to zero then a decimal table is being used else a hexadecimal table
|
||||
will be generated.
|
||||
.TP
|
||||
\fIre2c:yyfill:enable\fP \fB=\fP 1 \fB;\fP
|
||||
Set this to zero to suppress generation of YYFILL(n). When using this be sure
|
||||
to verify that the generated scanner does not read beyond the end of the input. Allowing
|
||||
this behavior might introduce severe security issues to your programs.
|
||||
.TP
|
||||
\fIre2c:yyfill:parameter\fP \fB=\fP 1 \fB;\fP
|
||||
Allows to suppress parameter passing to \fBYYFILL\fP calls. If set to zero
|
||||
then no parameter is passed to \fBYYFILL\fP. If set to a non zero value then
|
||||
\fBYYFILL\fP usage will be followed by the number of requested characters in
|
||||
braces.
|
||||
.TP
|
||||
\fIre2c:startlabel\fP \fB=\fP 0 \fB;\fP
|
||||
If set to a non zero integer then the start label of the next scanner blocks
|
||||
will be generated even if not used by the scanner itself. Otherwise the normal
|
||||
\fByy0\fP like start label is only being generated if needed. If set to a text
|
||||
value then a label with that text will be generated regardless of whether the
|
||||
normal start label is being used or not. This setting is being reset to \fB0\fP
|
||||
after a start label has been generated.
|
||||
.TP
|
||||
\fIre2c:labelprefix\fP \fB=\fP yy \fB;\fP
|
||||
Allows to change the prefix of numbered labels. The default is \fByy\fP and
|
||||
can be set to any string that is a valid label.
|
||||
.TP
|
||||
\fIre2c:state:abort\fP \fB=\fP 0 \fB;\fP
|
||||
When not zero and switch -f is active then the \fCYYGETSTATE\fP block will
|
||||
contain a default case that aborts and a -1 case is used for initialization.
|
||||
.TP
|
||||
\fIre2c:state:nextlabel\fP \fB=\fP 0 \fB;\fP
|
||||
Used when -f is active to control whether the \fCYYGETSTATE\fP block is
|
||||
followed by a \fCyyNext:\fP label line. Instead of using \fCyyNext\fP you can
|
||||
usually also use configuration \fIstartlabel\fP to force a specific start label
|
||||
or default to \fCyy0\fP as start label. Instead of using a dedicated label it
|
||||
is often better to separate the YYGETSTATE code from the actual scanner code by
|
||||
placing a "\fC/*!getstate:re2c */\fP" comment.
|
||||
.TP
|
||||
\fIre2c:cgoto:threshold\fP \fB=\fP 9 \fB;\fP
|
||||
When -g is active this value specifies the complexity threshold that triggers
|
||||
generation of jump tables rather than using nested if's and decision bitfields.
|
||||
The threshold is compared against a calculated estimation of if-s needed where
|
||||
every used bitmap divides the threshold by 2.
|
||||
.TP
|
||||
\fIre2c:yych:conversion\fP \fB=\fP 0 \fB;\fP
|
||||
When the input uses signed characters and \fB-s\fP or \fB-b\fP switches are
|
||||
in effect re2c allows to automatically convert to the unsigned character type
|
||||
that is then necessary for its internal single character. When this setting
|
||||
is zero or an empty string the conversion is disabled. Using a non zero number
|
||||
the conversion is taken from \fBYYCTYPE\fP. If that is given by an inplace
|
||||
configuration that value is being used. Otherwise it will be \fB(YYCTYPE)\fP
|
||||
and changes to that configuration are no longer possible. When this setting is
|
||||
a string the braces must be specified. Now assuming your input is a \fBchar*\fP
|
||||
buffer and you are using above mentioned switches you can set \fBYYCTYPE\fP to
|
||||
\fBunsigned char\fP and this setting to either \fB1\fP or \fB"(unsigned char)"\fP.
|
||||
.TP
|
||||
\fIre2c:define:YYCTXMARKER\fP \fB=\fP YYCTXMARKER \fB;\fP
|
||||
Allows to overwrite the define YYCTXMARKER and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYCTYPE\fP \fB=\fP YYCTYPE \fB;\fP
|
||||
Allows to overwrite the define YYCTYPE and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYCURSOR\fP \fB=\fP YYCURSOR \fB;\fP
|
||||
Allows to overwrite the define YYCURSOR and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYDEBUG\fP \fB=\fP YYDEBUG \fB;\fP
|
||||
Allows to overwrite the define YYDEBUG and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYFILL\fP \fB=\fP YYFILL \fB;\fP
|
||||
Allows to overwrite the define YYFILL and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYGETSTATE\fP \fB=\fP YYGETSTATE \fB;\fP
|
||||
Allows to overwrite the define YYGETSTATE and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYLIMIT\fP \fB=\fP YYLIMIT \fB;\fP
|
||||
Allows to overwrite the define YYLIMIT and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYMARKER\fP \fB=\fP YYMARKER \fB;\fP
|
||||
Allows to overwrite the define YYMARKER and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:define:YYSETSTATE\fP \fB=\fP YYSETSTATE \fB;\fP
|
||||
Allows to overwrite the define YYSETSTATE and thus avoiding it by setting the
|
||||
value to the actual code needed.
|
||||
.TP
|
||||
\fIre2c:label:yyFillLabel\fP \fB=\fP yyFillLabel \fB;\fP
|
||||
Allows to overwrite the name of the label yyFillLabel.
|
||||
.TP
|
||||
\fIre2c:label:yyNext\fP \fB=\fP yyNext \fB;\fP
|
||||
Allows to overwrite the name of the label yyNext.
|
||||
.TP
|
||||
\fIre2c:variable:yyaccept\fP \fB=\fP yyaccept \fB;\fP
|
||||
Allows to overwrite the name of the variable yyaccept.
|
||||
.TP
|
||||
\fIre2c:variable:yybm\fP \fB=\fP yybm \fB;\fP
|
||||
Allows to overwrite the name of the variable yybm.
|
||||
.TP
|
||||
\fIre2c:variable:yych\fP \fB=\fP yych \fB;\fP
|
||||
Allows to overwrite the name of the variable yych.
|
||||
.TP
|
||||
\fIre2c:variable:yytarget\fP \fB=\fP yytarget \fB;\fP
|
||||
Allows to overwrite the name of the variable yytarget.
|
||||
|
||||
.SH "UNDERSTANDING RE2C"
|
||||
.LP
|
||||
The subdirectory lessons of the \*(re distribution contains a few step by step
|
||||
lessons to get you started with \*(re. All examples in the lessons subdirectory
|
||||
can be compiled and actually work.
|
||||
|
||||
.SH FEATURES
|
||||
.LP
|
||||
\*(re does not provide a default action:
|
||||
the generated code assumes that the input
|
||||
will consist of a sequence of tokens.
|
||||
Typically this can be dealt with by adding a rule such as the one for
|
||||
unexpected characters in the example above.
|
||||
.LP
|
||||
The user must arrange for a sentinel token to appear at the end of input
|
||||
(and provide a rule for matching it):
|
||||
\*(re does not provide an \fC<<EOF>>\fP expression.
|
||||
If the source is from a null-byte terminated string, a
|
||||
rule matching a null character will suffice. If the source is from a
|
||||
file then you could pad the input with a newline (or some other character that
|
||||
cannot appear within another token); upon recognizing such a character check
|
||||
to see if it is the sentinel and act accordingly. And you can also use YYFILL(n)
|
||||
to end the scanner in case not enough characters are available which is nothing
|
||||
else than the detection of end of data/file.
|
||||
.LP
|
||||
\*(re does not provide start conditions: use a separate scanner
|
||||
specification for each start condition (as illustrated in the above example).
|
||||
|
||||
.SH BUGS
|
||||
.LP
|
||||
Difference only works for character sets.
|
||||
.LP
|
||||
The \*(re internal algorithms need documentation.
|
||||
|
||||
.SH "SEE ALSO"
|
||||
.LP
|
||||
flex(1), lex(1).
|
||||
.P
|
||||
More information on \*(re can be found here:
|
||||
.PD 0
|
||||
.P
|
||||
.B http://re2c.org/
|
||||
.PD 1
|
||||
|
||||
.SH AUTHORS
|
||||
.PD 0
|
||||
.P
|
||||
Peter Bumbulis <peter@csg.uwaterloo.ca>
|
||||
.P
|
||||
Brian Young <bayoung@acm.org>
|
||||
.P
|
||||
Dan Nuffer <nuffer@users.sourceforge.net>
|
||||
.P
|
||||
Marcus Boerger <helly@users.sourceforge.net>
|
||||
.P
|
||||
Hartmut Kaiser <hkaiser@users.sourceforge.net>
|
||||
.P
|
||||
Emmanuel Mogenet <mgix@mgix.com> added storable state
|
||||
.P
|
||||
.PD 1
|
||||
|
||||
.SH VERSION INFORMATION
|
||||
This manpage describes \*(re, version 0.12.3.
|
||||
|
||||
.fi
|
|
@ -1,549 +0,0 @@
|
|||
<?xml version="1.0" encoding="Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="8.00"
|
||||
Name="re2c"
|
||||
ProjectGUID="{667D2EE7-C357-49E2-9BAB-0A4A45F0F76E}"
|
||||
RootNamespace="re2c"
|
||||
Keyword="Win32Proj"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
<Platform
|
||||
Name="x64"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="."
|
||||
IntermediateDirectory="Build"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="1"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
StringPooling="true"
|
||||
ExceptionHandling="1"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="false"
|
||||
DebugInformationFormat="0"
|
||||
CallingConvention="0"
|
||||
DisableSpecificWarnings="4996"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile="$(OutDir)/re2c.exe"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="false"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
OptimizeForWindows98="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug|x64"
|
||||
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TargetEnvironment="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="1"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
StringPooling="true"
|
||||
ExceptionHandling="1"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="false"
|
||||
DebugInformationFormat="0"
|
||||
CallingConvention="0"
|
||||
DisableSpecificWarnings="4996;4244"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile="$(OutDir)/re2c.exe"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="false"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
OptimizeForWindows98="1"
|
||||
TargetMachine="17"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="."
|
||||
IntermediateDirectory="Build"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="1"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
StringPooling="true"
|
||||
ExceptionHandling="1"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="false"
|
||||
DebugInformationFormat="0"
|
||||
DisableSpecificWarnings="4996"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile="$(OutDir)/re2c.exe"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="false"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
OptimizeForWindows98="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|x64"
|
||||
OutputDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TargetEnvironment="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="1"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
StringPooling="true"
|
||||
ExceptionHandling="1"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="false"
|
||||
DebugInformationFormat="0"
|
||||
CallingConvention="0"
|
||||
DisableSpecificWarnings="4996;4244"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile="$(OutDir)/re2c.exe"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="false"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
OptimizeForWindows98="1"
|
||||
TargetMachine="17"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\actions.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\code.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\dfa.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\main.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbo_getopt.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\parser.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\parser.y"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
ObjectFile="$(IntDir)\$(InputName)1.obj"
|
||||
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
ObjectFile="$(IntDir)\$(InputName)1.obj"
|
||||
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
ObjectFile="$(IntDir)\$(InputName)1.obj"
|
||||
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
ObjectFile="$(IntDir)\$(InputName)1.obj"
|
||||
XMLDocumentationFileName="$(IntDir)\$(InputName)1.xdc"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\scanner.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\scanner.re"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\substr.cc"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\translate.cc"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\basics.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\code.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\code_names.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\config_w32.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\dfa.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\globals.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\ins.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\mbo_getopt.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\parser.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\re.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\scanner.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\stream_lc.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\substr.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\token.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\y.tab.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<File
|
||||
RelativePath=".\CMakeLists.txt"
|
||||
>
|
||||
</File>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
File diff suppressed because it is too large
Load diff
|
@ -1,76 +0,0 @@
|
|||
/* $Id: scanner.h,v 1.17 2006/02/25 12:57:50 helly Exp $ */
|
||||
#ifndef _scanner_h
|
||||
#define _scanner_h
|
||||
|
||||
#include <iosfwd>
|
||||
#include <string>
|
||||
#include "token.h"
|
||||
#include "re.h"
|
||||
#include "globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Buffered scanner over an input stream: echo() copies text to the output
// stream, scan() tokenises, and fill() keeps the internal buffer topped up.
// Copying is disabled (copy constructor and assignment are private and
// unimplemented).
class Scanner:
|
||||
public line_number
|
||||
{
|
||||
private:
|
||||
// Stream that fill() reads from.
std::istream& in;
|
||||
// Stream that echo() writes to.
std::ostream& out;
|
||||
// Buffer bookkeeping, as maintained by fill():
//   bot - start of the allocated buffer (the pointer that gets delete[]d)
//   tok - start of the current token; token() spans [tok, cur)
//   ptr - backing variable for YYMARKER in the scanner source
//   cur - cursor position saved between calls (set by the RETURN macro)
//   pos - reference point for the column: scan() computes tchar = cursor - pos
//   lim - end of the valid data; backing variable for YYLIMIT
//   top - end of the buffer's capacity
//   eof - NULL until a short read; then points just past the '\0' stored after the data
char *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
|
||||
// tchar/tline: column offset and line captured at the start of each scan() pass.
// cline: current line counter (starts at 1, bumped on newlines).
// iscfg: scan() jumps to its config state when 1 and to its value state when 2.
uint tchar, tline, cline, iscfg;
|
||||
// File name passed to the constructor; stored as given, not copied.
const char *filename;
|
||||
|
||||
private:
|
||||
// Refills the buffer; takes the caller's cursor and returns it rebased.
char *fill(char*);
|
||||
Scanner(const Scanner&); //unimplemented
|
||||
Scanner& operator=(const Scanner&); //unimplemented
|
||||
|
||||
public:
|
||||
// Arguments: file name, input stream, output stream.
Scanner(const char*, std::istream&, std::ostream&);
|
||||
~Scanner();
|
||||
|
||||
int echo();
|
||||
int scan();
|
||||
|
||||
// The one-argument overload forwards to the two-argument one with 0.
void fatal(const char*) const;
|
||||
void fatal(uint, const char*) const;
|
||||
|
||||
void config(const Str&, int);
|
||||
void config(const Str&, const Str&);
|
||||
|
||||
// Text of the current token: SubStr(tok, cur - tok).
SubStr token() const;
|
||||
virtual uint get_line() const;
|
||||
// Returns c unchanged when re2c::wFlag is set, else re2c::xlat[c & 0xFF].
uint xlat(uint c) const;
|
||||
|
||||
uint unescape(SubStr &s) const;
|
||||
std::string& unescape(SubStr& str_in, std::string& str_out) const;
|
||||
|
||||
Range * getRange(SubStr &s) const;
|
||||
RegExp * matchChar(uint c) const;
|
||||
RegExp * strToName(SubStr s) const;
|
||||
RegExp * strToRE(SubStr s) const;
|
||||
RegExp * strToCaseInsensitiveRE(SubStr s) const;
|
||||
RegExp * ranToRE(SubStr s) const;
|
||||
RegExp * invToRE(SubStr s) const;
|
||||
RegExp * mkDot() const;
|
||||
};
|
||||
|
||||
inline void Scanner::fatal(const char *msg) const
|
||||
{
|
||||
fatal(0, msg);
|
||||
}
|
||||
|
||||
// Returns the current token as a SubStr that starts at `tok` and is
// (cur - tok) characters long.
inline SubStr Scanner::token() const
{
	SubStr current(tok, cur - tok);
	return current;
}
|
||||
|
||||
// Translates a character code. With re2c::wFlag set the code is returned
// unchanged; otherwise its low 8 bits index the re2c::xlat table.
inline uint Scanner::xlat(uint c) const
{
	if (re2c::wFlag)
	{
		return c;
	}
	return re2c::xlat[c & 0xFF];
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif
|
|
@ -1,381 +0,0 @@
|
|||
/* $Id: scanner.re 663 2007-04-01 11:22:15Z helly $ */
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "scanner.h"
|
||||
#include "parser.h"
|
||||
#include "y.tab.h"
|
||||
#include "globals.h"
|
||||
#include "dfa.h"
|
||||
|
||||
extern YYSTYPE yylval;
|
||||
|
||||
// Larger of two values. Each argument is evaluated twice, so do not pass
// expressions with side effects.
#ifndef MAX
|
||||
#define MAX(a,b) (((a)>(b))?(a):(b))
|
||||
#endif
|
||||
|
||||
// Number of bytes fill() requests from the input stream per read, and the
// minimum free space it keeps after the data.
#define BSIZE 8192
|
||||
|
||||
// Bindings of the names used by the generated scanner code to this file's
// variables: the local `cursor` and the Scanner members `lim` and `ptr`.
#define YYCTYPE unsigned char
|
||||
#define YYCURSOR cursor
|
||||
#define YYLIMIT lim
|
||||
#define YYMARKER ptr
|
||||
// The requested count n is ignored; fill() always reads a whole BSIZE chunk
// and hands back the rebased cursor.
#define YYFILL(n) {cursor = fill(cursor);}
|
||||
|
||||
// Saves the local cursor back into the member `cur`, then returns i.
#define RETURN(i) {cur = cursor; return i;}
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Binds the scanner to its input and output streams and records the file
// name. All buffer pointers start out NULL (the buffer is allocated on the
// first fill()), the counters start at 0, and line counting starts at 1.
Scanner::Scanner(const char *fn, std::istream& i, std::ostream& o)
	: in(i),
	  out(o),
	  bot(NULL),
	  tok(NULL),
	  ptr(NULL),
	  cur(NULL),
	  pos(NULL),
	  lim(NULL),
	  top(NULL),
	  eof(NULL),
	  tchar(0),
	  tline(0),
	  cline(1),
	  iscfg(0),
	  filename(fn)
{
}
|
||||
|
||||
char *Scanner::fill(char *cursor)
|
||||
{
|
||||
if(!eof)
|
||||
{
|
||||
uint cnt = tok - bot;
|
||||
if(cnt)
|
||||
{
|
||||
memcpy(bot, tok, lim - tok);
|
||||
tok = bot;
|
||||
ptr -= cnt;
|
||||
cursor -= cnt;
|
||||
pos -= cnt;
|
||||
lim -= cnt;
|
||||
}
|
||||
if((top - lim) < BSIZE)
|
||||
{
|
||||
char *buf = new char[(lim - bot) + BSIZE];
|
||||
memcpy(buf, tok, lim - tok);
|
||||
tok = buf;
|
||||
ptr = &buf[ptr - bot];
|
||||
cursor = &buf[cursor - bot];
|
||||
pos = &buf[pos - bot];
|
||||
lim = &buf[lim - bot];
|
||||
top = &lim[BSIZE];
|
||||
delete [] bot;
|
||||
bot = buf;
|
||||
}
|
||||
in.read(lim, BSIZE);
|
||||
if ((cnt = in.gcount()) != BSIZE )
|
||||
{
|
||||
eof = &lim[cnt]; *eof++ = '\0';
|
||||
}
|
||||
lim += cnt;
|
||||
}
|
||||
return cursor;
|
||||
}
|
||||
|
||||
/*!re2c
|
||||
zero = "\000";
|
||||
any = [\000-\377];
|
||||
dot = any \ [\n];
|
||||
esc = dot \ [\\];
|
||||
istring = "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ;
|
||||
cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ;
|
||||
dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\"";
|
||||
sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ;
|
||||
letter = [a-zA-Z];
|
||||
digit = [0-9];
|
||||
number = "0" | ("-"? [1-9] digit*);
|
||||
name = (letter|"_") (letter|digit|"_")*;
|
||||
cname = ":" name;
|
||||
space = [ \t];
|
||||
eol = ("\r\n" | "\n");
|
||||
config = "re2c" cname+;
|
||||
value = [^\r\n; \t]* | dstring | sstring;
|
||||
*/
|
||||
|
||||
// Copies input text through to `out` until a scanner block begins.
//
// Returns 1 when the scanner-block opener is found: the text before it is
// written out (the opener itself, 7 characters, is not) and `cur` is left
// just past it. Returns 0 at end of input.
//
// Along the way it handles three special comment openers:
//   max:      writes a "#define YYMAXFILL" line (fatal if done twice);
//   getstate: calls genGetState(out, topIndent, 0);
//   ignore:   no output of its own.
// Each of them sets ignore_eoc, which suppresses echoing until the closing
// comment delimiter is seen. Newlines bump `cline` and reset `pos`.
//
// NOTE(review): the `zero` rule has no goto or return when cursor != eof;
// what runs next depends on the layout of the generated code - confirm.
int Scanner::echo()
|
||||
{
|
||||
char *cursor = cur;
|
||||
// true while inside one of the special comments whose text is not echoed
bool ignore_eoc = false;
|
||||
// number of newlines skipped while ignore_eoc was set
int ignore_cnt = 0;
|
||||
|
||||
if (eof && cursor == eof) // Catch EOF
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
tok = cursor;
|
||||
echo:
|
||||
/*!re2c
|
||||
"/*!re2c" {
|
||||
if (bUsedYYMaxFill && bSinglePass) {
|
||||
fatal("found scanner block after YYMAXFILL declaration");
|
||||
}
|
||||
out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok));
|
||||
tok = cursor;
|
||||
RETURN(1);
|
||||
}
|
||||
"/*!max:re2c" {
|
||||
if (bUsedYYMaxFill) {
|
||||
fatal("cannot generate YYMAXFILL twice");
|
||||
}
|
||||
out << "#define YYMAXFILL " << maxFill << std::endl;
|
||||
tok = pos = cursor;
|
||||
ignore_eoc = true;
|
||||
bUsedYYMaxFill = true;
|
||||
goto echo;
|
||||
}
|
||||
"/*!getstate:re2c" {
|
||||
tok = pos = cursor;
|
||||
genGetState(out, topIndent, 0);
|
||||
ignore_eoc = true;
|
||||
goto echo;
|
||||
}
|
||||
"/*!ignore:re2c" {
|
||||
tok = pos = cursor;
|
||||
ignore_eoc = true;
|
||||
goto echo;
|
||||
}
|
||||
"*" "/" "\r"? "\n" {
|
||||
cline++;
|
||||
if (ignore_eoc) {
|
||||
if (ignore_cnt) {
|
||||
out << sourceFileInfo;
|
||||
}
|
||||
ignore_eoc = false;
|
||||
ignore_cnt = 0;
|
||||
} else {
|
||||
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok));
|
||||
}
|
||||
tok = pos = cursor;
|
||||
goto echo;
|
||||
}
|
||||
"*" "/" {
|
||||
if (ignore_eoc) {
|
||||
if (ignore_cnt) {
|
||||
out << "\n" << sourceFileInfo;
|
||||
}
|
||||
ignore_eoc = false;
|
||||
ignore_cnt = 0;
|
||||
} else {
|
||||
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok));
|
||||
}
|
||||
tok = pos = cursor;
|
||||
goto echo;
|
||||
}
|
||||
"\n" {
|
||||
if (ignore_eoc) {
|
||||
ignore_cnt++;
|
||||
} else {
|
||||
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok));
|
||||
}
|
||||
tok = pos = cursor; cline++;
|
||||
goto echo;
|
||||
}
|
||||
zero {
|
||||
if (!ignore_eoc) {
|
||||
out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok) - 1); // -1 so we don't write out the \0
|
||||
}
|
||||
if(cursor == eof) {
|
||||
RETURN(0);
|
||||
}
|
||||
}
|
||||
any {
|
||||
goto echo;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
// Scans the next token of a re2c specification and hands it to the parser.
//
// Returns a token code: STRING, RANGE, CLOSE, CLOSESIZE, CONFIG, ID, CODE,
// NUMBER, VALUE, or a literal punctuation character. The matching yylval
// member is filled in before returning. 0 is returned when the end of the
// specification block is seen or when the cursor has reached eof.
// RETURN is a macro defined outside this view; presumably it stores the
// cursor back into cur before returning - TODO confirm.
//
// The function is a small state machine built from labels and re2c blocks:
//   scan    - main token rules (strings, ranges, closures, names, configs)
//   code    - inside a brace-delimited action; depth counts open braces and
//             a CODE token is produced when it drops back to zero
//   comment - inside a comment; depth counts openers, a nested opener is
//             reported through fatal()
//   config  - after a configuration name; skips spaces and requires '='
//   value   - the configuration value; yields NUMBER (via atoi) or VALUE
// iscfg remembers the configuration state between calls: the config rule
// sets it to 1, the '=' rule sets it to 2, and both value rules reset it to 0.
int Scanner::scan()
|
||||
{
|
||||
char *cursor = cur;
|
||||
uint depth;
|
||||
|
||||
// Start of a token: record its column (offset from pos, which is moved to
// the cursor after each newline) and line for diagnostics, then resume the
// config/value sub-scanners if a configuration is in progress.
scan:
|
||||
tchar = cursor - pos;
|
||||
tline = cline;
|
||||
tok = cursor;
|
||||
if (iscfg == 1)
|
||||
{
|
||||
goto config;
|
||||
}
|
||||
else if (iscfg == 2)
|
||||
{
|
||||
goto value;
|
||||
}
|
||||
/*!re2c
|
||||
"{" { depth = 1;
|
||||
goto code;
|
||||
}
|
||||
"/*" { depth = 1;
|
||||
goto comment; }
|
||||
|
||||
"*/" { tok = cursor;
|
||||
RETURN(0); }
|
||||
|
||||
dstring { cur = cursor;
|
||||
yylval.regexp = strToRE(token());
|
||||
return STRING; }
|
||||
|
||||
sstring { cur = cursor;
|
||||
yylval.regexp = strToCaseInsensitiveRE(token());
|
||||
return STRING; }
|
||||
|
||||
"\"" { fatal("unterminated string constant (missing \")"); }
|
||||
"'" { fatal("unterminated string constant (missing ')"); }
|
||||
|
||||
istring { cur = cursor;
|
||||
yylval.regexp = invToRE(token());
|
||||
return RANGE; }
|
||||
|
||||
cstring { cur = cursor;
|
||||
yylval.regexp = ranToRE(token());
|
||||
return RANGE; }
|
||||
|
||||
"[" { fatal("unterminated range (missing ])"); }
|
||||
|
||||
[()|=;/\\] { RETURN(*tok); }
|
||||
|
||||
[*+?] { yylval.op = *tok;
|
||||
RETURN(CLOSE); }
|
||||
|
||||
"{0,}" { yylval.op = '*';
|
||||
RETURN(CLOSE); }
|
||||
|
||||
"{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)tok+1);
|
||||
yylval.extop.maxsize = atoi((char *)tok+1);
|
||||
RETURN(CLOSESIZE); }
|
||||
|
||||
"{" [0-9]+ "," [0-9]+ "}" { yylval.extop.minsize = atoi((char *)tok+1);
|
||||
yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)tok, ',')+1));
|
||||
RETURN(CLOSESIZE); }
|
||||
|
||||
"{" [0-9]+ ",}" { yylval.extop.minsize = atoi((char *)tok+1);
|
||||
yylval.extop.maxsize = -1;
|
||||
RETURN(CLOSESIZE); }
|
||||
|
||||
"{" [0-9]* "," { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); }
|
||||
|
||||
config { cur = cursor;
|
||||
tok+= 5; /* skip "re2c:" */
|
||||
iscfg = 1;
|
||||
yylval.str = new Str(token());
|
||||
return CONFIG;
|
||||
}
|
||||
|
||||
name { cur = cursor;
|
||||
yylval.symbol = Symbol::find(token());
|
||||
return ID; }
|
||||
|
||||
"." { cur = cursor;
|
||||
yylval.regexp = mkDot();
|
||||
return RANGE;
|
||||
}
|
||||
|
||||
space+ { goto scan; }
|
||||
|
||||
eol { if(cursor == eof) RETURN(0);
|
||||
pos = cursor; cline++;
|
||||
goto scan;
|
||||
}
|
||||
|
||||
any { std::ostringstream msg;
|
||||
msg << "unexpected character: ";
|
||||
prtChOrHex(msg, *tok);
|
||||
fatal(msg.str().c_str());
|
||||
goto scan;
|
||||
}
|
||||
*/
|
||||
|
||||
code:
|
||||
/*!re2c
|
||||
"}" { if(--depth == 0){
|
||||
cur = cursor;
|
||||
yylval.token = new Token(token(), tline);
|
||||
return CODE;
|
||||
}
|
||||
goto code; }
|
||||
"{" { ++depth;
|
||||
goto code; }
|
||||
"\n" { if(cursor == eof) fatal("missing '}'");
|
||||
pos = cursor; cline++;
|
||||
goto code;
|
||||
}
|
||||
zero { if(cursor == eof) {
|
||||
if (depth) fatal("missing '}'");
|
||||
RETURN(0);
|
||||
}
|
||||
goto code;
|
||||
}
|
||||
dstring | sstring | any { goto code; }
|
||||
*/
|
||||
|
||||
comment:
|
||||
/*!re2c
|
||||
"*/" { if(--depth == 0)
|
||||
goto scan;
|
||||
else
|
||||
goto comment; }
|
||||
"/*" { ++depth;
|
||||
fatal("ambiguous /* found");
|
||||
goto comment; }
|
||||
"\n" { if(cursor == eof) RETURN(0);
|
||||
tok = pos = cursor; cline++;
|
||||
goto comment;
|
||||
}
|
||||
any { if(cursor == eof) RETURN(0);
|
||||
goto comment; }
|
||||
*/
|
||||
|
||||
config:
|
||||
/*!re2c
|
||||
space+ { goto config; }
|
||||
"=" space* { iscfg = 2;
|
||||
cur = cursor;
|
||||
RETURN('=');
|
||||
}
|
||||
any { fatal("missing '='"); }
|
||||
*/
|
||||
|
||||
value:
|
||||
/*!re2c
|
||||
number { cur = cursor;
|
||||
yylval.number = atoi(token().to_string().c_str());
|
||||
iscfg = 0;
|
||||
return NUMBER;
|
||||
}
|
||||
value { cur = cursor;
|
||||
yylval.str = new Str(token());
|
||||
iscfg = 0;
|
||||
return VALUE;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void Scanner::fatal(uint ofs, const char *msg) const
|
||||
{
|
||||
out.flush();
|
||||
#ifdef _MSC_VER
|
||||
std::cerr << filename << "(" << tline << "): error : "
|
||||
<< "column " << (tchar + ofs + 1) << ": "
|
||||
<< msg << std::endl;
|
||||
#else
|
||||
std::cerr << "re2c: error: "
|
||||
<< "line " << tline << ", column " << (tchar + ofs + 1) << ": "
|
||||
<< msg << std::endl;
|
||||
#endif
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Releases the buffer pointed to by bot.
Scanner::~Scanner()
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete [] bot;
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
168
tools/re2c/src/codegen/bitmap.cc
Normal file
168
tools/re2c/src/codegen/bitmap.cc
Normal file
|
@ -0,0 +1,168 @@
|
|||
#include <algorithm> // min
|
||||
#include <string.h> // memset
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
BitMap *BitMap::first = NULL;
|
||||
|
||||
// Creates a bitmap entry for the spans of `g` that lead to state `x` and
// pushes it onto the front of the global list headed by BitMap::first.
// The table offset `i` and bit mask `m` start at zero; BitMap::gen assigns
// their real values.
BitMap::BitMap(const Go *g, const State *x)
    : go(g), on(x), next(first), i(0), m(0)
{
    // The previous head was captured in `next` above; this node is the new head.
    first = this;
}
|
||||
|
||||
// Destroys this entry together with everything linked after it: deleting
// `next` runs the same destructor on the following node, and so on.
BitMap::~BitMap()
{
    delete this->next;
}
|
||||
|
||||
// Returns a bitmap whose spans match those of `g` leading to `x`.
// The existing list is searched with matches(); when nothing fits, a new
// entry is allocated (its constructor links it into the list) and returned.
const BitMap *BitMap::find(const Go *g, const State *x)
{
    const BitMap *entry = first;
    while (entry != NULL
        && !matches(entry->go->span, entry->go->nSpans, entry->on, g->span, g->nSpans, x))
    {
        entry = entry->next;
    }

    if (entry != NULL)
    {
        return entry;
    }
    return new BitMap(g, x);
}
|
||||
|
||||
// Looks up the bitmap registered for state `x`.
// Returns NULL when no entry in the list has `on == x`; never allocates.
const BitMap *BitMap::find(const State *x)
{
    const BitMap *entry = first;
    while (entry != NULL && entry->on != x)
    {
        entry = entry->next;
    }
    // Either the matching entry or NULL when the list was exhausted.
    return entry;
}
|
||||
|
||||
static void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m)
|
||||
{
|
||||
Span *b = g->span, *e = &b[g->nSpans];
|
||||
uint32_t lb = 0;
|
||||
|
||||
for (; b < e; ++b)
|
||||
{
|
||||
if (b->to == s)
|
||||
{
|
||||
for (; lb < b->ub && lb < 256; ++lb)
|
||||
{
|
||||
bm[lb-f] |= m;
|
||||
}
|
||||
}
|
||||
|
||||
lb = b->ub;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the static bitmap table (named by opts->yybm) for all registered
// bitmaps.
//
// o   - output to write to
// ind - indentation level of the table declaration
// lb  - first character code covered by the table
// ub  - one past the last character code covered
//
// Bitmaps are packed eight per sub-table of (ub - lb) cells: within a
// sub-table the first bitmap gets bit 0x80, the next 0x40, and so on. Each
// bitmap records its sub-table offset in `i` and its bit in `m`.
// Nothing is written when no bitmap exists or bUsedYYBitmap is not set.
void BitMap::gen(OutputFile & o, uint32_t ind, uint32_t lb, uint32_t ub)
{
    if (!(first && bUsedYYBitmap))
    {
        return;
    }

    o.wind(ind).ws("static const unsigned char ").wstring(opts->yybm).ws("[] = {");

    const uint32_t width = ub - lb;

    // Total number of bitmaps in the list.
    uint32_t total = 0;
    for (const BitMap *p = first; p != NULL; p = p->next)
    {
        ++total;
    }

    uint32_t *cells = new uint32_t[width];

    BitMap *cur = first;
    uint32_t offset = 0;
    uint32_t table_no = 1;
    while (cur)
    {
        for (uint32_t k = 0; k < width; ++k)
        {
            cells[k] = 0;
        }

        // Up to eight bitmaps share one sub-table, one bit each.
        for (uint32_t bit = 0x80; cur && bit; bit >>= 1)
        {
            cur->i = offset;
            cur->m = bit;
            doGen(cur->go, cur->on, cells, lb, bit);
            cur = const_cast<BitMap*>(cur->next);
        }

        if (total > 8)
        {
            const uint32_t last_no = (table_no + 7 < total) ? table_no + 7 : total;
            o.ws("\n").wind(ind+1).ws("/* table ").wu32(table_no).ws(" .. ").wu32(last_no).ws(": ").wu32(offset).ws(" */");
        }

        for (uint32_t col = 0; col < width; ++col)
        {
            // Eight cells per output line.
            if (col % 8 == 0)
            {
                o.ws("\n").wind(ind+1);
            }

            if (opts->yybmHexTable)
            {
                o.wu32_hex(cells[col]);
            }
            else
            {
                o.wu32_width(cells[col], 3);
            }
            o.ws(", ");
        }

        offset += width;
        table_no += 8;
    }

    o.ws("\n").wind(ind).ws("};\n");

    delete[] cells;
}
|
||||
|
||||
// All spans in b1 that lead to s1 are pairwise equal to that in b2 leading to s2
|
||||
bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2)
|
||||
{
|
||||
const Span * e1 = &b1[n1];
|
||||
uint32_t lb1 = 0;
|
||||
const Span * e2 = &b2[n2];
|
||||
uint32_t lb2 = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
for (; b1 < e1 && b1->to != s1; ++b1)
|
||||
{
|
||||
lb1 = b1->ub;
|
||||
}
|
||||
for (; b2 < e2 && b2->to != s2; ++b2)
|
||||
{
|
||||
lb2 = b2->ub;
|
||||
}
|
||||
if (b1 == e1)
|
||||
{
|
||||
return b2 == e2;
|
||||
}
|
||||
if (b2 == e2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lb1 != lb2 || b1->ub != b2->ub)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
++b1;
|
||||
++b2;
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
45
tools/re2c/src/codegen/bitmap.h
Normal file
45
tools/re2c/src/codegen/bitmap.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
#ifndef _RE2C_CODEGEN_BITMAP_
|
||||
#define _RE2C_CODEGEN_BITMAP_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct Go;
|
||||
struct Span;
|
||||
class State;
|
||||
class OutputFile;
|
||||
|
||||
class BitMap
|
||||
{
|
||||
public:
|
||||
static BitMap *first;
|
||||
|
||||
const Go *go;
|
||||
const State *on;
|
||||
const BitMap *next;
|
||||
uint32_t i;
|
||||
uint32_t m;
|
||||
|
||||
public:
|
||||
static const BitMap *find(const Go*, const State*);
|
||||
static const BitMap *find(const State*);
|
||||
static void gen(OutputFile &, uint32_t ind, uint32_t, uint32_t);
|
||||
BitMap(const Go*, const State*);
|
||||
~BitMap();
|
||||
|
||||
FORBID_COPY (BitMap);
|
||||
};
|
||||
|
||||
bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */
|
||||
#endif
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_BITMAP_
|
43
tools/re2c/src/codegen/emit.h
Normal file
43
tools/re2c/src/codegen/emit.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
#ifndef _RE2C_CODEGEN_EMIT_
|
||||
#define _RE2C_CODEGEN_EMIT_
|
||||
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
typedef std::vector<std::string> RegExpIndices;
|
||||
|
||||
void emit_action
|
||||
( const Action & action
|
||||
, OutputFile & o
|
||||
, uint32_t ind
|
||||
, bool & readCh
|
||||
, const State * const s
|
||||
, const std::string & condName
|
||||
, const Skeleton * skeleton
|
||||
, const std::set<label_t> & used_labels
|
||||
, bool save_yyaccept
|
||||
);
|
||||
|
||||
// helpers
|
||||
void genGoTo (OutputFile & o, uint32_t ind, const State * from, const State * to, bool & readCh);
|
||||
|
||||
// Returns a copy of `str` in which every occurrence of `param` is replaced
// by the textual form of `value` (as produced by operator<<).
//
// str   - template text
// param - placeholder to look for; when empty, `str` is returned unchanged
// value - anything streamable to std::ostream
//
// Inserted text is never rescanned: the search resumes right after each
// replacement. Restarting from the beginning, as was done previously, loops
// forever whenever the replacement itself contains the placeholder.
template<typename _Ty> std::string replaceParam (std::string str, const std::string & param, const _Ty & value)
{
    if (param.empty ())
    {
        return str;
    }

    std::ostringstream strValue;
    strValue << value;
    // Convert once instead of on every iteration.
    const std::string replacement = strValue.str ();

    std::string::size_type pos = str.find (param);
    while (pos != std::string::npos)
    {
        str.replace (pos, param.length (), replacement);
        pos = str.find (param, pos + replacement.length ());
    }
    return str;
}
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_EMIT_
|
388
tools/re2c/src/codegen/emit_action.cc
Normal file
388
tools/re2c/src/codegen/emit_action.cc
Normal file
|
@ -0,0 +1,388 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "src/codegen/emit.h"
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/parse/code.h"
|
||||
#include "src/parse/loc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class label_t;
|
||||
|
||||
static void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker);
|
||||
static void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s);
|
||||
static void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & init, const std::set<label_t> & used_labels);
|
||||
static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept);
|
||||
static void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept, size_t l, size_t r);
|
||||
static void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept);
|
||||
static void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton);
|
||||
static void genYYFill (OutputFile & o, size_t need);
|
||||
static void genSetCondition (OutputFile & o, uint32_t ind, const std::string & newcond);
|
||||
static void genSetState (OutputFile & o, uint32_t ind, uint32_t fillIndex);
|
||||
|
||||
void emit_action
|
||||
( const Action & action
|
||||
, OutputFile & o
|
||||
, uint32_t ind
|
||||
, bool & readCh
|
||||
, const State * const s
|
||||
, const std::string & condName
|
||||
, const Skeleton * skeleton
|
||||
, const std::set<label_t> & used_labels
|
||||
, bool save_yyaccept
|
||||
)
|
||||
{
|
||||
switch (action.type)
|
||||
{
|
||||
case Action::MATCH:
|
||||
emit_match (o, ind, readCh, s);
|
||||
break;
|
||||
case Action::INITIAL:
|
||||
emit_initial (o, ind, readCh, s, * action.info.initial, used_labels);
|
||||
break;
|
||||
case Action::SAVE:
|
||||
emit_save (o, ind, readCh, s, action.info.save, save_yyaccept);
|
||||
break;
|
||||
case Action::MOVE:
|
||||
break;
|
||||
case Action::ACCEPT:
|
||||
emit_accept (o, ind, readCh, s, * action.info.accepts);
|
||||
break;
|
||||
case Action::RULE:
|
||||
emit_rule (o, ind, s, action.info.rule, condName, skeleton);
|
||||
break;
|
||||
}
|
||||
if (s->isPreCtxt && opts->target != opt_t::DOT)
|
||||
{
|
||||
o.wstring(opts->input_api.stmt_backupctx (ind));
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the "advance input" code for a MATCH state (nothing for DOT output).
//
// When the state needs a fill check (s->fill != 0) a plain skip is emitted
// followed by need(). Otherwise the next character is peeked together with
// the skip only if the following state is not a RULE state; readCh reports
// whether a character read is still pending.
void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    const bool read_ahead = s
        && s->next
        && s->next->action.type != Action::RULE;

    if (s->fill != 0)
    {
        o.wstring(opts->input_api.stmt_skip (ind));
        need(o, ind, readCh, s->fill, false);
        return;
    }

    if (read_ahead)
    {
        o.wstring(opts->input_api.stmt_skip_peek (ind));
        readCh = false;
    }
    else
    {
        /* do not read next char if match */
        o.wstring(opts->input_api.stmt_skip (ind));
        readCh = true;
    }
}
|
||||
|
||||
// Emits the code for the initial state of a DFA (nothing for DOT output).
//
// If the state's own label is used, a skip (or skip+peek when no fill is
// needed) is emitted first. Then comes the initial label if it is used, the
// optional debug call, and finally either the fill check or, without fill,
// an optional marker backup.
void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & initial, const std::set<label_t> & used_labels)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    const bool needs_fill = s->fill != 0;

    if (used_labels.find(s->label) != used_labels.end())
    {
        if (needs_fill)
        {
            o.wstring(opts->input_api.stmt_skip (ind));
        }
        else
        {
            o.wstring(opts->input_api.stmt_skip_peek (ind));
        }
    }

    if (used_labels.find(initial.label) != used_labels.end())
    {
        o.wstring(opts->labelPrefix).wlabel(initial.label).ws(":\n");
    }

    if (opts->dFlag)
    {
        o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(initial.label).ws(", *").wstring(opts->yycursor).ws(");\n");
    }

    if (needs_fill)
    {
        need(o, ind, readCh, s->fill, initial.setMarker);
        return;
    }

    if (initial.setMarker)
    {
        o.wstring(opts->input_api.stmt_backup (ind));
    }
    readCh = false;
}
|
||||
|
||||
// Emits the code for a SAVE state (nothing for DOT output): optionally
// stores `save` into the yyaccept variable, then skips and backs up the
// input position, followed by a fill check or an immediate peek.
void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    if (save_yyaccept)
    {
        o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu32(save).ws(";\n");
    }

    if (s->fill == 0)
    {
        o.wstring(opts->input_api.stmt_skip_backup_peek (ind));
        readCh = false;
        return;
    }

    o.wstring(opts->input_api.stmt_skip_backup (ind));
    need(o, ind, readCh, s->fill, false);
}
|
||||
|
||||
// Emits a binary decision tree of nested if/else statements over the
// yyaccept variable for the accept indices in [l, r]; each leaf is a jump
// to accepts[index].
void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts, size_t l, size_t r)
{
    // Leaf: a single index is left.
    if (!(l < r))
    {
        genGoTo(o, ind, s, accepts[l], readCh);
        return;
    }

    const size_t mid = (l + r) / 2;
    // With exactly two candidates an equality test is enough.
    const char * const cmp = (r == l + 1) ? " == " : " <= ";

    o.wind(ind).ws("if (").wstring(opts->yyaccept).ws(cmp).wu64(mid).ws(") {\n");
    emit_accept_binary (o, ind + 1, readCh, s, accepts, l, mid);
    o.wind(ind).ws("} else {\n");
    emit_accept_binary (o, ind + 1, readCh, s, accepts, mid + 1, r);
    o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Emits the dispatch on the saved yyaccept value for an ACCEPT state.
//
// Nothing is emitted for an empty accept list. Otherwise the input position
// is restored (except for DOT output), a pending character read is flushed,
// and one of the following forms is produced:
//   - a single accept: a direct jump
//   - '-g' with enough accepts: a computed-goto table
//   - '-s', or exactly two accepts (non-DOT): nested if/else
//   - DOT output: one edge per accept
//   - otherwise: a switch statement
void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts)
{
    const size_t count = accepts.size ();
    if (count == 0)
    {
        return;
    }

    const bool dot = opts->target == opt_t::DOT;

    if (!dot)
    {
        o.wstring(opts->input_api.stmt_restore (ind));
    }

    if (readCh) // shouldn't be necessary, but might become at some point
    {
        o.wstring(opts->input_api.stmt_peek (ind));
        readCh = false;
    }

    if (count == 1)
    {
        // no need to write if statement here since there is only case 0.
        genGoTo(o, ind, s, accepts[0], readCh);
        return;
    }

    if (opts->gFlag && count >= opts->cGotoThreshold)
    {
        o.wind(ind).ws("{\n");
        o.wind(ind + 1).ws("static void *").wstring(opts->yytarget).ws("[").wu64(count).ws("] = {\n");
        for (uint32_t k = 0; k < count; ++k)
        {
            o.wind(ind + 2).ws("&&").wstring(opts->labelPrefix).wlabel(accepts[k]->label).ws(",\n");
        }
        o.wind(ind + 1).ws("};\n");
        o.wind(ind + 1).ws("goto *").wstring(opts->yytarget).ws("[").wstring(opts->yyaccept).ws("];\n");
        o.wind(ind).ws("}\n");
    }
    else if (opts->sFlag || (count == 2 && !dot))
    {
        emit_accept_binary (o, ind, readCh, s, accepts, 0, count - 1);
    }
    else if (dot)
    {
        for (uint32_t k = 0; k < count; ++k)
        {
            o.wlabel(s->label).ws(" -> ").wlabel(accepts[k]->label);
            o.ws(" [label=\"yyaccept=").wu32(k).ws("\"]\n");
        }
    }
    else
    {
        o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n");
        // All but the last accept get an explicit case; the last is the default.
        for (uint32_t k = 0; k < count - 1; ++k)
        {
            o.wind(ind).ws("case ").wu32(k).ws(": \t");
            genGoTo(o, 0, s, accepts[k], readCh);
        }
        o.wind(ind).ws("default:\t");
        genGoTo(o, 0, s, accepts[count - 1], readCh);
        o.wind(ind).ws("}\n");
    }
}
|
||||
|
||||
// Emits the code for a RULE state.
//
// DOT output: only the node, labelled with the source location of the
// rule's code when there is any.
// Otherwise: restores the trailing context when its fixed length is not
// zero, then either delegates to the skeleton (SKELETON target) or emits an
// optional condition change followed by the user's code. A rule without
// code but with a new condition gets the condition-goto statement.
void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton)
{
    if (opts->target == opt_t::DOT)
    {
        o.wlabel(s->label);
        if (rule->code)
        {
            o.ws(" [label=\"").wstring(rule->code->loc.filename).ws(":").wu32(rule->code->loc.line).ws("\"]");
        }
        o.ws("\n");
        return;
    }

    // The DOT target has already returned above, so no further target check
    // is needed here.
    const uint32_t ctx_len = rule->ctx->fixedLength();
    if (ctx_len != 0u)
    {
        o.wstring(opts->input_api.stmt_restorectx (ind));
    }

    if (opts->target == opt_t::SKELETON)
    {
        skeleton->emit_action (o, ind, rule->rank);
        return;
    }

    const bool has_newcond = !rule->newcond.empty ();

    if (has_newcond && condName != rule->newcond)
    {
        genSetCondition(o, ind, rule->newcond);
    }

    if (rule->code)
    {
        if (!yySetupRule.empty ())
        {
            o.wind(ind).wstring(yySetupRule).ws("\n");
        }
        o.wline_info(rule->code->loc.line, rule->code->loc.filename.c_str ())
            .wind(ind).wstring(rule->code->text).ws("\n")
            .wdelay_line_info ();
    }
    else if (has_newcond)
    {
        o.wind(ind).wstring(replaceParam(opts->condGoto, opts->condGotoParam, opts->condPrefix + rule->newcond)).ws("\n");
    }
}
|
||||
|
||||
// Emits the "ensure n characters are available" sequence (nothing for DOT).
//
// o          - output to write to
// ind        - indentation level
// readCh     - cleared when a peek statement has been emitted
// n          - number of characters required
// bSetMarker - emit a backup together with the peek
//
// With '-f' the state is stored before the fill and a fill label is placed
// after it; last_fill_index is advanced for the next call.
void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    const uint32_t fillIndex = last_fill_index;
    const bool nonzero = n > 0;

    if (opts->fFlag)
    {
        last_fill_index++;
        genSetState (o, ind, fillIndex);
    }

    if (opts->fill_use && nonzero)
    {
        o.wind(ind);
        if (opts->fill_check)
        {
            // A dedicated expression is used when a single character is needed.
            if (n == 1)
            {
                o.ws("if (").wstring(opts->input_api.expr_lessthan_one ()).ws(") ");
            }
            else
            {
                o.ws("if (").wstring(opts->input_api.expr_lessthan (n)).ws(") ");
            }
        }
        genYYFill(o, n);
    }

    if (opts->fFlag)
    {
        o.wstring(opts->yyfilllabel).wu32(fillIndex).ws(":\n");
    }

    if (!nonzero)
    {
        return;
    }

    if (bSetMarker)
    {
        o.wstring(opts->input_api.stmt_backup_peek (ind));
    }
    else
    {
        o.wstring(opts->input_api.stmt_peek (ind));
    }
    readCh = false;
}
|
||||
|
||||
// Writes the fill invocation for `need` characters, terminated by a newline.
// The configured fill text has its argument placeholder substituted; unless
// the "naked" form is configured, an optional "(need)" and a ";" follow.
void genYYFill (OutputFile & o, size_t need)
{
    const std::string fill_text = replaceParam (opts->fill, opts->fill_arg, need);
    o.wstring(fill_text);

    if (opts->fill_naked)
    {
        o.ws("\n");
        return;
    }

    if (opts->fill_arg_use)
    {
        o.ws("(").wu64(need).ws(")");
    }
    o.ws(";");
    o.ws("\n");
}
|
||||
|
||||
// Writes the statement that switches the lexer to condition `newcond`.
// The configured setter text has its placeholder replaced by the prefixed
// condition name; unless the "naked" form is configured, the name is also
// appended as a call argument followed by ";".
void genSetCondition(OutputFile & o, uint32_t ind, const std::string& newcond)
{
    const std::string setter = replaceParam (opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + newcond);
    o.wind(ind).wstring(setter);

    if (opts->cond_set_naked)
    {
        o.ws("\n");
        return;
    }

    o.ws("(").wstring(opts->condEnumPrefix).wstring(newcond).ws(");");
    o.ws("\n");
}
|
||||
|
||||
// Writes the statement that stores `fillIndex` as the current lexer state.
// The configured setter text has its placeholder replaced by the index;
// unless the "naked" form is configured, "(fillIndex);" is appended.
void genSetState(OutputFile & o, uint32_t ind, uint32_t fillIndex)
{
    const std::string setter = replaceParam (opts->state_set, opts->state_set_arg, fillIndex);
    o.wind(ind).wstring(setter);

    if (opts->state_set_naked)
    {
        o.ws("\n");
        return;
    }

    o.ws("(").wu32(fillIndex).ws(");");
    o.ws("\n");
}
|
||||
|
||||
} // namespace re2c
|
348
tools/re2c/src/codegen/emit_dfa.cc
Normal file
348
tools/re2c/src/codegen/emit_dfa.cc
Normal file
|
@ -0,0 +1,348 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/emit.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/util/counter.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static std::string genGetCondition ();
|
||||
static void genCondGotoSub (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax);
|
||||
static void genCondTable (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
|
||||
static void genCondGoto (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
|
||||
static void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label);
|
||||
|
||||
std::string genGetCondition()
|
||||
{
|
||||
return opts->cond_get_naked
|
||||
? opts->cond_get
|
||||
: opts->cond_get + "()";
|
||||
}
|
||||
|
||||
// Emits a transition from state `from` to state `to`.
// DOT output gets an edge line. Otherwise a goto is written, preceded by a
// peek statement when a character read is still pending (readCh) and `to`
// is not the state that directly follows `from`.
void genGoTo(OutputFile & o, uint32_t ind, const State *from, const State *to, bool & readCh)
{
    if (opts->target == opt_t::DOT)
    {
        o.wlabel(from->label).ws(" -> ").wlabel(to->label).ws("\n");
        return;
    }

    const bool pending_read = readCh && from->next != to;
    if (pending_read)
    {
        o.wstring(opts->input_api.stmt_peek (ind));
        readCh = false;
    }

    o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(to->label).ws(";\n");
}
|
||||
|
||||
// Emits the preamble of a state: its label when the label is used, and,
// with '-d', a debug call for every state except the initial one.
// Nothing is emitted for DOT output.
void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label)
{
    if (opts->target == opt_t::DOT)
    {
        return;
    }

    if (used_label)
    {
        o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n");
    }

    const bool debug_this_state = opts->dFlag && (s->action.type != Action::INITIAL);
    if (debug_this_state)
    {
        o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(opts->input_api.expr_peek ()).ws(");\n");
    }
}
|
||||
|
||||
void DFA::count_used_labels (std::set<label_t> & used, label_t start, label_t initial, bool force_start) const
|
||||
{
|
||||
// In '-f' mode, default state is always state 0
|
||||
if (opts->fFlag)
|
||||
{
|
||||
used.insert (label_t::first ());
|
||||
}
|
||||
if (force_start)
|
||||
{
|
||||
used.insert (start);
|
||||
}
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
s->go.used_labels (used);
|
||||
}
|
||||
for (uint32_t i = 0; i < accepts.size (); ++i)
|
||||
{
|
||||
used.insert (accepts[i]->label);
|
||||
}
|
||||
// must go last: it needs the set of used labels
|
||||
if (used.count (head->label))
|
||||
{
|
||||
used.insert (initial);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the code of every state of the DFA in list order: the state
// preamble, its action and its transitions.
void DFA::emit_body (OutputFile & o, uint32_t& ind, const std::set<label_t> & used_labels, label_t initial) const
{
    // If DFA has transitions to initial state, then initial state
    // has a piece of code that advances input position. We must
    // skip it when entering DFA.
    if (used_labels.find(head->label) != used_labels.end())
    {
        o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(initial).ws(";\n");
    }

    // The accept index only has to be saved when there are several accepts.
    const bool save_yyaccept = accepts.size () > 1;

    State * state = head;
    while (state)
    {
        bool readCh = false;
        const bool label_used = used_labels.find (state->label) != used_labels.end ();

        emit_state (o, ind, state, label_used);
        emit_action (state->action, o, ind, readCh, state, cond, skeleton, used_labels, save_yyaccept);
        state->go.emit(o, ind, readCh);

        state = state->next;
    }
}
|
||||
|
||||
// Emits the complete code for this DFA.
//
// output       - destination; code goes to output.source
// ind          - current indentation level, updated as braces open and close
// isLastCond   - whether this is the last condition of the block ('-c' mode)
// bPrologBrace - set when the prolog opened a brace (or the DOT digraph)
//                that the epilog of the last DFA has to close
//
// Steps: assign labels to all states, compute the used labels, issue the
// skeleton warnings, then either emit the skeleton program or the regular
// prolog / bitmap table / condition dispatch / body / epilog. The global
// bitmap list is destroyed at the end in either case.
void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBrace)
{
    OutputFile & o = output.source;

    const bool bProlog = (!opts->cFlag || !bWroteCondCheck);

    // start_label points to the beginning of current re2c block
    // (prior to condition dispatch in '-c' mode)
    // it can be forced by configuration 're2c:startlabel = <integer>;'
    label_t start_label = o.label_counter.next ();

    // initial_label points to the beginning of DFA
    // in '-c' mode this is NOT equal to start_label
    label_t initial_label = start_label;
    if (bProlog && opts->cFlag)
    {
        initial_label = o.label_counter.next ();
    }

    State * state = head;
    while (state)
    {
        state->label = o.label_counter.next ();
        state = state->next;
    }

    std::set<label_t> used_labels;
    count_used_labels (used_labels, start_label, initial_label, o.get_force_start_label ());

    head->action.set_initial (initial_label, head->action.type == Action::SAVE);

    skeleton->warn_undefined_control_flow ();
    skeleton->warn_unreachable_rules ();
    skeleton->warn_match_empty ();

    if (opts->target == opt_t::SKELETON)
    {
        // Each skeleton is emitted only once per name.
        if (output.skeletons.insert (name).second)
        {
            skeleton->emit_data (o.file_name);
            skeleton->emit_start (o, max_fill, need_backup, need_backupctx, need_accept);
            uint32_t body_ind = 2;
            emit_body (o, body_ind, used_labels, initial_label);
            skeleton->emit_end (o, need_backup, need_backupctx);
        }
    }
    else
    {
        // Generate prolog
        if (bProlog)
        {
            o.ws("\n").wdelay_line_info ();

            if (opts->target == opt_t::DOT)
            {
                bPrologBrace = true;
                o.ws("digraph re2c {\n");
            }
            else
            {
                // A scope is opened whenever the prolog declares something.
                const bool open_scope =
                       (!opts->fFlag && o.get_used_yyaccept ())
                    || (!opts->fFlag && opts->bEmitYYCh)
                    || (opts->bFlag && !opts->cFlag && BitMap::first)
                    || (opts->cFlag && !bWroteCondCheck && opts->gFlag)
                    || (opts->fFlag && !bWroteGetState && opts->gFlag);

                if (open_scope)
                {
                    bPrologBrace = true;
                    o.wind(ind).ws("{\n");
                    ++ind;
                }
                else if (ind == 0)
                {
                    ind = 1;
                }
            }

            if (!opts->fFlag && opts->target != opt_t::DOT)
            {
                if (opts->bEmitYYCh)
                {
                    o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n");
                }
                o.wdelay_yyaccept_init (ind);
            }
            else
            {
                o.ws("\n");
            }
        }

        if (opts->bFlag && !opts->cFlag && BitMap::first)
        {
            BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256);
        }

        if (bProlog)
        {
            if (opts->cFlag && !bWroteCondCheck && opts->gFlag)
            {
                genCondTable(o, ind, output.types);
            }

            o.wdelay_state_goto (ind);

            if (opts->cFlag && opts->target != opt_t::DOT
                && used_labels.find(start_label) != used_labels.end())
            {
                o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n");
            }

            o.wuser_start_label ();

            if (opts->cFlag && !bWroteCondCheck)
            {
                genCondGoto(o, ind, output.types);
            }
        }

        if (opts->cFlag && !cond.empty())
        {
            if (opts->condDivider.length())
            {
                o.wstring(replaceParam(opts->condDivider, opts->condDividerParam, cond)).ws("\n");
            }

            if (opts->target == opt_t::DOT)
            {
                o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n");
            }
            else
            {
                o.wstring(opts->condPrefix).wstring(cond).ws(":\n");
            }
        }

        // In '-c' mode each condition gets its own scope for the bitmap table.
        if (opts->cFlag && opts->bFlag && BitMap::first)
        {
            o.wind(ind).ws("{\n");
            ++ind;
            BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256);
        }

        // Generate code
        emit_body (o, ind, used_labels, initial_label);

        if (opts->cFlag && opts->bFlag && BitMap::first)
        {
            --ind;
            o.wind(ind).ws("}\n");
        }

        // Generate epilog
        if ((!opts->cFlag || isLastCond) && bPrologBrace)
        {
            --ind;
            o.wind(ind).ws("}\n");
        }
    }

    // Cleanup: deleting a null pointer is a no-op, so no check is needed.
    delete BitMap::first;
    BitMap::first = NULL;
}
|
||||
|
||||
// Writes the static table of condition label addresses (named by
// opts->yyctable), one "&&label" entry per condition name in order.
void genCondTable(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
{
    const size_t count = condnames.size ();

    o.wind(ind).ws("static void *").wstring(opts->yyctable).ws("[").wu64(count).ws("] = {\n");

    typedef std::vector<std::string>::const_iterator name_iter;
    for (name_iter it = condnames.begin (); it != condnames.end (); ++it)
    {
        o.wind(ind + 1).ws("&&").wstring(opts->condPrefix).wstring(*it).ws(",\n");
    }

    o.wind(ind).ws("};\n");
}
|
||||
|
||||
// Emits nested 'if (<condition> < N) {...} else {...}' statements that
// bisect the index range [cMin, cMax] of 'condnames' and end in a
// 'goto <condPrefix><name>;' once a single index is left.
void genCondGotoSub(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax)
{
	// leaf: exactly one condition remains
	if (cMin == cMax)
	{
		o.wind(ind).ws("goto ").wstring(opts->condPrefix).wstring(condnames[cMin]).ws(";\n");
		return;
	}

	// indices below 'pivot' go to the 'then' branch, the rest to 'else'
	const uint32_t pivot = cMin + ((cMax - cMin + 1) / 2);

	o.wind(ind).ws("if (").wstring(genGetCondition()).ws(" < ").wu32(pivot).ws(") {\n");
	genCondGotoSub(o, ind + 1, condnames, cMin, pivot - 1);
	o.wind(ind).ws("} else {\n");
	genCondGotoSub(o, ind + 1, condnames, pivot, cMax);
	o.wind(ind).ws("}\n");
}
|
||||
|
||||
/*
|
||||
* note [condition order]
|
||||
*
|
||||
* In theory re2c makes no guarantee about the order of conditions in
|
||||
* the generated lexer. Users should define condition type 'YYCONDTYPE'
|
||||
* and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'.
|
||||
* This way code is independent of internal re2c condition numbering.
|
||||
*
|
||||
* However, it is possible to manually hardcode condition numbers and make
|
||||
* re2c generate condition dispatch without explicit use of condition names
|
||||
* (nested 'if' statements with '-b' or computed 'goto' table with '-g').
|
||||
* This code is syntactically valid (compiles), but unsafe:
|
||||
* - change of re2c options may break compilation
|
||||
* - change of internal re2c condition numbering may break runtime
|
||||
*
|
||||
* re2c has to preserve the existing numbering scheme.
|
||||
*
|
||||
* re2c warns about implicit assumptions about condition order, unless:
|
||||
* - condition type is defined with 'types:re2c' or '-t, --type-header'
|
||||
* - dispatch is independent of condition order: either it uses
|
||||
* explicit condition names or there's only one condition and
|
||||
* dispatch shrinks to unconditional jump
|
||||
*/
|
||||
// Emits the dispatch from the current condition to the matching condition
// label. The form depends on options:
//   - DOT target: one "0 -> <cond>" edge per condition;
//   - gFlag:      a computed goto through the opts->yyctable array;
//   - sFlag:      nested 'if' statements (see genCondGotoSub);
//   - otherwise:  a 'switch' with one 'case' per condition name.
// Sets o.warn_condition_order to false when the emitted dispatch does not
// rely on condition numbering (see note [condition order]) and records in
// bWroteCondCheck that the dispatch has been written.
void genCondGoto(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
{
	const size_t conds = condnames.size ();
	if (opts->target == opt_t::DOT)
	{
		o.warn_condition_order = false; // see note [condition order]
		for (size_t i = 0; i < conds; ++i)
		{
			// bind by reference: no need to copy every condition name
			const std::string & cond = condnames[i];
			o.ws("0 -> ").wstring(cond).ws(" [label=\"state=").wstring(cond).ws("\"]\n");
		}
	}
	else if (opts->gFlag)
	{
		o.wind(ind).ws("goto *").wstring(opts->yyctable).ws("[").wstring(genGetCondition()).ws("];\n");
	}
	else if (opts->sFlag)
	{
		if (conds == 1)
		{
			// a single condition degenerates to an unconditional jump
			o.warn_condition_order = false; // see note [condition order]
		}
		// NOTE(review): 'conds - 1' wraps around if 'condnames' is empty;
		// presumably callers always pass at least one condition -- confirm.
		genCondGotoSub(o, ind, condnames, 0, static_cast<uint32_t> (conds) - 1);
	}
	else
	{
		o.warn_condition_order = false; // see note [condition order]
		o.wind(ind).ws("switch (").wstring(genGetCondition()).ws(") {\n");
		for (size_t i = 0; i < conds; ++i)
		{
			const std::string & cond = condnames[i];
			o.wind(ind).ws("case ").wstring(opts->condEnumPrefix).wstring(cond).ws(": goto ").wstring(opts->condPrefix).wstring(cond).ws(";\n");
		}
		o.wind(ind).ws("}\n");
	}
	o.wdelay_warn_condition_order ();
	bWroteCondCheck = true;
}
|
||||
|
||||
} // end namespace re2c
|
216
tools/re2c/src/codegen/go.h
Normal file
216
tools/re2c/src/codegen/go.h
Normal file
|
@ -0,0 +1,216 @@
|
|||
#ifndef _RE2C_CODEGEN_GO_
|
||||
#define _RE2C_CODEGEN_GO_
|
||||
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class BitMap;
|
||||
class State;
|
||||
struct If;
|
||||
|
||||
struct Span
|
||||
{
|
||||
uint32_t ub;
|
||||
State * to;
|
||||
|
||||
FORBID_COPY (Span);
|
||||
};
|
||||
|
||||
struct Case
|
||||
{
|
||||
std::vector<std::pair<uint32_t, uint32_t> > ranges;
|
||||
const State * to;
|
||||
void emit (OutputFile & o, uint32_t ind);
|
||||
|
||||
inline Case ()
|
||||
: ranges ()
|
||||
, to (NULL)
|
||||
{}
|
||||
|
||||
FORBID_COPY (Case);
|
||||
};
|
||||
|
||||
struct Cases
|
||||
{
|
||||
const State * def;
|
||||
Case * cases;
|
||||
uint32_t cases_size;
|
||||
void add (uint32_t lb, uint32_t ub, State * to);
|
||||
Cases (const Span * s, uint32_t n);
|
||||
~Cases ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (Cases);
|
||||
};
|
||||
|
||||
// A comparison of the input symbol against a constant, used as the
// condition of a generated 'if'.
struct Cond
{
	std::string compare; // comparison operator as text, e.g. "<=" or "=="
	uint32_t value;      // right-hand side of the comparison

	Cond (const std::string & cmp, uint32_t val);
};
|
||||
|
||||
struct Binary
|
||||
{
|
||||
Cond * cond;
|
||||
If * thn;
|
||||
If * els;
|
||||
Binary (const Span * s, uint32_t n, const State * next);
|
||||
~Binary ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (Binary);
|
||||
};
|
||||
|
||||
struct Linear
|
||||
{
|
||||
std::vector<std::pair<const Cond *, const State *> > branches;
|
||||
Linear (const Span * s, uint32_t n, const State * next);
|
||||
~Linear ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
};
|
||||
|
||||
struct If
|
||||
{
|
||||
enum type_t
|
||||
{
|
||||
BINARY,
|
||||
LINEAR
|
||||
} type;
|
||||
union
|
||||
{
|
||||
Binary * binary;
|
||||
Linear * linear;
|
||||
} info;
|
||||
If (type_t t, const Span * sp, uint32_t nsp, const State * next);
|
||||
~If ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
};
|
||||
|
||||
struct SwitchIf
|
||||
{
|
||||
enum
|
||||
{
|
||||
SWITCH,
|
||||
IF
|
||||
} type;
|
||||
union
|
||||
{
|
||||
Cases * cases;
|
||||
If * ifs;
|
||||
} info;
|
||||
SwitchIf (const Span * sp, uint32_t nsp, const State * next);
|
||||
~SwitchIf ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
};
|
||||
|
||||
struct GoBitmap
|
||||
{
|
||||
const BitMap * bitmap;
|
||||
const State * bitmap_state;
|
||||
SwitchIf * hgo;
|
||||
SwitchIf * lgo;
|
||||
GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next);
|
||||
~GoBitmap ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (GoBitmap);
|
||||
};
|
||||
|
||||
struct CpgotoTable
|
||||
{
|
||||
static const uint32_t TABLE_SIZE;
|
||||
const State ** table;
|
||||
CpgotoTable (const Span * span, uint32_t nSpans);
|
||||
~CpgotoTable ();
|
||||
void emit (OutputFile & o, uint32_t ind);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
private:
|
||||
label_t max_label () const;
|
||||
|
||||
FORBID_COPY (CpgotoTable);
|
||||
};
|
||||
|
||||
struct Cpgoto
|
||||
{
|
||||
SwitchIf * hgo;
|
||||
CpgotoTable * table;
|
||||
Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next);
|
||||
~Cpgoto ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (Cpgoto);
|
||||
};
|
||||
|
||||
struct Dot
|
||||
{
|
||||
const State * from;
|
||||
Cases * cases;
|
||||
Dot (const Span * sp, uint32_t nsp, const State * from);
|
||||
~Dot ();
|
||||
void emit (OutputFile & o);
|
||||
|
||||
FORBID_COPY (Dot);
|
||||
};
|
||||
|
||||
struct Go
|
||||
{
|
||||
uint32_t nSpans; // number of spans
|
||||
Span * span;
|
||||
enum
|
||||
{
|
||||
EMPTY,
|
||||
SWITCH_IF,
|
||||
BITMAP,
|
||||
CPGOTO,
|
||||
DOT
|
||||
} type;
|
||||
union
|
||||
{
|
||||
SwitchIf * switchif;
|
||||
GoBitmap * bitmap;
|
||||
Cpgoto * cpgoto;
|
||||
Dot * dot;
|
||||
} info;
|
||||
|
||||
Go ();
|
||||
~Go ();
|
||||
void init (const State * from);
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
Go (const Go & g)
|
||||
: nSpans (g.nSpans)
|
||||
, span (g.span)
|
||||
, type (g.type)
|
||||
, info (g.info)
|
||||
{}
|
||||
Go & operator = (const Go & g)
|
||||
{
|
||||
nSpans = g.nSpans;
|
||||
span = g.span;
|
||||
type = g.type;
|
||||
info = g.info;
|
||||
return * this;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_GO_
|
284
tools/re2c/src/codegen/go_construct.cc
Normal file
284
tools/re2c/src/codegen/go_construct.cc
Normal file
|
@ -0,0 +1,284 @@
|
|||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/util/allocate.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x);
|
||||
|
||||
// Groups 'span_size' spans by destination state. The default destination
// is that of the last span (NULL when there are no spans). Each span
// contributes the range from the previous span's upper bound (0 for the
// first span) up to its own upper bound.
Cases::Cases (const Span * span, uint32_t span_size)
	: def (span_size == 0 ? NULL : span[span_size - 1].to)
	, cases (new Case[span_size])
	, cases_size (0)
{
	uint32_t lower = 0;
	uint32_t idx = 0;
	while (idx < span_size)
	{
		const uint32_t upper = span[idx].ub;
		add (lower, upper, span[idx].to);
		lower = upper;
		++idx;
	}
}
|
||||
|
||||
// Appends range [lb, ub) to the case that already leads to 'to', or opens
// a new case for 'to' if there is none yet.
void Cases::add (uint32_t lb, uint32_t ub, State * to)
{
	// look for an existing case with the same destination
	uint32_t slot = 0;
	while (slot < cases_size && cases[slot].to != to)
	{
		++slot;
	}

	cases[slot].ranges.push_back (std::make_pair (lb, ub));
	if (slot == cases_size)
	{
		// not found: the range went into a fresh case
		cases[slot].to = to;
		++cases_size;
	}
}
|
||||
|
||||
// Stores the comparison operator text and the value to compare against.
Cond::Cond (const std::string & cmp, uint32_t val)
	: compare (cmp),
	  value (val)
{
}
|
||||
|
||||
// Splits 'n' spans into a lower half (n / 2 spans) and an upper half (the
// rest). The condition tests whether the symbol lies in the lower half.
// Each half is handled by a nested binary split if it has more than 4
// spans, otherwise by a linear sequence of 'if's.
Binary::Binary (const Span * s, uint32_t n, const State * next)
	: cond (NULL)
	, thn (NULL)
	, els (NULL)
{
	const uint32_t low_count = n / 2;
	const uint32_t high_count = n - low_count;
	const If::type_t low_kind = low_count > 4 ? If::BINARY : If::LINEAR;
	const If::type_t high_kind = high_count > 4 ? If::BINARY : If::LINEAR;

	// last symbol of the lower half: upper bounds are exclusive
	cond = new Cond ("<=", s[low_count - 1].ub - 1);
	thn = new If (low_kind, &s[0], low_count, next);
	els = new If (high_kind, &s[low_count], high_count, next);
}
|
||||
|
||||
// Builds the branch list for 'n' spans, consuming spans from the front.
// No branch is recorded for a final span that leads to 'next'.
Linear::Linear (const Span * s, uint32_t n, const State * next)
	: branches ()
{
	while (true)
	{
		const State * const first = s[0].to;

		// s[1] covers a single symbol and is surrounded by spans that
		// lead to the same state: test that symbol with '==' / '!='
		while (n >= 3 && s[2].to == first && (s[1].ub - s[0].ub) == 1)
		{
			if (n == 3 && s[1].to == next)
			{
				// last three spans and the single symbol leads to 'next':
				// jump away for every other symbol and stop
				branches.push_back (std::make_pair (new Cond ("!=", s[0].ub), first));
				return;
			}
			branches.push_back (std::make_pair (new Cond ("==", s[0].ub), s[1].to));
			n -= 2;
			s += 2;
		}

		if (n == 1)
		{
			// last span: unconditional branch (NULL condition) unless it
			// leads to 'next'
			if (next == NULL || s[0].to != next)
			{
				branches.push_back (std::make_pair (static_cast<const Cond *> (NULL), s[0].to));
			}
			return;
		}
		if (n == 2 && first == next)
		{
			// two spans left and the lower one leads to 'next': only the
			// upper one needs a branch
			branches.push_back (std::make_pair (new Cond (">=", s[0].ub), s[1].to));
			return;
		}

		// general case: peel off the lowest span
		branches.push_back (std::make_pair (new Cond ("<=", s[0].ub - 1), first));
		n -= 1;
		s += 1;
	}
}
|
||||
|
||||
// Creates the strategy object selected by 't' and stores it in the
// matching member of 'info'.
If::If (type_t t, const Span * sp, uint32_t nsp, const State * next)
	: type (t)
	, info ()
{
	if (type == BINARY)
	{
		info.binary = new Binary (sp, nsp, next);
	}
	else if (type == LINEAR)
	{
		info.linear = new Linear (sp, nsp, next);
	}
}
|
||||
|
||||
// Chooses between 'switch' and 'if' generation for 'nsp' spans.
// A 'switch' is used when opts->sFlag is off and there are more than 2
// spans, or when there are more than 8 spans and the symbols between the
// first and the second-to-last upper bounds number at most 3 * (nsp - 2).
// Otherwise 'if's are used: binary for more than 5 spans, linear for fewer.
SwitchIf::SwitchIf (const Span * sp, uint32_t nsp, const State * next)
	: type (IF)
	, info ()
{
	const bool use_switch = (!opts->sFlag && nsp > 2)
		|| (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2)));

	if (use_switch)
	{
		type = SWITCH;
		info.cases = new Cases (sp, nsp);
		return;
	}

	const If::type_t kind = nsp > 5 ? If::BINARY : If::LINEAR;
	info.ifs = new If (kind, sp, nsp, next);
}
|
||||
|
||||
// Prepares bitmap-based generation: spans leading to 'bm_state' are
// covered by the bitmap test; what remains after unmap() is handled by
// 'lgo', and the high spans by 'hgo'. Either may stay NULL.
GoBitmap::GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next)
	: bitmap (bm)
	, bitmap_state (bm_state)
	, hgo (NULL)
	, lgo (NULL)
{
	// scratch array for the spans that do not lead to 'bm_state'
	Span * const rest = allocate<Span> (nSpans);
	const uint32_t rest_count = unmap (rest, span, nSpans, bm_state);

	if (rest_count != 0)
	{
		lgo = new SwitchIf (rest, rest_count, next);
	}
	if (hSpans != 0)
	{
		// if there are any low spans, then next state for high spans
		// must be NULL to trigger explicit goto generation in linear 'if'
		hgo = new SwitchIf (hspan, hSpans, lgo ? NULL : next);
	}

	operator delete (rest);
}
|
||||
|
||||
const uint32_t CpgotoTable::TABLE_SIZE = 0x100;
|
||||
|
||||
// Fills the table: every symbol below TABLE_SIZE gets the destination of
// the span that contains it. Entries beyond the last span's upper bound
// are left unset.
CpgotoTable::CpgotoTable (const Span * span, uint32_t nSpans)
	: table (new const State * [TABLE_SIZE])
{
	uint32_t sym = 0;
	for (uint32_t i = 0; i < nSpans; ++i)
	{
		// symbols of this span, clipped to the table size
		const uint32_t limit = span[i].ub < TABLE_SIZE ? span[i].ub : TABLE_SIZE;
		while (sym < limit)
		{
			table[sym] = span[i].to;
			++sym;
		}
	}
}
|
||||
|
||||
// Builds the jump table from all spans and, if there are high spans, a
// SwitchIf to handle them.
Cpgoto::Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next)
	: hgo (NULL)
	, table (NULL)
{
	if (hSpans != 0)
	{
		hgo = new SwitchIf (hspan, hSpans, next);
	}
	table = new CpgotoTable (span, nSpans);
}
|
||||
|
||||
// Remembers the source state and groups its spans by destination.
Dot::Dot (const Span * sp, uint32_t nsp, const State * s)
	: from (s),
	  cases (new Cases (sp, nsp))
{
}
|
||||
|
||||
// Starts with no spans and no generation strategy.
Go::Go ()
	: nSpans (0),
	  span (NULL),
	  type (EMPTY),
	  info ()
{
}
|
||||
|
||||
void Go::init (const State * from)
|
||||
{
|
||||
if (nSpans == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// initialize high (wide) spans
|
||||
uint32_t hSpans = 0;
|
||||
const Span * hspan = NULL;
|
||||
for (uint32_t i = 0; i < nSpans; ++i)
|
||||
{
|
||||
if (span[i].ub > 0x100)
|
||||
{
|
||||
hspan = &span[i];
|
||||
hSpans = nSpans - i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize bitmaps
|
||||
uint32_t nBitmaps = 0;
|
||||
const BitMap * bitmap = NULL;
|
||||
const State * bitmap_state = NULL;
|
||||
for (uint32_t i = 0; i < nSpans; ++i)
|
||||
{
|
||||
if (span[i].to->isBase)
|
||||
{
|
||||
const BitMap *b = BitMap::find (span[i].to);
|
||||
if (b && matches(b->go->span, b->go->nSpans, b->on, span, nSpans, span[i].to))
|
||||
{
|
||||
if (bitmap == NULL)
|
||||
{
|
||||
bitmap = b;
|
||||
bitmap_state = span[i].to;
|
||||
}
|
||||
nBitmaps++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t dSpans = nSpans - hSpans - nBitmaps;
|
||||
if (opts->target == opt_t::DOT)
|
||||
{
|
||||
type = DOT;
|
||||
info.dot = new Dot (span, nSpans, from);
|
||||
}
|
||||
else if (opts->gFlag && (dSpans >= opts->cGotoThreshold))
|
||||
{
|
||||
type = CPGOTO;
|
||||
info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from->next);
|
||||
}
|
||||
else if (opts->bFlag && (nBitmaps > 0))
|
||||
{
|
||||
type = BITMAP;
|
||||
info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bitmap, bitmap_state, from->next);
|
||||
bUsedYYBitmap = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
type = SWITCH_IF;
|
||||
info.switchif = new SwitchIf (span, nSpans, from->next);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find all spans, that map to the given state. For each of them,
|
||||
* find upper adjacent span, that maps to another state (if such
|
||||
* span exists, otherwise try lower one).
|
||||
* If input contains single span that maps to the given state,
|
||||
* then output contains 0 spans.
|
||||
*/
|
||||
// Copies into 'new_span' the spans of 'old_span' that do not lead to 'x',
// merging consecutive copies that share a destination. The upper bound of
// the last copied span is stretched to the upper bound of the last input
// span. Returns the number of spans written.
uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x)
{
	uint32_t count = 0;
	for (uint32_t i = 0; i < old_nspans; ++i)
	{
		const Span & src = old_span[i];
		if (src.to == x)
		{
			continue;
		}
		// open a new output span unless the previous one has the same
		// destination
		if (count == 0 || new_span[count - 1].to != src.to)
		{
			new_span[count].to = src.to;
			++count;
		}
		new_span[count - 1].ub = src.ub;
	}
	if (count > 0)
	{
		new_span[count - 1].ub = old_span[old_nspans - 1].ub;
	}
	return count;
}
|
||||
|
||||
} // namespace re2c
|
99
tools/re2c/src/codegen/go_destruct.cc
Normal file
99
tools/re2c/src/codegen/go_destruct.cc
Normal file
|
@ -0,0 +1,99 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Releases the array allocated in the constructor.
Cases::~Cases ()
{
	delete [] cases;
}
|
||||
|
||||
// Releases the condition and both branches.
Binary::~Binary ()
{
	delete cond;
	delete thn;
	delete els;
}
|
||||
|
||||
// Releases the condition of every branch; the destination states are not
// deleted here.
Linear::~Linear ()
{
	typedef std::vector<std::pair<const Cond *, const State *> >::iterator branch_iter;
	for (branch_iter b = branches.begin (); b != branches.end (); ++b)
	{
		delete b->first;
	}
}
|
||||
|
||||
// Releases whichever strategy object 'type' marks as active.
If::~If ()
{
	if (type == BINARY)
	{
		delete info.binary;
	}
	else if (type == LINEAR)
	{
		delete info.linear;
	}
}
|
||||
|
||||
// Releases whichever strategy object 'type' marks as active.
SwitchIf::~SwitchIf ()
{
	if (type == SWITCH)
	{
		delete info.cases;
	}
	else if (type == IF)
	{
		delete info.ifs;
	}
}
|
||||
|
||||
// Releases the generators for high and low spans (either may be NULL).
GoBitmap::~GoBitmap ()
{
	delete hgo;
	delete lgo;
}
|
||||
|
||||
// Releases the table array; the states it points to are not deleted.
CpgotoTable::~CpgotoTable ()
{
	delete [] table;
}
|
||||
|
||||
// Releases the high-span generator (may be NULL) and the jump table.
Cpgoto::~Cpgoto ()
{
	delete hgo;
	delete table;
}
|
||||
|
||||
// Releases the grouped cases created in the constructor.
Dot::~Dot ()
{
	delete cases;
}
|
||||
|
||||
// Releases the strategy object selected by 'type'; nothing to do while
// the type is still EMPTY.
Go::~Go ()
{
	if (type == SWITCH_IF)
	{
		delete info.switchif;
	}
	else if (type == BITMAP)
	{
		delete info.bitmap;
	}
	else if (type == CPGOTO)
	{
		delete info.cpgoto;
	}
	else if (type == DOT)
	{
		delete info.dot;
	}
}
|
||||
|
||||
} // namespace re2c
|
271
tools/re2c/src/codegen/go_emit.cc
Normal file
271
tools/re2c/src/codegen/go_emit.cc
Normal file
|
@ -0,0 +1,271 @@
|
|||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/codegen/print.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value);
|
||||
static void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to);
|
||||
static std::string output_yych (bool & readCh);
|
||||
static std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo);
|
||||
|
||||
std::string output_yych (bool & readCh)
|
||||
{
|
||||
if (readCh)
|
||||
{
|
||||
readCh = false;
|
||||
return "(" + opts->input_api.expr_peek_save () + ")";
|
||||
}
|
||||
else
|
||||
{
|
||||
return opts->yych;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes "if (<symbol> <compare> <value>) " with no trailing newline; the
// caller appends the statement to execute.
void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value)
{
	// may clear 'readCh' (see output_yych)
	const std::string symbol = output_yych (readCh);
	o.wind(ind).ws("if (").wstring(symbol).ws(" ").wstring(compare).ws(" ").wc_hex (value).ws(") ");
}
|
||||
|
||||
// Writes "goto <labelPrefix><to>;". If the input symbol has not been read
// yet ('readCh' set), a peek statement is written first and 'readCh' is
// cleared.
void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to)
{
	const bool need_peek = readCh;
	if (need_peek)
	{
		readCh = false;
		o.wstring(opts->input_api.stmt_peek (ind));
	}
	o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(to).ws(";\n");
}
|
||||
|
||||
std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo)
|
||||
{
|
||||
std::string yych = output_yych (readCh);
|
||||
if (hgo != NULL)
|
||||
{
|
||||
o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n");
|
||||
hgo->emit (o, ind + 1, readCh);
|
||||
o.wind(ind).ws("} else ");
|
||||
yych = opts->yych;
|
||||
}
|
||||
else
|
||||
{
|
||||
o.wind(ind);
|
||||
}
|
||||
return yych;
|
||||
}
|
||||
|
||||
// Writes one "case <symbol>:" label per symbol of every range, each on its
// own line except the very last one, which gets no newline so the caller
// can continue on the same line. With opts->dFlag and EBCDIC encoding a
// comment with the decoded character is added when it is printable.
void Case::emit (OutputFile & o, uint32_t ind)
{
	const bool annotate = opts->dFlag && opts->encoding.type () == Enc::EBCDIC;

	for (uint32_t r = 0; r < ranges.size (); ++r)
	{
		const std::pair<uint32_t, uint32_t> & range = ranges[r];
		const bool last_range = r == ranges.size () - 1;

		for (uint32_t sym = range.first; sym < range.second; ++sym)
		{
			o.wind(ind).ws("case ").wc_hex (sym).ws(":");
			if (annotate)
			{
				const uint32_t decoded = opts->encoding.decodeUnsafe (sym);
				if (is_print (decoded))
				{
					o.ws(" /* ").wc(static_cast<char> (decoded)).ws(" */");
				}
			}
			const bool last_symbol = sym == range.second - 1;
			if (!(last_range && last_symbol))
			{
				o.ws("\n");
			}
		}
	}
}
|
||||
|
||||
// Writes a 'switch' on the input symbol: the 'case' labels and a 'goto'
// for every case that does not lead to the default state, then
// "default:" with a 'goto' to the default state.
void Cases::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	const std::string symbol = output_yych (readCh);
	o.wind(ind).ws("switch (").wstring(symbol).ws(") {\n");

	for (uint32_t i = 0; i < cases_size; ++i)
	{
		Case & c = cases[i];
		if (c.to == def)
		{
			continue; // covered by 'default:'
		}
		c.emit (o, ind);
		// the 'goto' follows the last 'case' label on the same line
		output_goto (o, 1, readCh, c.to->label);
	}

	o.wind(ind).ws("default:");
	output_goto (o, 1, readCh, def->label);
	o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Writes "if (<cond>) { <thn> } else { <els> }" with both branches one
// indentation level deeper.
void Binary::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	const uint32_t inner = ind + 1;

	output_if (o, ind, readCh, cond->compare, cond->value);
	o.ws("{\n");
	thn->emit (o, inner, readCh);
	o.wind(ind).ws("} else {\n");
	els->emit (o, inner, readCh);
	o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Writes the branches in order: "if (<cond>) goto <label>;" for branches
// with a condition, a plain "goto <label>;" for those without.
void Linear::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	for (uint32_t i = 0; i < branches.size (); ++i)
	{
		const Cond * const cond = branches[i].first;
		const State * const dest = branches[i].second;

		if (cond == NULL)
		{
			output_goto (o, ind, readCh, dest->label);
			continue;
		}
		output_if (o, ind, readCh, cond->compare, cond->value);
		// the 'goto' continues the 'if' line, hence no indentation
		output_goto (o, 0, readCh, dest->label);
	}
}
|
||||
|
||||
// Forwards to the strategy object selected by 'type'.
void If::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	if (type == BINARY)
	{
		info.binary->emit (o, ind, readCh);
	}
	else if (type == LINEAR)
	{
		info.linear->emit (o, ind, readCh);
	}
}
|
||||
|
||||
// Forwards to the strategy object selected by 'type'.
void SwitchIf::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	if (type == SWITCH)
	{
		info.cases->emit (o, ind, readCh);
	}
	else if (type == IF)
	{
		info.ifs->emit (o, ind, readCh);
	}
}
|
||||
|
||||
// Writes the optional high-symbol part, then
// "if (<yybm>[<offset>+<symbol>] & <mask>) { goto <bitmap state>; }",
// then the generator for the remaining low spans, if any. The mask is
// written in hexadecimal when opts->yybmHexTable is set.
void GoBitmap::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	const std::string symbol = output_hgo (o, ind, readCh, hgo);

	o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(symbol).ws("] & ");
	if (!opts->yybmHexTable)
	{
		o.wu32(bitmap->m);
	}
	else
	{
		o.wu32_hex(bitmap->m);
	}
	o.ws(") {\n");
	output_goto (o, ind + 1, readCh, bitmap_state->label);
	o.wind(ind).ws("}\n");

	if (lgo == NULL)
	{
		return;
	}
	lgo->emit (o, ind, readCh);
}
|
||||
|
||||
// Returns the greatest label among the table's destination states, or the
// first label if none is greater.
label_t CpgotoTable::max_label () const
{
	label_t highest = label_t::first ();
	uint32_t i = 0;
	while (i < TABLE_SIZE)
	{
		if (highest < table[i]->label)
		{
			highest = table[i]->label;
		}
		++i;
	}
	return highest;
}
|
||||
|
||||
// Writes the jump table as a static array of label addresses, eight per
// line. Entries are separated by a comma and padded with spaces according
// to the width of the widest label.
void CpgotoTable::emit (OutputFile & o, uint32_t ind)
{
	const uint32_t inner = ind + 1;

	o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n");
	o.wind(inner);

	const uint32_t max_digits = max_label ().width ();
	for (uint32_t i = 0; i < TABLE_SIZE; ++i)
	{
		o.ws("&&").wstring(opts->labelPrefix).wlabel(table[i]->label);

		if (i == TABLE_SIZE - 1)
		{
			// last entry: no separator
			o.ws("\n");
			continue;
		}
		if (i % 8 == 7)
		{
			// end of a row of eight
			o.ws(",\n").wind(inner);
			continue;
		}
		const uint32_t padding = max_digits - table[i]->label.width () + 1;
		o.ws(",").wstring(std::string (padding, ' '));
	}

	o.wind(ind).ws("};\n");
}
|
||||
|
||||
// Writes the optional high-symbol part followed by a block containing the
// jump table and "goto *<yytarget>[<symbol>];".
void Cpgoto::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	const std::string symbol = output_hgo (o, ind, readCh, hgo);
	const uint32_t inner = ind + 1;

	o.ws("{\n");
	table->emit (o, inner);
	o.wind(inner).ws("goto *").wstring(opts->yytarget).ws("[").wstring(symbol).ws("];\n");
	o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Writes one "from -> to" edge per destination. A single destination gets
// an unlabelled edge; otherwise every edge is labelled with its ranges.
void Dot::emit (OutputFile & o)
{
	const uint32_t n = cases->cases_size;
	if (n == 1)
	{
		o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n");
		return;
	}

	for (uint32_t i = 0; i < n; ++i)
	{
		const Case & c = cases->cases[i];
		o.wlabel(from->label).ws(" -> ").wlabel(c.to->label).ws(" [label=\"");
		for (uint32_t j = 0; j < c.ranges.size (); ++j)
		{
			o.wrange(c.ranges[j].first, c.ranges[j].second);
		}
		o.ws("\"]\n");
	}
}
|
||||
|
||||
// Forwards to the strategy object selected by 'type'; writes nothing
// while the type is EMPTY.
void Go::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
	if (type == SWITCH_IF)
	{
		info.switchif->emit (o, ind, readCh);
	}
	else if (type == BITMAP)
	{
		info.bitmap->emit (o, ind, readCh);
	}
	else if (type == CPGOTO)
	{
		info.cpgoto->emit (o, ind, readCh);
	}
	else if (type == DOT)
	{
		info.dot->emit (o);
	}
}
|
||||
|
||||
} // namespace re2c
|
111
tools/re2c/src/codegen/go_used_labels.cc
Normal file
111
tools/re2c/src/codegen/go_used_labels.cc
Normal file
|
@ -0,0 +1,111 @@
|
|||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
void Cases::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
for (uint32_t i = 0; i < cases_size; ++i)
|
||||
{
|
||||
used.insert (cases[i].to->label);
|
||||
}
|
||||
}
|
||||
|
||||
void Binary::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
thn->used_labels (used);
|
||||
els->used_labels (used);
|
||||
}
|
||||
|
||||
void Linear::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
for (uint32_t i = 0; i < branches.size (); ++i)
|
||||
{
|
||||
used.insert (branches[i].second->label);
|
||||
}
|
||||
}
|
||||
|
||||
void If::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case BINARY:
|
||||
info.binary->used_labels (used);
|
||||
break;
|
||||
case LINEAR:
|
||||
info.linear->used_labels (used);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SwitchIf::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case SWITCH:
|
||||
info.cases->used_labels (used);
|
||||
break;
|
||||
case IF:
|
||||
info.ifs->used_labels (used);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void GoBitmap::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
if (hgo != NULL)
|
||||
{
|
||||
hgo->used_labels (used);
|
||||
}
|
||||
used.insert (bitmap_state->label);
|
||||
if (lgo != NULL)
|
||||
{
|
||||
lgo->used_labels (used);
|
||||
}
|
||||
}
|
||||
|
||||
void CpgotoTable::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
for (uint32_t i = 0; i < TABLE_SIZE; ++i)
|
||||
{
|
||||
used.insert (table[i]->label);
|
||||
}
|
||||
}
|
||||
|
||||
void Cpgoto::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
if (hgo != NULL)
|
||||
{
|
||||
hgo->used_labels (used);
|
||||
}
|
||||
table->used_labels (used);
|
||||
}
|
||||
|
||||
void Go::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case EMPTY:
|
||||
case DOT:
|
||||
break;
|
||||
case SWITCH_IF:
|
||||
info.switchif->used_labels (used);
|
||||
break;
|
||||
case BITMAP:
|
||||
info.bitmap->used_labels (used);
|
||||
break;
|
||||
case CPGOTO:
|
||||
info.cpgoto->used_labels (used);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
24
tools/re2c/src/codegen/indent.h
Normal file
24
tools/re2c/src/codegen/indent.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#ifndef _RE2C_CODEGEN_INDENT_
|
||||
#define _RE2C_CODEGEN_INDENT_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
inline std::string indent (uint32_t ind)
|
||||
{
|
||||
std::string str;
|
||||
|
||||
while (opts->target != opt_t::DOT && ind-- > 0)
|
||||
{
|
||||
str += opts->indString;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_INDENT_
|
175
tools/re2c/src/codegen/input_api.cc
Normal file
175
tools/re2c/src/codegen/input_api.cc
Normal file
|
@ -0,0 +1,175 @@
|
|||
#include <sstream>
|
||||
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/indent.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// A new object uses the DEFAULT API.
InputAPI::InputAPI ()
	: type_ (DEFAULT)
{
}
|
||||
|
||||
// Returns the API kind currently selected.
InputAPI::type_t InputAPI::type () const
{
	return this->type_;
}
|
||||
|
||||
// Selects the API kind used by all expr_* / stmt_* generators.
void InputAPI::set (type_t t)
{
	this->type_ = t;
}
|
||||
|
||||
std::string InputAPI::expr_peek () const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = "*" + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yypeek + " ()";
|
||||
break;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string InputAPI::expr_peek_save () const
|
||||
{
|
||||
return opts->yych + " = " + opts.yychConversion () + expr_peek ();
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_peek (uint32_t ind) const
|
||||
{
|
||||
return indent (ind) + expr_peek_save () + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = "++" + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yyskip + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_backup (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = opts->yymarker + " = " + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yybackup + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_backupctx (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = opts->yyctxmarker + " = " + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yybackupctx + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_restore (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = opts->yycursor + " = " + opts->yymarker;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yyrestore + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_restorectx (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = indent (ind) + opts->yycursor + " = " + opts->yyctxmarker + ";\n";
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = indent (ind) + opts->yyrestorectx + " ();\n";
|
||||
break;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip_peek (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*++" + opts->yycursor + ";\n"
|
||||
: stmt_skip (ind) + stmt_peek (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip_backup (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yymarker + " = ++" + opts->yycursor + ";\n"
|
||||
: stmt_skip (ind) + stmt_backup (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_backup_peek (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = " + opts->yycursor + ");\n"
|
||||
: stmt_backup (ind) + stmt_peek (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip_backup_peek (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = ++" + opts->yycursor + ");\n"
|
||||
: stmt_skip (ind) + stmt_backup (ind) + stmt_peek (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::expr_lessthan_one () const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? opts->yylimit + " <= " + opts->yycursor
|
||||
: expr_lessthan (1);
|
||||
}
|
||||
|
||||
std::string InputAPI::expr_lessthan (size_t n) const
|
||||
{
|
||||
std::ostringstream s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s << opts->yylessthan << " (" << n << ")";
|
||||
break;
|
||||
}
|
||||
return s.str ();
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
43
tools/re2c/src/codegen/input_api.h
Normal file
43
tools/re2c/src/codegen/input_api.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
#ifndef _RE2C_CODEGEN_INPUT_API_
|
||||
#define _RE2C_CODEGEN_INPUT_API_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Produces the code fragments (expressions and statements, returned as
// strings) through which the generated code accesses its input.
// The fragments depend on the selected API flavour (see type_t).
class InputAPI
{
public:
    // API flavour: DEFAULT emits direct pointer-style operations on the
    // cursor/marker/limit names, CUSTOM emits calls to user-named primitives.
    enum type_t
    {
        DEFAULT,
        CUSTOM
    };

    InputAPI ();

    type_t type () const;
    void set (type_t t);

    // Expressions (no indentation, no trailing newline).
    std::string expr_peek () const;
    std::string expr_peek_save () const;

    // Single-operation statements; 'ind' is the indentation level.
    std::string stmt_peek (uint32_t ind) const;
    std::string stmt_skip (uint32_t ind) const;
    std::string stmt_backup (uint32_t ind) const;
    std::string stmt_backupctx (uint32_t ind) const;
    std::string stmt_restore (uint32_t ind) const;
    std::string stmt_restorectx (uint32_t ind) const;

    // Combined statements: fused into one statement for the DEFAULT API,
    // concatenation of the single-operation statements otherwise.
    std::string stmt_skip_peek (uint32_t ind) const;
    std::string stmt_skip_backup (uint32_t ind) const;
    std::string stmt_backup_peek (uint32_t ind) const;
    std::string stmt_skip_backup_peek (uint32_t ind) const;

    // "Less than n code units available" tests.
    std::string expr_lessthan_one () const;
    std::string expr_lessthan (size_t n) const;

private:
    type_t type_;
};
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_INPUT_API_
|
42
tools/re2c/src/codegen/label.cc
Normal file
42
tools/re2c/src/codegen/label.cc
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include <ostream>
|
||||
|
||||
#include "src/codegen/label.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
const uint32_t label_t::FIRST = 0;
|
||||
|
||||
label_t::label_t ()
|
||||
: value (FIRST)
|
||||
{}
|
||||
|
||||
void label_t::inc ()
|
||||
{
|
||||
++value;
|
||||
}
|
||||
|
||||
// Return the first label (a default-constructed one).
label_t label_t::first ()
{
    label_t initial;
    return initial;
}
|
||||
|
||||
bool label_t::operator < (const label_t & l) const
|
||||
{
|
||||
return value < l.value;
|
||||
}
|
||||
|
||||
uint32_t label_t::width () const
|
||||
{
|
||||
uint32_t v = value;
|
||||
uint32_t n = 0;
|
||||
while (v /= 10) ++n;
|
||||
return n;
|
||||
}
|
||||
|
||||
std::ostream & operator << (std::ostream & o, label_t l)
|
||||
{
|
||||
o << l.value;
|
||||
return o;
|
||||
}
|
||||
|
||||
} // namespace re2c
|
39
tools/re2c/src/codegen/label.h
Normal file
39
tools/re2c/src/codegen/label.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
#ifndef _RE2C_CODEGEN_LABEL_
|
||||
#define _RE2C_CODEGEN_LABEL_
|
||||
|
||||
#include <iosfwd> // ostream
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// The label counter is the only code allowed to create and advance labels.
template <typename num_t> class counter_t;

// A numeric label.
//
// Public API:
//   - get the first label
//   - compare labels
//   - get label width
//   - output label to std::ostream
//
// Private API (reachable only by the label counter, a friend):
//   - get initial label (default constructor)
//   - get next label (inc)
class label_t
{
    static const uint32_t FIRST; // numeric value of the first label
    uint32_t value;

    label_t ();
    void inc ();

    friend class counter_t<label_t>;

public:
    static label_t first ();
    bool operator < (const label_t & l) const;
    uint32_t width () const;

    friend std::ostream & operator << (std::ostream & o, label_t l);
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_LABEL_
|
465
tools/re2c/src/codegen/output.cc
Normal file
465
tools/re2c/src/codegen/output.cc
Normal file
|
@ -0,0 +1,465 @@
|
|||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <iomanip>
|
||||
|
||||
#include "src/codegen/indent.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/codegen/print.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/conf/warn.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/rule_rank.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Create an empty fragment of kind 't' to be emitted at indentation level 'i'.
OutputFragment::OutputFragment (type_t t, uint32_t i)
    : type (t),
      stream (),
      indent (i)
{
}
|
||||
|
||||
uint32_t OutputFragment::count_lines ()
|
||||
{
|
||||
uint32_t lines = 0;
|
||||
const std::string content = stream.str ();
|
||||
const char * p = content.c_str ();
|
||||
for (uint32_t i = 0; i < content.size (); ++i)
|
||||
{
|
||||
if (p[i] == '\n')
|
||||
{
|
||||
++lines;
|
||||
}
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
// Create a block that starts with one empty CODE fragment, so there is
// always a fragment to write into.
OutputBlock::OutputBlock ()
    : fragments (),
      used_yyaccept (false),
      force_start_label (false),
      user_start_label (),
      line (0)
{
    OutputFragment * initial = new OutputFragment (OutputFragment::CODE, 0);
    fragments.push_back (initial);
}
|
||||
|
||||
// The block owns its fragments: free all of them.
OutputBlock::~OutputBlock ()
{
    typedef std::vector<OutputFragment *>::iterator iter_t;
    for (iter_t it = fragments.begin (); it != fragments.end (); ++it)
    {
        delete *it;
    }
}
|
||||
|
||||
// Remember the target file name (the file itself is opened later by open ())
// and start the first output block.
OutputFile::OutputFile (const char * fn)
    : file_name (fn),
      file (NULL),
      blocks (),
      label_counter (),
      warn_condition_order (!opts->tFlag) // see note [condition order]
{
    new_block ();
}
|
||||
|
||||
bool OutputFile::open ()
|
||||
{
|
||||
if (file_name == NULL)
|
||||
{
|
||||
file_name = "<stdout>";
|
||||
file = stdout;
|
||||
}
|
||||
else
|
||||
{
|
||||
file = fopen (file_name, "wb");
|
||||
}
|
||||
return file != NULL;
|
||||
}
|
||||
|
||||
// Close the output file (stdout is left open) and free all blocks.
OutputFile::~OutputFile ()
{
    const bool owns_file = file != NULL && file != stdout;
    if (owns_file)
    {
        fclose (file);
    }

    typedef std::vector<OutputBlock *>::iterator iter_t;
    for (iter_t it = blocks.begin (); it != blocks.end (); ++it)
    {
        delete *it;
    }
}
|
||||
|
||||
std::ostream & OutputFile::stream ()
|
||||
{
|
||||
return blocks.back ()->fragments.back ()->stream;
|
||||
}
|
||||
|
||||
// Append exactly n raw bytes starting at s to the current fragment.
OutputFile & OutputFile::wraw (const char * s, size_t n)
{
    const std::streamsize count = static_cast<std::streamsize> (n);
    stream ().write (s, count);
    return *this;
}
|
||||
|
||||
// Append n as a hexadecimal literal (formatting is done by prtHex).
OutputFile & OutputFile::wu32_hex (uint32_t n)
{
    std::ostream & out = stream ();
    prtHex (out, n);
    return *this;
}
|
||||
|
||||
// Append n either as a quoted character or as a hexadecimal literal
// (the choice is made by prtChOrHex).
OutputFile & OutputFile::wc_hex (uint32_t n)
{
    std::ostream & out = stream ();
    prtChOrHex (out, n);
    return *this;
}
|
||||
|
||||
// Append the range with lower bound l and upper bound u in "[...]" form
// (formatting is done by printSpan).
OutputFile & OutputFile::wrange (uint32_t l, uint32_t u)
{
    std::ostream & out = stream ();
    printSpan (out, l, u);
    return *this;
}
|
||||
|
||||
// Append n padded to a field width of w characters.
OutputFile & OutputFile::wu32_width (uint32_t n, int w)
{
    stream () << std::setw (w) << n;
    return *this;
}
|
||||
|
||||
// Append a '#line' directive for line l of file fn
// (nothing is written when line info is disabled, see output_line_info).
OutputFile & OutputFile::wline_info (uint32_t l, const char * fn)
{
    std::ostream & out = stream ();
    output_line_info (out, l, fn);
    return *this;
}
|
||||
|
||||
// Append the "Generated by re2c ..." banner comment.
OutputFile & OutputFile::wversion_time ()
{
    std::ostream & out = stream ();
    output_version_time (out);
    return *this;
}
|
||||
|
||||
// If the current block has a user-defined start label, append it as
// "<label>:" on its own line; otherwise write nothing.
OutputFile & OutputFile::wuser_start_label ()
{
    const std::string label = blocks.back ()->user_start_label;
    if (label.empty ())
    {
        return *this;
    }
    wstring (label);
    ws (":\n");
    return *this;
}
|
||||
|
||||
// Append a single character to the current fragment.
OutputFile & OutputFile::wc (char c)
{
    std::ostream & out = stream ();
    out << c;
    return *this;
}
|
||||
|
||||
// Append a 32-bit unsigned number to the current fragment.
OutputFile & OutputFile::wu32 (uint32_t n)
{
    std::ostream & out = stream ();
    out << n;
    return *this;
}
|
||||
|
||||
// Append a 64-bit unsigned number to the current fragment.
OutputFile & OutputFile::wu64 (uint64_t n)
{
    std::ostream & out = stream ();
    out << n;
    return *this;
}
|
||||
|
||||
// Append a std::string to the current fragment.
OutputFile & OutputFile::wstring (const std::string & s)
{
    std::ostream & out = stream ();
    out << s;
    return *this;
}
|
||||
|
||||
// Append a NUL-terminated C string to the current fragment.
OutputFile & OutputFile::ws (const char * s)
{
    std::ostream & out = stream ();
    out << s;
    return *this;
}
|
||||
|
||||
// Append a label to the current fragment.
OutputFile & OutputFile::wlabel (label_t l)
{
    std::ostream & out = stream ();
    out << l;
    return *this;
}
|
||||
|
||||
// Append a rule rank to the current fragment.
OutputFile & OutputFile::wrank (rule_rank_t r)
{
    std::ostream & out = stream ();
    out << r;
    return *this;
}
|
||||
|
||||
// Append indentation for level 'ind' to the current fragment.
OutputFile & OutputFile::wind (uint32_t ind)
{
    std::ostream & out = stream ();
    out << indent (ind);
    return *this;
}
|
||||
|
||||
void OutputFile::insert_code ()
|
||||
{
|
||||
blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::CODE, 0));
|
||||
}
|
||||
|
||||
// Reserve a LINE_INFO placeholder fragment (filled in by emit ()) and
// continue writing into a new CODE fragment after it.
OutputFile & OutputFile::wdelay_line_info ()
{
    OutputBlock * current = blocks.back ();
    current->fragments.push_back (new OutputFragment (OutputFragment::LINE_INFO, 0));
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserve a STATE_GOTO placeholder fragment (filled in by emit ()).
// Done only when the -f flag is set and no such placeholder has been
// reserved yet; the global bWroteGetState records that it has been done.
OutputFile & OutputFile::wdelay_state_goto (uint32_t ind)
{
    if (!opts->fFlag || bWroteGetState)
    {
        return *this;
    }

    OutputBlock * current = blocks.back ();
    current->fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind));
    insert_code ();
    bWroteGetState = true;
    return *this;
}
|
||||
|
||||
// Reserve a TYPES placeholder fragment (filled in by emit ()).
// Requesting the types also switches off the condition order warning.
OutputFile & OutputFile::wdelay_types ()
{
    warn_condition_order = false; // see note [condition order]

    OutputBlock * current = blocks.back ();
    current->fragments.push_back (new OutputFragment (OutputFragment::TYPES, 0));
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserve a WARN_CONDITION_ORDER placeholder fragment; emit () decides
// whether the warning is actually reported.
OutputFile & OutputFile::wdelay_warn_condition_order ()
{
    OutputBlock * current = blocks.back ();
    current->fragments.push_back (new OutputFragment (OutputFragment::WARN_CONDITION_ORDER, 0));
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserve a YYACCEPT_INIT placeholder fragment at indentation level 'ind'
// (filled in by emit ()).
OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind)
{
    OutputBlock * current = blocks.back ();
    current->fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind));
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserve a YYMAXFILL placeholder fragment (filled in by emit ()).
OutputFile & OutputFile::wdelay_yymaxfill ()
{
    OutputBlock * current = blocks.back ();
    current->fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0));
    insert_code ();
    return *this;
}
|
||||
|
||||
void OutputFile::set_used_yyaccept ()
|
||||
{
|
||||
blocks.back ()->used_yyaccept = true;
|
||||
}
|
||||
|
||||
bool OutputFile::get_used_yyaccept () const
|
||||
{
|
||||
return blocks.back ()->used_yyaccept;
|
||||
}
|
||||
|
||||
void OutputFile::set_force_start_label (bool force)
|
||||
{
|
||||
blocks.back ()->force_start_label = force;
|
||||
}
|
||||
|
||||
void OutputFile::set_user_start_label (const std::string & label)
|
||||
{
|
||||
blocks.back ()->user_start_label = label;
|
||||
}
|
||||
|
||||
bool OutputFile::get_force_start_label () const
|
||||
{
|
||||
return blocks.back ()->force_start_label;
|
||||
}
|
||||
|
||||
void OutputFile::set_block_line (uint32_t l)
|
||||
{
|
||||
blocks.back ()->line = l;
|
||||
}
|
||||
|
||||
uint32_t OutputFile::get_block_line () const
|
||||
{
|
||||
return blocks.back ()->line;
|
||||
}
|
||||
|
||||
void OutputFile::new_block ()
|
||||
{
|
||||
blocks.push_back (new OutputBlock ());
|
||||
insert_code ();
|
||||
}
|
||||
|
||||
void OutputFile::emit
|
||||
( const std::vector<std::string> & types
|
||||
, size_t max_fill
|
||||
)
|
||||
{
|
||||
if (file != NULL)
|
||||
{
|
||||
unsigned int line_count = 1;
|
||||
for (unsigned int j = 0; j < blocks.size (); ++j)
|
||||
{
|
||||
OutputBlock & b = * blocks[j];
|
||||
for (unsigned int i = 0; i < b.fragments.size (); ++i)
|
||||
{
|
||||
OutputFragment & f = * b.fragments[i];
|
||||
switch (f.type)
|
||||
{
|
||||
case OutputFragment::CODE:
|
||||
break;
|
||||
case OutputFragment::LINE_INFO:
|
||||
output_line_info (f.stream, line_count + 1, file_name);
|
||||
break;
|
||||
case OutputFragment::STATE_GOTO:
|
||||
output_state_goto (f.stream, f.indent, 0);
|
||||
break;
|
||||
case OutputFragment::TYPES:
|
||||
output_types (f.stream, f.indent, types);
|
||||
break;
|
||||
case OutputFragment::WARN_CONDITION_ORDER:
|
||||
if (warn_condition_order) // see note [condition order]
|
||||
{
|
||||
warn.condition_order (b.line);
|
||||
}
|
||||
break;
|
||||
case OutputFragment::YYACCEPT_INIT:
|
||||
output_yyaccept_init (f.stream, f.indent, b.used_yyaccept);
|
||||
break;
|
||||
case OutputFragment::YYMAXFILL:
|
||||
output_yymaxfill (f.stream, max_fill);
|
||||
break;
|
||||
}
|
||||
std::string content = f.stream.str ();
|
||||
fwrite (content.c_str (), 1, content.size (), file);
|
||||
line_count += f.count_lines ();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remember the header file name; the file is opened later by open ().
HeaderFile::HeaderFile (const char * fn)
    : stream (),
      // header is always generated, but not always dumped to file
      // NULL filename crashes 'operator <<' on some platforms
      // TODO: generate header only if necessary
      file_name (fn ? fn : "<stdout>.h"),
      file (NULL)
{
}
|
||||
|
||||
bool HeaderFile::open ()
|
||||
{
|
||||
file = fopen (file_name, "wb");
|
||||
return file != NULL;
|
||||
}
|
||||
|
||||
void HeaderFile::emit (const std::vector<std::string> & types)
|
||||
{
|
||||
output_version_time (stream);
|
||||
output_line_info (stream, 3, file_name);
|
||||
stream << "\n";
|
||||
output_types (stream, 0, types);
|
||||
}
|
||||
|
||||
// If the header file has been opened, dump the generated text into it
// and close it; otherwise the generated text is discarded.
HeaderFile::~HeaderFile ()
{
    if (file == NULL)
    {
        return;
    }

    const std::string text = stream.str ();
    fwrite (text.c_str (), 1, text.size (), file);
    fclose (file);
}
|
||||
|
||||
// Create the source and header outputs; YYMAXFILL starts at 1.
Output::Output (const char * source_name, const char * header_name)
    : source (source_name),
      header (header_name),
      types (),
      skeletons (),
      max_fill (1)
{
}
|
||||
|
||||
// Emit both outputs on destruction, unless an error has been reported
// through the warning subsystem.
Output::~Output ()
{
    if (warn.error ())
    {
        return;
    }

    source.emit (types, max_fill);
    header.emit (types);
}
|
||||
|
||||
void output_state_goto (std::ostream & o, uint32_t ind, uint32_t start_label)
|
||||
{
|
||||
o << indent(ind) << "switch (" << output_get_state() << ") {\n";
|
||||
if (opts->bUseStateAbort)
|
||||
{
|
||||
o << indent(ind) << "default: abort();\n";
|
||||
o << indent(ind) << "case -1: goto " << opts->labelPrefix << start_label << ";\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
o << indent(ind) << "default: goto " << opts->labelPrefix << start_label << ";\n";
|
||||
}
|
||||
for (uint32_t i = 0; i < last_fill_index; ++i)
|
||||
{
|
||||
o << indent(ind) << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n";
|
||||
}
|
||||
o << indent(ind) << "}\n";
|
||||
if (opts->bUseStateNext)
|
||||
{
|
||||
o << opts->yynext << ":\n";
|
||||
}
|
||||
}
|
||||
|
||||
void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept)
|
||||
{
|
||||
if (used_yyaccept)
|
||||
{
|
||||
o << indent (ind) << "unsigned int " << opts->yyaccept << " = 0;\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Write the YYMAXFILL definition: "#define YYMAXFILL <max_fill>".
void output_yymaxfill (std::ostream & o, size_t max_fill)
{
    o << "#define YYMAXFILL ";
    o << max_fill;
    o << "\n";
}
|
||||
|
||||
void output_line_info (std::ostream & o, uint32_t line_number, const char * file_name)
|
||||
{
|
||||
if (!opts->iFlag)
|
||||
{
|
||||
o << "#line " << line_number << " \"" << file_name << "\"\n";
|
||||
}
|
||||
}
|
||||
|
||||
void output_types (std::ostream & o, uint32_t ind, const std::vector<std::string> & types)
|
||||
{
|
||||
o << indent (ind++) << "enum " << opts->yycondtype << " {\n";
|
||||
for (unsigned int i = 0; i < types.size (); ++i)
|
||||
{
|
||||
o << indent (ind) << opts->condEnumPrefix << types[i] << ",\n";
|
||||
}
|
||||
o << indent (--ind) << "};\n";
|
||||
}
|
||||
|
||||
void output_version_time (std::ostream & o)
|
||||
{
|
||||
o << "/* Generated by re2c";
|
||||
if (opts->version)
|
||||
{
|
||||
o << " " << PACKAGE_VERSION;
|
||||
}
|
||||
if (!opts->bNoGenerationDate)
|
||||
{
|
||||
o << " on ";
|
||||
time_t now = time (NULL);
|
||||
o.write (ctime (&now), 24);
|
||||
}
|
||||
o << " */" << "\n";
|
||||
}
|
||||
|
||||
std::string output_get_state ()
|
||||
{
|
||||
return opts->state_get_naked
|
||||
? opts->state_get
|
||||
: opts->state_get + "()";
|
||||
}
|
||||
|
||||
} // namespace re2c
|
158
tools/re2c/src/codegen/output.h
Normal file
158
tools/re2c/src/codegen/output.h
Normal file
|
@ -0,0 +1,158 @@
|
|||
#ifndef _RE2C_CODEGEN_OUTPUT_
|
||||
#define _RE2C_CODEGEN_OUTPUT_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <fstream>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/util/counter.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class rule_rank_t;
|
||||
|
||||
// A piece of output text. CODE fragments are written immediately; all
// other kinds are placeholders whose text is generated when the whole
// file is emitted.
struct OutputFragment
{
    enum type_t
    {
        CODE,
        // CONFIG,
        LINE_INFO,
        STATE_GOTO,
        TYPES,
        WARN_CONDITION_ORDER,
        YYACCEPT_INIT,
        YYMAXFILL
    };

    type_t type;               // kind of the fragment
    std::ostringstream stream; // text of the fragment
    uint32_t indent;           // indentation level for generated text

    OutputFragment (type_t t, uint32_t i);

    // number of newline characters in the fragment text
    uint32_t count_lines ();
};
|
||||
|
||||
struct OutputBlock
|
||||
{
|
||||
std::vector<OutputFragment *> fragments;
|
||||
bool used_yyaccept;
|
||||
bool force_start_label;
|
||||
std::string user_start_label;
|
||||
uint32_t line;
|
||||
|
||||
OutputBlock ();
|
||||
~OutputBlock ();
|
||||
};
|
||||
|
||||
struct OutputFile
|
||||
{
|
||||
public:
|
||||
const char * file_name;
|
||||
|
||||
private:
|
||||
FILE * file;
|
||||
std::vector<OutputBlock *> blocks;
|
||||
|
||||
public:
|
||||
counter_t<label_t> label_counter;
|
||||
bool warn_condition_order;
|
||||
|
||||
private:
|
||||
std::ostream & stream ();
|
||||
void insert_code ();
|
||||
|
||||
public:
|
||||
OutputFile (const char * fn);
|
||||
~OutputFile ();
|
||||
|
||||
bool open ();
|
||||
|
||||
void new_block ();
|
||||
|
||||
// immediate output
|
||||
OutputFile & wraw (const char * s, size_t n);
|
||||
OutputFile & wc (char c);
|
||||
OutputFile & wc_hex (uint32_t n);
|
||||
OutputFile & wu32 (uint32_t n);
|
||||
OutputFile & wu32_hex (uint32_t n);
|
||||
OutputFile & wu32_width (uint32_t n, int w);
|
||||
OutputFile & wu64 (uint64_t n);
|
||||
OutputFile & wstring (const std::string & s);
|
||||
OutputFile & ws (const char * s);
|
||||
OutputFile & wlabel (label_t l);
|
||||
OutputFile & wrank (rule_rank_t l);
|
||||
OutputFile & wrange (uint32_t u, uint32_t l);
|
||||
OutputFile & wline_info (uint32_t l, const char * fn);
|
||||
OutputFile & wversion_time ();
|
||||
OutputFile & wuser_start_label ();
|
||||
OutputFile & wind (uint32_t ind);
|
||||
|
||||
// delayed output
|
||||
OutputFile & wdelay_line_info ();
|
||||
OutputFile & wdelay_state_goto (uint32_t ind);
|
||||
OutputFile & wdelay_types ();
|
||||
OutputFile & wdelay_warn_condition_order ();
|
||||
OutputFile & wdelay_yyaccept_init (uint32_t ind);
|
||||
OutputFile & wdelay_yymaxfill ();
|
||||
|
||||
void set_used_yyaccept ();
|
||||
bool get_used_yyaccept () const;
|
||||
void set_force_start_label (bool force);
|
||||
void set_user_start_label (const std::string & label);
|
||||
bool get_force_start_label () const;
|
||||
void set_block_line (uint32_t l);
|
||||
uint32_t get_block_line () const;
|
||||
|
||||
void emit (const std::vector<std::string> & types, size_t max_fill);
|
||||
|
||||
FORBID_COPY (OutputFile);
|
||||
};
|
||||
|
||||
struct HeaderFile
|
||||
{
|
||||
HeaderFile (const char * fn);
|
||||
~HeaderFile ();
|
||||
bool open ();
|
||||
void emit (const std::vector<std::string> & types);
|
||||
|
||||
private:
|
||||
std::ostringstream stream;
|
||||
const char * file_name;
|
||||
FILE * file;
|
||||
|
||||
FORBID_COPY (HeaderFile);
|
||||
};
|
||||
|
||||
struct Output
|
||||
{
|
||||
OutputFile source;
|
||||
HeaderFile header;
|
||||
std::vector<std::string> types;
|
||||
std::set<std::string> skeletons;
|
||||
size_t max_fill;
|
||||
|
||||
Output (const char * source_name, const char * header_name);
|
||||
~Output ();
|
||||
};
|
||||
|
||||
// writers for the individual pieces of generated output
void output_line_info (std::ostream & o, uint32_t line_number, const char * file_name);
void output_state_goto (std::ostream & o, uint32_t ind, uint32_t start_label);
void output_types (std::ostream & o, uint32_t ind, const std::vector<std::string> & types);
void output_version_time (std::ostream & o);
void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept);
void output_yymaxfill (std::ostream & o, size_t max_fill);

// helpers

// the 'get state' expression, with "()" appended unless it is 'naked'
std::string output_get_state ();
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_OUTPUT_
|
156
tools/re2c/src/codegen/print.cc
Normal file
156
tools/re2c/src/codegen/print.cc
Normal file
|
@ -0,0 +1,156 @@
|
|||
#include <iostream>
|
||||
|
||||
#include "src/codegen/print.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Tell whether c lies in the range 0x20 .. 0x7E (inclusive).
bool is_print (uint32_t c)
{
    return 0x20 <= c && c <= 0x7E;
}
|
||||
|
||||
// Tell whether c is one of: tab, form feed, vertical tab, newline,
// carriage return or space.
bool is_space (uint32_t c)
{
    return c == '\t'
        || c == '\f'
        || c == '\v'
        || c == '\n'
        || c == '\r'
        || c == ' ';
}
|
||||
|
||||
// Return the upper-case hexadecimal digit for the lowest 4 bits of c.
char hexCh(uint32_t c)
{
    static const char * digits = "0123456789ABCDEF";
    const uint32_t nibble = c % 16u;
    return digits[nibble];
}
|
||||
|
||||
void prtChOrHex(std::ostream& o, uint32_t c)
|
||||
{
|
||||
if (opts->encoding.type () != Enc::EBCDIC
|
||||
&& (is_print (c) || is_space (c)))
|
||||
{
|
||||
o << '\'';
|
||||
prtCh(o, c);
|
||||
o << '\'';
|
||||
}
|
||||
else
|
||||
{
|
||||
prtHex(o, c);
|
||||
}
|
||||
}
|
||||
|
||||
void prtHex(std::ostream& o, uint32_t c)
|
||||
{
|
||||
o << "0x";
|
||||
const uint32_t cunit_size = opts->encoding.szCodeUnit ();
|
||||
if (cunit_size >= 4)
|
||||
{
|
||||
o << hexCh (c >> 28u)
|
||||
<< hexCh (c >> 24u)
|
||||
<< hexCh (c >> 20u)
|
||||
<< hexCh (c >> 16u);
|
||||
}
|
||||
if (cunit_size >= 2)
|
||||
{
|
||||
o << hexCh (c >> 12u)
|
||||
<< hexCh (c >> 8u);
|
||||
}
|
||||
o << hexCh (c >> 4u)
|
||||
<< hexCh (c);
|
||||
}
|
||||
|
||||
void prtCh(std::ostream& o, uint32_t c)
|
||||
{
|
||||
const bool dot = opts->target == opt_t::DOT;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\'':
|
||||
o << (dot ? "'" : "\\'");
|
||||
break;
|
||||
|
||||
case '"':
|
||||
o << (dot ? "\\\"" : "\"");
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
o << (dot ? "\\\\n" : "\\n");
|
||||
break;
|
||||
|
||||
case '\t':
|
||||
o << (dot ? "\\\\t" : "\\t");
|
||||
break;
|
||||
|
||||
case '\v':
|
||||
o << (dot ? "\\\\v" : "\\v");
|
||||
break;
|
||||
|
||||
case '\b':
|
||||
o << (dot ? "\\\\b" : "\\b");
|
||||
break;
|
||||
|
||||
case '\r':
|
||||
o << (dot ? "\\\\r" : "\\r");
|
||||
break;
|
||||
|
||||
case '\f':
|
||||
o << (dot ? "\\\\f" : "\\f");
|
||||
break;
|
||||
|
||||
case '\a':
|
||||
o << (dot ? "\\\\a" :"\\a");
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
o << "\\\\"; // both .dot and C/C++ code expect "\\"
|
||||
break;
|
||||
|
||||
default:
|
||||
o << static_cast<char> (c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void prtChOrHexForSpan(std::ostream& o, uint32_t c)
|
||||
{
|
||||
if (opts->encoding.type () != Enc::EBCDIC
|
||||
&& is_print (c)
|
||||
&& (c != ']'))
|
||||
{
|
||||
prtCh(o, c);
|
||||
}
|
||||
else
|
||||
{
|
||||
prtHex(o, c);
|
||||
}
|
||||
}
|
||||
|
||||
void printSpan(std::ostream& o, uint32_t lb, uint32_t ub)
|
||||
{
|
||||
o << "[";
|
||||
if ((ub - lb) == 1)
|
||||
{
|
||||
prtChOrHexForSpan(o, lb);
|
||||
}
|
||||
else
|
||||
{
|
||||
prtChOrHexForSpan(o, lb);
|
||||
o << "-";
|
||||
prtChOrHexForSpan(o, ub - 1);
|
||||
}
|
||||
o << "]";
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
20
tools/re2c/src/codegen/print.h
Normal file
20
tools/re2c/src/codegen/print.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef _RE2C_CODEGEN_PRINT_
|
||||
#define _RE2C_CODEGEN_PRINT_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <iosfwd>
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// character classification
bool is_print (uint32_t c);
bool is_space (uint32_t c);

// upper-case hexadecimal digit for the lowest 4 bits of c
char hexCh(uint32_t c);

// printers for single characters and for ranges
void prtCh(std::ostream& o, uint32_t c);
void prtHex(std::ostream& o, uint32_t c);
void prtChOrHex(std::ostream& o, uint32_t c);
void printSpan(std::ostream& o, uint32_t lb, uint32_t ub);
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_PRINT_
|
254
tools/re2c/src/conf/msg.cc
Normal file
254
tools/re2c/src/conf/msg.cc
Normal file
|
@ -0,0 +1,254 @@
|
|||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
|
||||
#include "config.h"
|
||||
#include "src/conf/msg.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Print an error message to stderr: the "re2c: error: " prefix, the
// printf-style formatted message and a newline.
void error (const char * fmt, ...)
{
    fputs ("re2c: error: ", stderr);

    va_list ap;
    va_start (ap, fmt);
    vfprintf (stderr, fmt, ap);
    va_end (ap);

    fputc ('\n', stderr);
}
|
||||
|
||||
void error_encoding ()
|
||||
{
|
||||
error ("only one of switches -e, -w, -x, -u and -8 must be set");
|
||||
}
|
||||
|
||||
void error_arg (const char * option)
|
||||
{
|
||||
error ("expected argument to option %s", option);
|
||||
}
|
||||
|
||||
// Print the opening part of a warning to stderr:
// "re2c: <error|warning>: line <line>: ".
// 'error' selects the severity word and must be evaluated on every call:
// a function-local static would be initialized only once and would keep
// the severity of the very first warning for all later ones.
void warning_start (uint32_t line, bool error)
{
    const char * msg = error ? "error" : "warning";
    fprintf (stderr, "re2c: %s: line %u: ", msg, line);
}
|
||||
|
||||
// Print the closing part of a warning to stderr: the option that controls
// the warning (" [-W<type>]" or " [-Werror-<type>]"), if a type is given,
// followed by a newline.
void warning_end (const char * type, bool error)
{
    if (type == NULL)
    {
        fprintf (stderr, "\n");
        return;
    }

    const char * prefix = "";
    if (error)
    {
        prefix = "error-";
    }
    fprintf (stderr, " [-W%s%s]", prefix, type);
    fprintf (stderr, "\n");
}
|
||||
|
||||
void warning (const char * type, uint32_t line, bool error, const char * fmt, ...)
|
||||
{
|
||||
warning_start (line, error);
|
||||
|
||||
va_list args;
|
||||
va_start (args, fmt);
|
||||
vfprintf (stderr, fmt, args);
|
||||
va_end (args);
|
||||
|
||||
warning_end (type, error);
|
||||
}
|
||||
|
||||
// Print the help text to stderr.
// The text is one string literal made of adjacent pieces; every entry is
// followed by an empty line.
void usage ()
{
    fprintf (stderr,
    "usage: re2c [-bcdDefFghirsuvVwx18] [-o of] [-t th] file\n"
    "\n"
    "-? -h --help Display this info.\n"
    "\n"
    "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
    " coax better code out of the compiler. Most useful for\n"
    " specifications with more than a few keywords (e.g. for\n"
    " most programming languages).\n"
    "\n"
    "-c --conditions Require start conditions.\n"
    "\n"
    "-d --debug-output Creates a parser that dumps information\n"
    " about the current position and in which state the\n"
    " parser is.\n"
    "\n"
    "-D --emit-dot Emit a Graphviz dot view of the DFA graph\n"
    "\n"
    "-e --ecb Generate a parser that supports EBCDIC. The generated code\n"
    " can deal with any character up to 0xFF. In this mode re2c\n"
    " assumes that input character size is 1 byte. This switch is\n"
    " incompatible with -w, -u, -x and -8\n"
    "\n"
    "-f --storable-state Generate a scanner that supports storable states.\n"
    "\n"
    "-F --flex-syntax Partial support for flex syntax.\n"
    "\n"
    "-g --computed-gotos Implies -b. Generate computed goto code (only useable\n"
    " with gcc).\n"
    "\n"
    "-i --no-debug-info Do not generate '#line' info (useful for versioning).\n"
    "\n"
    "-o of --output=of Specify the output file (of) instead of stdout\n"
    "\n"
    "-r --reusable Allow reuse of scanner definitions.\n"
    "\n"
    "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
    " need this assist to generate better code.\n"
    "\n"
    "-t th --type-header=th Generate a type header file (th) with type definitions.\n"
    "\n"
    "-u --unicode Generate a parser that supports UTF-32. The generated code\n"
    " can deal with any valid Unicode character up to 0x10FFFF.\n"
    " In this mode re2c assumes that input character size is 4 bytes.\n"
    " This switch is incompatible with -e, -w, -x and -8. It implies -s.\n"
    "\n"
    "-v --version Show version information.\n"
    "\n"
    "-V --vernum Show version as one number.\n"
    "\n"
    "-w --wide-chars Generate a parser that supports UCS-2. The generated code can\n"
    " deal with any valid Unicode character up to 0xFFFF. In this mode\n"
    " re2c assumes that input character size is 2 bytes. This switch is\n"
    " incompatible with -e, -x, -u and -8. It implies -s.\n"
    "\n"
    "-x --utf-16 Generate a parser that supports UTF-16. The generated code can\n"
    " deal with any valid Unicode character up to 0x10FFFF. In this mode\n"
    " re2c assumes that input character size is 2 bytes. This switch is\n"
    " incompatible with -e, -w, -u and -8. It implies -s.\n"
    "\n"
    "-8 --utf-8 Generate a parser that supports UTF-8. The generated code can\n"
    " deal with any valid Unicode character up to 0x10FFFF. In this mode\n"
    " re2c assumes that input character size is 1 byte. This switch is\n"
    " incompatible with -e, -w, -x and -u.\n"
    "\n"
    "--no-generation-date Suppress date output in the generated file.\n"
    "\n"
    "--no-version Suppress version output in the generated file.\n"
    "\n"
    "--case-insensitive All strings are case insensitive, so all \"-expressions\n"
    " are treated in the same way '-expressions are.\n"
    "\n"
    "--case-inverted Invert the meaning of single and double quoted strings.\n"
    " With this switch single quotes are case sensitive and\n"
    " double quotes are case insensitive.\n"
    "\n"
    "--encoding-policy ep Specify what re2c should do when given bad code unit.\n"
    " ep can be one of the following: fail, substitute, ignore.\n"
    "\n"
    "--input i Specify re2c input API.\n"
    " i can be one of the following: default, custom.\n"
    "\n"
    "--skeleton Instead of embedding re2c-generated code into C/C++ source,\n"
    " generate a self-contained program for the same DFA.\n"
    " Most useful for correctness and performance testing.\n"
    "\n"
    "--empty-class policy What to do if user inputs empty character class. policy can be\n"
    " one of the following: 'match-empty' (match empty input, default),\n"
    " 'match-none' (fail to match on any input), 'error' (compilation\n"
    " error). Note that there are various ways to construct empty class,\n"
    " e.g: [], [^\\x00-\\xFF], [\\x00-\\xFF]\\[\\x00-\\xFF].\n"
    "\n"
    "--dfa-minimization <table | moore>\n"
    " Internal algorithm used by re2c to minimize DFA (defaults to\n"
    " 'moore'). Both table filling and Moore's algorithms should\n"
    " produce identical DFA (up to states relabelling). Table filling\n"
    " algorithm is much simpler and slower; it serves as a reference\n"
    " implementation.\n"
    "\n"
    "-1 --single-pass Deprecated and does nothing (single pass is by default now).\n"
    "\n"
    "-W Turn on all warnings.\n"
    "\n"
    "-Werror Turn warnings into errors. Note that this option alone doesn't\n"
    " turn on any warnings, it only affects those warnings that have\n"
    " been turned on so far or will be turned on later.\n"
    "\n"
    "-W<warning> Turn on individual warning.\n"
    "\n"
    "-Wno-<warning> Turn off individual warning.\n"
    "\n"
    "-Werror-<warning> Turn on individual warning and treat it as error (this implies\n"
    " '-W<warning>').\n"
    "\n"
    "-Wno-error-<warning> Don't treat this particular warning as error. This doesn't turn\n"
    " off the warning itself.\n"
    "\n"
    "Warnings:\n"
    "\n"
    "-Wcondition-order Warn if the generated program makes implicit assumptions about\n"
    " condition numbering. One should use either '-t, --type-header'\n"
    " option or '/*!types:re2c*/' directive to generate mapping of\n"
    " condition names to numbers and use autogenerated condition names.\n"
    "\n"
    "-Wempty-character-class Warn if regular expression contains empty character class. From\n"
    " the rational point of view trying to match empty character class\n"
    " makes no sense: it should always fail. However, for backwards\n"
    " compatibility reasons re2c allows empty character class and treats\n"
    " it as empty string. Use '--empty-class' option to change default\n"
    " behaviour.\n"
    "\n"
    "-Wmatch-empty-string Warn if regular expression in a rule is nullable (matches empty\n"
    " string). If DFA runs in a loop and empty match is unintentional\n"
    " (input position is not advanced manually), lexer may get stuck\n"
    " in eternal loop.\n"
    "\n"
    "-Wswapped-range Warn if range lower bound is greater than upper bound. Default\n"
    " re2c behaviour is to silently swap range bounds.\n"
    "\n"
    "-Wundefined-control-flow\n"
    " Warn if some input strings cause undefined control flow in lexer\n"
    " (the faulty patterns are reported). This is the most dangerous\n"
    " and common mistake. It can be easily fixed by adding default rule\n"
    " '*' (this rule has the lowest priority, matches any code unit\n"
    " and consumes exactly one code unit).\n"
    "\n"
    "-Wuseless-escape Warn if a symbol is escaped when it shouldn't be. By default re2c\n"
    " silently ignores escape, but this may as well indicate a typo\n"
    " or an error in escape sequence.\n"
    "\n"
    );
}
|
||||
|
||||
void vernum ()
|
||||
{
|
||||
std::string vernum (PACKAGE_VERSION);
|
||||
if (vernum[1] == '.')
|
||||
{
|
||||
vernum.insert(0, "0");
|
||||
}
|
||||
vernum.erase(2, 1);
|
||||
if (vernum[3] == '.')
|
||||
{
|
||||
vernum.insert(2, "0");
|
||||
}
|
||||
vernum.erase(4, 1);
|
||||
if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9')
|
||||
{
|
||||
vernum.insert(4, "0");
|
||||
}
|
||||
vernum.resize(6, '0');
|
||||
|
||||
printf ("%s\n", vernum.c_str ());
|
||||
}
|
||||
|
||||
void version ()
|
||||
{
|
||||
printf ("re2c %s\n", PACKAGE_VERSION);
|
||||
}
|
||||
|
||||
// Builds the "in condition '<name>' " fragment used inside diagnostics.
// Returns an empty string when no condition name is given, so the result
// can be spliced into a message unconditionally.
std::string incond (const std::string & cond)
{
	if (cond.empty ())
	{
		return std::string ();
	}
	return "in condition '" + cond + "' ";
}
|
||||
|
||||
} // namespace re2c
|
24
tools/re2c/src/conf/msg.h
Normal file
24
tools/re2c/src/conf/msg.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#ifndef _RE2C_CONF_MSG_
|
||||
#define _RE2C_CONF_MSG_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "src/util/attribute.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Reports an error described by a printf-style format string and arguments
// (the format attribute lets the compiler check the arguments).
// NOTE(review): the output destination is defined in msg.cc - presumably stderr.
void error (const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 1, 2)));
|
||||
// Reports an encoding error; the option parser calls it when an encoding
// option is rejected.
void error_encoding ();
|
||||
// Reports that 'option' was given without its required argument.
void error_arg (const char * option);
|
||||
// warning_start / warning_end bracket a warning whose text the caller prints
// itself between the two calls; 'error' tells whether the warning is
// treated as an error.
void warning_start (uint32_t line, bool error);
|
||||
void warning_end (const char * type, bool error);
|
||||
// Emits a complete warning of the given type for source line 'line',
// with a printf-style message.
void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 4, 5)));
|
||||
// Prints the help text.
void usage ();
|
||||
// Prints the version as a six-digit number.
void vernum ();
|
||||
// Prints "re2c <version>".
void version ();
|
||||
// Returns "in condition '<cond>' " or an empty string if 'cond' is empty.
std::string incond (const std::string & cond);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CONF_MSG_
|
331
tools/re2c/src/conf/opt.cc
Normal file
331
tools/re2c/src/conf/opt.cc
Normal file
|
@ -0,0 +1,331 @@
|
|||
#include "src/conf/msg.h"
|
||||
#include "src/conf/opt.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
Opt opts;
|
||||
|
||||
// Default constructor: initializes every option listed in RE2C_OPTS with
// the default value given in that list.
opt_t::opt_t ()
|
||||
// OPT1 is the first entry of the list and opens the initializer list (':'),
// every following OPT entry continues it (',').
#define OPT1(type, name, value) : name (value)
|
||||
#define OPT(type, name, value) , name (value)
|
||||
RE2C_OPTS
|
||||
#undef OPT1
|
||||
#undef OPT
|
||||
{}
|
||||
|
||||
// Copy constructor: copies every option listed in RE2C_OPTS from 'opt'.
opt_t::opt_t (const opt_t & opt)
|
||||
// OPT1 opens the initializer list, OPT continues it; the default 'value'
// column of the list is ignored here.
#define OPT1(type, name, value) : name (opt.name)
|
||||
#define OPT(type, name, value) , name (opt.name)
|
||||
RE2C_OPTS
|
||||
#undef OPT1
|
||||
#undef OPT
|
||||
{}
|
||||
|
||||
// Assignment: copies every option listed in RE2C_OPTS from 'opt', member by
// member, and returns *this.
opt_t & opt_t::operator = (const opt_t & opt)
|
||||
{
|
||||
// here the first list entry needs no special treatment, so OPT1 is an
// alias of OPT; each entry expands to one assignment statement
#define OPT1 OPT
|
||||
#define OPT(type, name, value) name = opt.name;
|
||||
RE2C_OPTS
|
||||
#undef OPT1
|
||||
#undef OPT
|
||||
return *this;
|
||||
}
|
||||
|
||||
void opt_t::fix ()
|
||||
{
|
||||
// some options either make no sense or must have fixed value
|
||||
// with current target: reset them to default
|
||||
switch (target)
|
||||
{
|
||||
case DOT:
|
||||
// default code generation options
|
||||
sFlag = Opt::baseopt.sFlag;
|
||||
bFlag = Opt::baseopt.bFlag;
|
||||
gFlag = Opt::baseopt.gFlag;
|
||||
cGotoThreshold = Opt::baseopt.cGotoThreshold;
|
||||
// default environment-insensitive formatting
|
||||
yybmHexTable = Opt::baseopt.yybmHexTable;
|
||||
// fallthrough
|
||||
case SKELETON:
|
||||
// default line information
|
||||
iFlag = Opt::baseopt.iFlag;
|
||||
// default environment-sensitive formatting
|
||||
topIndent = Opt::baseopt.topIndent;
|
||||
indString = Opt::baseopt.indString;
|
||||
condDivider = Opt::baseopt.condDivider;
|
||||
condDividerParam = Opt::baseopt.condDividerParam;
|
||||
// default environment bindings
|
||||
tFlag = Opt::baseopt.tFlag;
|
||||
header_file = Opt::baseopt.header_file;
|
||||
yycondtype = Opt::baseopt.yycondtype;
|
||||
cond_get = Opt::baseopt.cond_get;
|
||||
cond_get_naked = Opt::baseopt.cond_get_naked;
|
||||
cond_set = Opt::baseopt.cond_set;
|
||||
cond_set_arg = Opt::baseopt.cond_set_arg;
|
||||
cond_set_naked = Opt::baseopt.cond_set_naked;
|
||||
yyctable = Opt::baseopt.yyctable;
|
||||
condPrefix = Opt::baseopt.condPrefix;
|
||||
condEnumPrefix = Opt::baseopt.condEnumPrefix;
|
||||
condGoto = Opt::baseopt.condGoto;
|
||||
condGotoParam = Opt::baseopt.condGotoParam;
|
||||
fFlag = Opt::baseopt.fFlag;
|
||||
state_get = Opt::baseopt.state_get;
|
||||
state_get_naked = Opt::baseopt.state_get_naked;
|
||||
state_set = Opt::baseopt.state_set;
|
||||
state_set_arg = Opt::baseopt.state_set_arg;
|
||||
state_set_naked = Opt::baseopt.state_set_naked;
|
||||
yyfilllabel = Opt::baseopt.yyfilllabel;
|
||||
yynext = Opt::baseopt.yynext;
|
||||
yyaccept = Opt::baseopt.yyaccept;
|
||||
bUseStateAbort = Opt::baseopt.bUseStateAbort;
|
||||
bUseStateNext = Opt::baseopt.bUseStateNext;
|
||||
yybm = Opt::baseopt.yybm;
|
||||
yytarget = Opt::baseopt.yytarget;
|
||||
input_api = Opt::baseopt.input_api;
|
||||
yycursor = Opt::baseopt.yycursor;
|
||||
yymarker = Opt::baseopt.yymarker;
|
||||
yyctxmarker = Opt::baseopt.yyctxmarker;
|
||||
yylimit = Opt::baseopt.yylimit;
|
||||
yypeek = Opt::baseopt.yypeek;
|
||||
yyskip = Opt::baseopt.yyskip;
|
||||
yybackup = Opt::baseopt.yybackup;
|
||||
yybackupctx = Opt::baseopt.yybackupctx;
|
||||
yyrestore = Opt::baseopt.yyrestore;
|
||||
yyrestorectx = Opt::baseopt.yyrestorectx;
|
||||
yylessthan = Opt::baseopt.yylessthan;
|
||||
dFlag = Opt::baseopt.dFlag;
|
||||
yydebug = Opt::baseopt.yydebug;
|
||||
yyctype = Opt::baseopt.yyctype;
|
||||
yych = Opt::baseopt.yych;
|
||||
bEmitYYCh = Opt::baseopt.bEmitYYCh;
|
||||
yychConversion = Opt::baseopt.yychConversion;
|
||||
fill = Opt::baseopt.fill;
|
||||
fill_use = Opt::baseopt.fill_use;
|
||||
fill_check = Opt::baseopt.fill_check;
|
||||
fill_arg = Opt::baseopt.fill_arg;
|
||||
fill_arg_use = Opt::baseopt.fill_arg_use;
|
||||
fill_naked = Opt::baseopt.fill_naked;
|
||||
labelPrefix = Opt::baseopt.labelPrefix;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (bCaseInsensitive)
|
||||
{
|
||||
bCaseInverted = Opt::baseopt.bCaseInverted;
|
||||
}
|
||||
|
||||
// respect hierarchy
|
||||
if (!cFlag)
|
||||
{
|
||||
tFlag = Opt::baseopt.tFlag;
|
||||
header_file = Opt::baseopt.header_file;
|
||||
yycondtype = Opt::baseopt.yycondtype;
|
||||
cond_get = Opt::baseopt.cond_get;
|
||||
cond_get_naked = Opt::baseopt.cond_get_naked;
|
||||
cond_set = Opt::baseopt.cond_set;
|
||||
cond_set_arg = Opt::baseopt.cond_set_arg;
|
||||
cond_set_naked = Opt::baseopt.cond_set_naked;
|
||||
yyctable = Opt::baseopt.yyctable;
|
||||
condPrefix = Opt::baseopt.condPrefix;
|
||||
condEnumPrefix = Opt::baseopt.condEnumPrefix;
|
||||
condDivider = Opt::baseopt.condDivider;
|
||||
condDividerParam = Opt::baseopt.condDividerParam;
|
||||
condGoto = Opt::baseopt.condGoto;
|
||||
condGotoParam = Opt::baseopt.condGotoParam;
|
||||
}
|
||||
if (!fFlag)
|
||||
{
|
||||
state_get = Opt::baseopt.state_get;
|
||||
state_get_naked = Opt::baseopt.state_get_naked;
|
||||
state_set = Opt::baseopt.state_set;
|
||||
state_set_arg = Opt::baseopt.state_set_arg;
|
||||
state_set_naked = Opt::baseopt.state_set_naked;
|
||||
yyfilllabel = Opt::baseopt.yyfilllabel;
|
||||
yynext = Opt::baseopt.yynext;
|
||||
yyaccept = Opt::baseopt.yyaccept;
|
||||
bUseStateAbort = Opt::baseopt.bUseStateAbort;
|
||||
bUseStateNext = Opt::baseopt.bUseStateNext;
|
||||
}
|
||||
if (!bFlag)
|
||||
{
|
||||
yybmHexTable = Opt::baseopt.yybmHexTable;
|
||||
yybm = Opt::baseopt.yybm;
|
||||
}
|
||||
if (!gFlag)
|
||||
{
|
||||
cGotoThreshold = Opt::baseopt.cGotoThreshold;
|
||||
yytarget = Opt::baseopt.yytarget;
|
||||
}
|
||||
if (input_api.type () != InputAPI::DEFAULT)
|
||||
{
|
||||
yycursor = Opt::baseopt.yycursor;
|
||||
yymarker = Opt::baseopt.yymarker;
|
||||
yyctxmarker = Opt::baseopt.yyctxmarker;
|
||||
yylimit = Opt::baseopt.yylimit;
|
||||
}
|
||||
if (input_api.type () != InputAPI::CUSTOM)
|
||||
{
|
||||
yypeek = Opt::baseopt.yypeek;
|
||||
yyskip = Opt::baseopt.yyskip;
|
||||
yybackup = Opt::baseopt.yybackup;
|
||||
yybackupctx = Opt::baseopt.yybackupctx;
|
||||
yyrestore = Opt::baseopt.yyrestore;
|
||||
yyrestorectx = Opt::baseopt.yyrestorectx;
|
||||
yylessthan = Opt::baseopt.yylessthan;
|
||||
}
|
||||
if (!dFlag)
|
||||
{
|
||||
yydebug = Opt::baseopt.yydebug;
|
||||
}
|
||||
if (!fill_use)
|
||||
{
|
||||
fill = Opt::baseopt.fill;
|
||||
fill_check = Opt::baseopt.fill_check;
|
||||
fill_arg = Opt::baseopt.fill_arg;
|
||||
fill_arg_use = Opt::baseopt.fill_arg_use;
|
||||
fill_naked = Opt::baseopt.fill_naked;
|
||||
}
|
||||
|
||||
// force individual options
|
||||
switch (target)
|
||||
{
|
||||
case DOT:
|
||||
iFlag = true;
|
||||
break;
|
||||
case SKELETON:
|
||||
iFlag = true;
|
||||
input_api.set (InputAPI::CUSTOM);
|
||||
indString = " ";
|
||||
topIndent = 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch (encoding.type ())
|
||||
{
|
||||
case Enc::UCS2:
|
||||
case Enc::UTF16:
|
||||
case Enc::UTF32:
|
||||
sFlag = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (bFlag)
|
||||
{
|
||||
sFlag = true;
|
||||
}
|
||||
if (gFlag)
|
||||
{
|
||||
bFlag = true;
|
||||
sFlag = true;
|
||||
}
|
||||
if (header_file != NULL)
|
||||
{
|
||||
tFlag = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Starts with default-constructed effective options and remembers (by
// reference) the user option set they are later synchronized from.
realopt_t::realopt_t (useropt_t & opt)
	: real ()
	, user (opt)
{
}
|
||||
|
||||
// Read-only access to the effective options.
// Synchronizes with the user options first (see sync ()), so the returned
// pointer always refers to an up-to-date, fixed option set.
const opt_t * realopt_t::operator -> ()
{
	sync ();
	// address of the 'real' member ("&real;" must not be read as an HTML entity)
	return &real;
}
|
||||
|
||||
void realopt_t::sync ()
|
||||
{
|
||||
if (user.diverge)
|
||||
{
|
||||
real = user.opt;
|
||||
real.fix ();
|
||||
user.diverge = false;
|
||||
}
|
||||
}
|
||||
|
||||
// User options start with default values and are marked as diverged, so
// the first read through realopt_t triggers a synchronization.
useropt_t::useropt_t ()
	: opt ()
	, diverge (true)
{
}
|
||||
|
||||
// Writable access to the user options. Any access is assumed to modify
// them, so the set is marked as diverged before the pointer is handed out.
opt_t * useropt_t::operator -> ()
{
	opt_t * const writable = &opt;
	diverge = true;
	return writable;
}
|
||||
|
||||
const opt_t Opt::baseopt;
|
||||
|
||||
bool Opt::source (const char * s)
|
||||
{
|
||||
if (source_file)
|
||||
{
|
||||
error ("multiple source files: %s, %s", source_file, s);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
source_file = s;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Opt::output (const char * s)
|
||||
{
|
||||
if (output_file)
|
||||
{
|
||||
error ("multiple output files: %s, %s", output_file, s);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
output_file = s;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void Opt::reset_encoding (const Enc & enc)
|
||||
{
|
||||
useropt->encoding = enc;
|
||||
}
|
||||
|
||||
void Opt::reset_mapCodeName ()
|
||||
{
|
||||
// historically arranged set of names
|
||||
// no actual reason why these particular options should be reset
|
||||
useropt->cond_get = Opt::baseopt.cond_get;
|
||||
useropt->cond_set = Opt::baseopt.cond_set;
|
||||
useropt->fill = Opt::baseopt.fill;
|
||||
useropt->state_get = Opt::baseopt.state_get;
|
||||
useropt->state_set = Opt::baseopt.state_set;
|
||||
useropt->yybackup = Opt::baseopt.yybackup;
|
||||
useropt->yybackupctx = Opt::baseopt.yybackupctx;
|
||||
useropt->yycondtype = Opt::baseopt.yycondtype;
|
||||
useropt->yyctxmarker = Opt::baseopt.yyctxmarker;
|
||||
useropt->yyctype = Opt::baseopt.yyctype;
|
||||
useropt->yycursor = Opt::baseopt.yycursor;
|
||||
useropt->yydebug = Opt::baseopt.yydebug;
|
||||
useropt->yylessthan = Opt::baseopt.yylessthan;
|
||||
useropt->yylimit = Opt::baseopt.yylimit;
|
||||
useropt->yymarker = Opt::baseopt.yymarker;
|
||||
useropt->yypeek = Opt::baseopt.yypeek;
|
||||
useropt->yyrestore = Opt::baseopt.yyrestore;
|
||||
useropt->yyrestorectx = Opt::baseopt.yyrestorectx;
|
||||
useropt->yyskip = Opt::baseopt.yyskip;
|
||||
useropt->yyfilllabel = Opt::baseopt.yyfilllabel;
|
||||
useropt->yynext = Opt::baseopt.yynext;
|
||||
useropt->yyaccept = Opt::baseopt.yyaccept;
|
||||
useropt->yybm = Opt::baseopt.yybm;
|
||||
useropt->yych = Opt::baseopt.yych;
|
||||
useropt->yyctable = Opt::baseopt.yyctable;
|
||||
useropt->yytarget = Opt::baseopt.yytarget;
|
||||
}
|
||||
|
||||
} // namespace re2c
|
218
tools/re2c/src/conf/opt.h
Normal file
218
tools/re2c/src/conf/opt.h
Normal file
|
@ -0,0 +1,218 @@
|
|||
#ifndef _RE2C_CONF_OPT_
|
||||
#define _RE2C_CONF_OPT_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/regexp/empty_class_policy.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
#define RE2C_OPTS \
|
||||
/* target */ \
|
||||
OPT1 (opt_t::target_t, target, CODE) \
|
||||
/* fingerprint */ \
|
||||
OPT (bool, bNoGenerationDate, false) \
|
||||
OPT (bool, version, true) \
|
||||
/* regular expressions */ \
|
||||
OPT (Enc, encoding, Enc ()) \
|
||||
OPT (bool, bCaseInsensitive, false) \
|
||||
OPT (bool, bCaseInverted, false) \
|
||||
OPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \
|
||||
/* conditions */ \
|
||||
OPT (bool, cFlag, false) \
|
||||
OPT (bool, tFlag, false) \
|
||||
OPT (const char *, header_file, NULL) \
|
||||
OPT (std::string, yycondtype, "YYCONDTYPE") \
|
||||
OPT (std::string, cond_get, "YYGETCONDITION") \
|
||||
OPT (bool, cond_get_naked, false) \
|
||||
OPT (std::string, cond_set, "YYSETCONDITION" ) \
|
||||
OPT (std::string, cond_set_arg, "@@" ) \
|
||||
OPT (bool, cond_set_naked, false ) \
|
||||
OPT (std::string, yyctable, "yyctable") \
|
||||
OPT (std::string, condPrefix, "yyc_") \
|
||||
OPT (std::string, condEnumPrefix, "yyc") \
|
||||
OPT (std::string, condDivider, "/* *********************************** */") \
|
||||
OPT (std::string, condDividerParam, "@@") \
|
||||
OPT (std::string, condGoto, "goto @@;") \
|
||||
OPT (std::string, condGotoParam, "@@") \
|
||||
/* states */ \
|
||||
OPT (bool, fFlag, false) \
|
||||
OPT (std::string, state_get, "YYGETSTATE") \
|
||||
OPT (bool, state_get_naked, false) \
|
||||
OPT (std::string, state_set, "YYSETSTATE") \
|
||||
OPT (std::string, state_set_arg, "@@") \
|
||||
OPT (bool, state_set_naked, false) \
|
||||
OPT (std::string, yyfilllabel, "yyFillLabel") \
|
||||
OPT (std::string, yynext, "yyNext") \
|
||||
OPT (std::string, yyaccept, "yyaccept") \
|
||||
OPT (bool, bUseStateAbort, false) \
|
||||
OPT (bool, bUseStateNext, false) \
|
||||
/* reuse */ \
|
||||
OPT (bool, rFlag, false) \
|
||||
/* partial flex syntax support */ \
|
||||
OPT (bool, FFlag, false) \
|
||||
/* code generation */ \
|
||||
OPT (bool, sFlag, false) \
|
||||
OPT (bool, bFlag, false) \
|
||||
OPT (std::string, yybm, "yybm") \
|
||||
OPT (bool, yybmHexTable, false) \
|
||||
OPT (bool, gFlag, false) \
|
||||
OPT (std::string, yytarget, "yytarget") \
|
||||
OPT (uint32_t, cGotoThreshold, 9) \
|
||||
/* formatting */ \
|
||||
OPT (uint32_t, topIndent, 0) \
|
||||
OPT (std::string, indString, "\t") \
|
||||
/* input API */ \
|
||||
OPT (InputAPI, input_api, InputAPI ()) \
|
||||
OPT (std::string, yycursor, "YYCURSOR") \
|
||||
OPT (std::string, yymarker, "YYMARKER") \
|
||||
OPT (std::string, yyctxmarker, "YYCTXMARKER") \
|
||||
OPT (std::string, yylimit, "YYLIMIT") \
|
||||
OPT (std::string, yypeek, "YYPEEK") \
|
||||
OPT (std::string, yyskip, "YYSKIP") \
|
||||
OPT (std::string, yybackup, "YYBACKUP") \
|
||||
OPT (std::string, yybackupctx, "YYBACKUPCTX") \
|
||||
OPT (std::string, yyrestore, "YYRESTORE") \
|
||||
OPT (std::string, yyrestorectx, "YYRESTORECTX") \
|
||||
OPT (std::string, yylessthan, "YYLESSTHAN") \
|
||||
/* #line directives */ \
|
||||
OPT (bool, iFlag, false) \
|
||||
/* debug */ \
|
||||
OPT (bool, dFlag, false) \
|
||||
OPT (std::string, yydebug, "YYDEBUG") \
|
||||
/* yych */ \
|
||||
OPT (std::string, yyctype, "YYCTYPE") \
|
||||
OPT (std::string, yych, "yych") \
|
||||
OPT (bool, bEmitYYCh, true) \
|
||||
OPT (bool, yychConversion, false) \
|
||||
/* YYFILL */ \
|
||||
OPT (std::string, fill, "YYFILL") \
|
||||
OPT (bool, fill_use, true) \
|
||||
OPT (bool, fill_check, true) \
|
||||
OPT (std::string, fill_arg, "@@") \
|
||||
OPT (bool, fill_arg_use, true) \
|
||||
OPT (bool, fill_naked, false) \
|
||||
/* labels */ \
|
||||
OPT (std::string, labelPrefix, "yy") \
|
||||
/* internals */ \
|
||||
OPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE)
|
||||
|
||||
// Plain set of all re2c options; the members are generated from the
// RE2C_OPTS list (one member per list entry).
struct opt_t
|
||||
{
|
||||
// kind of output to generate (default in RE2C_OPTS is CODE)
enum target_t
|
||||
{
|
||||
CODE,
|
||||
DOT,
|
||||
SKELETON
|
||||
};
|
||||
|
||||
// expand every list entry into a member declaration 'type name;'
#define OPT1 OPT
|
||||
#define OPT(type, name, value) type name;
|
||||
RE2C_OPTS
|
||||
#undef OPT1
|
||||
#undef OPT
|
||||
|
||||
opt_t ();
|
||||
opt_t (const opt_t & opt);
|
||||
opt_t & operator = (const opt_t & opt);
|
||||
// normalizes the option set (resets dependent options, forces implied ones)
void fix ();
|
||||
};
|
||||
|
||||
class useropt_t;
|
||||
// Effective ("real") options: a fixed-up copy of the user options that is
// refreshed lazily whenever the user options have diverged.
class realopt_t
|
||||
{
|
||||
// the fixed-up copy handed out to readers
opt_t real;
|
||||
// the user option set this copy is synchronized from
useropt_t & user;
|
||||
public:
|
||||
realopt_t (useropt_t & opt);
|
||||
// read-only access; synchronizes first
const opt_t * operator -> ();
|
||||
// re-copies and re-fixes the options if the user options have diverged
void sync ();
|
||||
};
|
||||
|
||||
// Options exactly as set by the user, plus a mark telling whether they
// were accessed for writing since the last synchronization.
class useropt_t
|
||||
{
|
||||
opt_t opt;
|
||||
// true when 'opt' may differ from the copy held by realopt_t
bool diverge;
|
||||
public:
|
||||
useropt_t ();
|
||||
// writable access; sets 'diverge'
opt_t * operator -> ();
|
||||
// sync () reads 'opt' and clears 'diverge'
friend void realopt_t::sync ();
|
||||
};
|
||||
|
||||
struct Opt
|
||||
{
|
||||
static const opt_t baseopt;
|
||||
|
||||
const char * source_file;
|
||||
const char * output_file;
|
||||
|
||||
private:
|
||||
useropt_t useropt;
|
||||
realopt_t realopt;
|
||||
|
||||
public:
|
||||
Opt ()
|
||||
: source_file (NULL)
|
||||
, output_file (NULL)
|
||||
, useropt ()
|
||||
, realopt (useropt)
|
||||
{}
|
||||
|
||||
// read-only access, forces options syncronization
|
||||
const opt_t * operator -> ()
|
||||
{
|
||||
return realopt.operator -> ();
|
||||
}
|
||||
|
||||
bool source (const char * s);
|
||||
bool output (const char * s);
|
||||
|
||||
// Inplace configurations are applied immediately when parsed.
|
||||
// This is very bad: first, re2c behaviour is changed in the middle
|
||||
// of the block; second, config is resynced too often (every
|
||||
// attempt to read config that has been updated results in
|
||||
// automatic resync). It is much better to set all options at once.
|
||||
bool set_encoding (Enc::type_t t) { return useropt->encoding.set (t); }
|
||||
void unset_encoding (Enc::type_t t) { useropt->encoding.unset (t); }
|
||||
void set_encoding_policy (Enc::policy_t p) { useropt->encoding.setPolicy (p); }
|
||||
void set_input_api (InputAPI::type_t t) { useropt->input_api.set (t); }
|
||||
#define OPT1 OPT
|
||||
#define OPT(type, name, value) void set_##name (type arg) { useropt->name = arg; }
|
||||
RE2C_OPTS
|
||||
#undef OPT1
|
||||
#undef OPT
|
||||
|
||||
// helpers
|
||||
std::string yychConversion ()
|
||||
{
|
||||
return realopt->yychConversion
|
||||
? "(" + realopt->yyctype + ")"
|
||||
: "";
|
||||
}
|
||||
|
||||
// bad temporary hacks, should be fixed by proper scoping of config (parts).
|
||||
void reset_encoding (const Enc & enc);
|
||||
void reset_mapCodeName ();
|
||||
|
||||
FORBID_COPY (Opt);
|
||||
};
|
||||
|
||||
// Result of command-line parsing (see parse_opts).
enum parse_opts_t
|
||||
{
|
||||
// options parsed, a source file is known
OK,
|
||||
// an informational option (help, version) was handled
EXIT_OK,
|
||||
// an error was reported
EXIT_FAIL
|
||||
};
|
||||
|
||||
parse_opts_t parse_opts (char ** argv, Opt & opts);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CONF_OPT_
|
2846
tools/re2c/src/conf/parse_opts.cc
Normal file
2846
tools/re2c/src/conf/parse_opts.cc
Normal file
File diff suppressed because it is too large
Load diff
261
tools/re2c/src/conf/parse_opts.re
Normal file
261
tools/re2c/src/conf/parse_opts.re
Normal file
|
@ -0,0 +1,261 @@
|
|||
#include "src/codegen/input_api.h"
|
||||
#include "src/conf/msg.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/conf/warn.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/regexp/empty_class_policy.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Advances 'argv' to the next command-line argument and stores it in 'arg'.
// Returns false when the terminating NULL entry has been reached.
static inline bool next (char * & arg, char ** & argv)
{
	++argv;
	arg = *argv;
	return arg != NULL;
}
|
||||
|
||||
// Parses the command line.
//
// 'argv' is the NULL-terminated argument vector; parsing starts with the
// element after the one 'argv' points to. Recognized options are stored in
// 'opts' (and warning options in the global 'warn').
//
// Returns:
//   OK        - all arguments parsed and a source file is known;
//   EXIT_OK   - help or version information was printed;
//   EXIT_FAIL - an error was reported (bad option, bad or missing option
//               argument, repeated source/output file, no source file).
//
// The function is a set of small re2c lexers connected with gotos: 'opt'
// classifies an argument, 'opt_short' / 'opt_long' / 'opt_warn' match the
// option name, the remaining labels match option arguments.
parse_opts_t parse_opts (char ** argv, Opt & opts)
{
#define YYCTYPE unsigned char
	char * YYCURSOR;
	char * YYMARKER;
	Warn::option_t option;

/*!re2c
	re2c:yyfill:enable = 0;
	re2c:yych:conversion = 1;

	end = "\x00";
	filename = [^\x00-] [^\x00]*;
*/

opt:
	if (!next (YYCURSOR, argv))
	{
		goto end;
	}
/*!re2c
	*
	{
		error ("bad option: %s", *argv);
		return EXIT_FAIL;
	}

	"--" end
	{
		// all remaining arguments are non-options
		// so they must be input files
		// re2c expects exactly one input file
		for (char * f; next (f, argv);)
		{
			if (!opts.source (f))
			{
				return EXIT_FAIL;
			}
		}
		goto end;
	}

	"-" end { if (!opts.source ("<stdin>")) return EXIT_FAIL; goto opt; }
	filename end { if (!opts.source (*argv)) return EXIT_FAIL; goto opt; }

	"-" { goto opt_short; }
	"--" { goto opt_long; }

	"-W" end { warn.set_all (); goto opt; }
	"-Werror" end { warn.set_all_error (); goto opt; }
	"-W" { option = Warn::W; goto opt_warn; }
	"-Wno-" { option = Warn::WNO; goto opt_warn; }
	"-Werror-" { option = Warn::WERROR; goto opt_warn; }
	"-Wno-error-" { option = Warn::WNOERROR; goto opt_warn; }
*/

opt_warn:
/*!re2c
	*
	{
		error ("bad warning: %s", *argv);
		return EXIT_FAIL;
	}
	"condition-order" end { warn.set (Warn::CONDITION_ORDER, option); goto opt; }
	"empty-character-class" end { warn.set (Warn::EMPTY_CHARACTER_CLASS, option); goto opt; }
	"match-empty-string" end { warn.set (Warn::MATCH_EMPTY_STRING, option); goto opt; }
	"swapped-range" end { warn.set (Warn::SWAPPED_RANGE, option); goto opt; }
	"undefined-control-flow" end { warn.set (Warn::UNDEFINED_CONTROL_FLOW, option); goto opt; }
	"unreachable-rules" end { warn.set (Warn::UNREACHABLE_RULES, option); goto opt; }
	"useless-escape" end { warn.set (Warn::USELESS_ESCAPE, option); goto opt; }
*/

opt_short:
/*!re2c
	*
	{
		error ("bad short option: %s", *argv);
		return EXIT_FAIL;
	}
	end { goto opt; }
	[?h] { usage (); return EXIT_OK; }
	"v" { version (); return EXIT_OK; }
	"V" { vernum (); return EXIT_OK; }
	"b" { opts.set_bFlag (true); goto opt_short; }
	"c" { opts.set_cFlag (true); goto opt_short; }
	"d" { opts.set_dFlag (true); goto opt_short; }
	"D" { opts.set_target (opt_t::DOT); goto opt_short; }
	"f" { opts.set_fFlag (true); goto opt_short; }
	"F" { opts.set_FFlag (true); goto opt_short; }
	"g" { opts.set_gFlag (true); goto opt_short; }
	"i" { opts.set_iFlag (true); goto opt_short; }
	"r" { opts.set_rFlag (true); goto opt_short; }
	"s" { opts.set_sFlag (true); goto opt_short; }
	"S" { opts.set_target (opt_t::SKELETON); goto opt_short; }
	"e" { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"u" { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"w" { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"x" { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"8" { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
	"o" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; }
	"o" { *argv = YYCURSOR; goto opt_output; }
	"t" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; }
	"t" { *argv = YYCURSOR; goto opt_header; }
	"1" { goto opt_short; } // deprecated
*/

opt_long:
/*!re2c
	*
	{
		error ("bad long option: %s", *argv);
		return EXIT_FAIL;
	}
	"help" end { usage (); return EXIT_OK; }
	"version" end { version (); return EXIT_OK; }
	"vernum" end { vernum (); return EXIT_OK; }
	"bit-vectors" end { opts.set_bFlag (true); goto opt; }
	"start-conditions" end { opts.set_cFlag (true); goto opt; }
	"debug-output" end { opts.set_dFlag (true); goto opt; }
	"emit-dot" end { opts.set_target (opt_t::DOT); goto opt; }
	"storable-state" end { opts.set_fFlag (true); goto opt; }
	"flex-syntax" end { opts.set_FFlag (true); goto opt; }
	"computed-gotos" end { opts.set_gFlag (true); goto opt; }
	"no-debug-info" end { opts.set_iFlag (true); goto opt; }
	"reusable" end { opts.set_rFlag (true); goto opt; }
	"nested-ifs" end { opts.set_sFlag (true); goto opt; }
	"no-generation-date" end { opts.set_bNoGenerationDate (true); goto opt; }
	"no-version" end { opts.set_version (false); goto opt; }
	"case-insensitive" end { opts.set_bCaseInsensitive (true); goto opt; }
	"case-inverted" end { opts.set_bCaseInverted (true); goto opt; }
	"skeleton" end { opts.set_target (opt_t::SKELETON); goto opt; }
	"ecb" end { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"unicode" end { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"wide-chars" end { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"utf-16" end { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"utf-8" end { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt; }
	"output" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; }
	"type-header" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; }
	"encoding-policy" end { goto opt_encoding_policy; }
	"input" end { goto opt_input; }
	"empty-class" end { goto opt_empty_class; }
	"dfa-minimization" end { goto opt_dfa_minimization; }
	"single-pass" end { goto opt; } // deprecated
*/

opt_output:
/*!re2c
	*
	{
		error ("bad argument to option -o, --output: %s", *argv);
		return EXIT_FAIL;
	}
	filename end { if (!opts.output (*argv)) return EXIT_FAIL; goto opt; }
*/

opt_header:
/*!re2c
	*
	{
		error ("bad argument to option -t, --type-header: %s", *argv);
		return EXIT_FAIL;
	}
	filename end { opts.set_header_file (*argv); goto opt; }
*/

opt_encoding_policy:
	if (!next (YYCURSOR, argv))
	{
		error_arg ("--encoding-policy");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --encoding-policy (expected: ignore | substitute | fail): %s", *argv);
		return EXIT_FAIL;
	}
	"ignore" end { opts.set_encoding_policy (Enc::POLICY_IGNORE); goto opt; }
	"substitute" end { opts.set_encoding_policy (Enc::POLICY_SUBSTITUTE); goto opt; }
	"fail" end { opts.set_encoding_policy (Enc::POLICY_FAIL); goto opt; }
*/

opt_input:
	if (!next (YYCURSOR, argv))
	{
		error_arg ("--input");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --input (expected: default | custom): %s", *argv);
		return EXIT_FAIL;
	}
	"default" end { opts.set_input_api (InputAPI::DEFAULT); goto opt; }
	"custom" end { opts.set_input_api (InputAPI::CUSTOM); goto opt; }
*/

opt_empty_class:
	if (!next (YYCURSOR, argv))
	{
		error_arg ("--empty-class");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --empty-class (expected: match-empty | match-none | error): %s", *argv);
		return EXIT_FAIL;
	}
	"match-empty" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_EMPTY); goto opt; }
	"match-none" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_NONE); goto opt; }
	"error" end { opts.set_empty_class_policy (EMPTY_CLASS_ERROR); goto opt; }
*/

opt_dfa_minimization:
	if (!next (YYCURSOR, argv))
	{
		// report the option under the name the user actually typed
		error_arg ("--dfa-minimization");
		return EXIT_FAIL;
	}
/*!re2c
	*
	{
		error ("bad argument to option --dfa-minimization (expected: table | moore): %s", *argv);
		return EXIT_FAIL;
	}
	"table" end { opts.set_dfa_minimization (DFA_MINIMIZATION_TABLE); goto opt; }
	"moore" end { opts.set_dfa_minimization (DFA_MINIMIZATION_MOORE); goto opt; }
*/

end:
	// exactly one source file is required
	if (!opts.source_file)
	{
		error ("no source file");
		return EXIT_FAIL;
	}

	return OK;

#undef YYCTYPE
}
|
||||
|
||||
} // namespace re2c
|
200
tools/re2c/src/conf/warn.cc
Normal file
200
tools/re2c/src/conf/warn.cc
Normal file
|
@ -0,0 +1,200 @@
|
|||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "src/conf/msg.h"
|
||||
#include "src/conf/warn.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
Warn warn;
|
||||
|
||||
const uint32_t Warn::SILENT = 0;
|
||||
const uint32_t Warn::WARNING = 1u << 0;
|
||||
const uint32_t Warn::ERROR = 1u << 1;
|
||||
|
||||
const char * Warn::names [TYPES] =
|
||||
{
|
||||
#define W(x, y) y
|
||||
RE2C_WARNING_TYPES
|
||||
#undef W
|
||||
};
|
||||
|
||||
// All warnings start silent and no error has been accumulated yet.
Warn::Warn ()
	: mask ()
	, error_accuml (false)
{
	uint32_t type = 0;
	while (type < TYPES)
	{
		mask[type] = SILENT;
		++type;
	}
}
|
||||
|
||||
bool Warn::error () const
|
||||
{
|
||||
return error_accuml;
|
||||
}
|
||||
|
||||
// Applies one of the -W<warning>, -Wno-<warning>, -Werror-<warning>,
// -Wno-error-<warning> options to warning type 't'.
void Warn::set (type_t t, option_t o)
{
	uint32_t & bits = mask[t];

	if (o == W)
	{
		bits |= WARNING;
	}
	else if (o == WNO)
	{
		bits &= ~WARNING;
	}
	else if (o == WERROR)
	{
		// unlike -Werror, -Werror-<warning> implies -W<warning>
		bits |= (WARNING | ERROR);
	}
	else if (o == WNOERROR)
	{
		bits &= ~ERROR;
	}
}
|
||||
|
||||
void Warn::set_all ()
|
||||
{
|
||||
for (uint32_t i = 0; i < TYPES; ++i)
|
||||
{
|
||||
mask[i] |= WARNING;
|
||||
}
|
||||
}
|
||||
|
||||
// -Werror doesn't set any warnings: it only guarantees that if a warning
|
||||
// has been set by now or will be set later then it will result into error.
|
||||
void Warn::set_all_error ()
|
||||
{
|
||||
for (uint32_t i = 0; i < TYPES; ++i)
|
||||
{
|
||||
mask[i] |= ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// Emits message 's' as a warning of type 't' for line 'line', provided
// that this warning type is enabled. The warning is never promoted to an
// error and does not affect the accumulated error state.
void Warn::fail (type_t t, uint32_t line, const char * s)
{
	const bool enabled = (mask[t] & WARNING) != 0;
	if (!enabled)
	{
		return;
	}
	// -Werror has no effect
	warning (names[t], line, false, "%s", s);
}
|
||||
|
||||
void Warn::condition_order (uint32_t line)
|
||||
{
|
||||
if (mask[CONDITION_ORDER] & WARNING)
|
||||
{
|
||||
const bool e = mask[CONDITION_ORDER] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[CONDITION_ORDER], line, e,
|
||||
"looks like you use hardcoded numbers instead of autogenerated condition names: "
|
||||
"better add '/*!types:re2c*/' directive or '-t, --type-header' option "
|
||||
"and don't rely on fixed condition order.");
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::empty_class (uint32_t line)
|
||||
{
|
||||
if (mask[EMPTY_CHARACTER_CLASS] & WARNING)
|
||||
{
|
||||
const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class");
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::match_empty_string (uint32_t line)
|
||||
{
|
||||
if (mask[MATCH_EMPTY_STRING] & WARNING)
|
||||
{
|
||||
const bool e = mask[MATCH_EMPTY_STRING] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[MATCH_EMPTY_STRING], line, e, "rule matches empty string");
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::swapped_range (uint32_t line, uint32_t l, uint32_t u)
|
||||
{
|
||||
if (mask[SWAPPED_RANGE] & WARNING)
|
||||
{
|
||||
const bool e = mask[SWAPPED_RANGE] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u);
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::undefined_control_flow (uint32_t line, const std::string & cond, std::vector<way_t> & ways, bool overflow)
|
||||
{
|
||||
if (mask[UNDEFINED_CONTROL_FLOW] & WARNING)
|
||||
{
|
||||
const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR;
|
||||
error_accuml |= e;
|
||||
|
||||
// report shorter patterns first
|
||||
std::sort (ways.begin (), ways.end (), cmp_ways);
|
||||
|
||||
warning_start (line, e);
|
||||
fprintf (stderr, "control flow %sis undefined for strings that match ", incond (cond).c_str ());
|
||||
const size_t count = ways.size ();
|
||||
if (count == 1)
|
||||
{
|
||||
fprint_way (stderr, ways[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
fprintf (stderr, "\n\t");
|
||||
fprint_way (stderr, ways[i]);
|
||||
}
|
||||
fprintf (stderr, "\n");
|
||||
}
|
||||
if (overflow)
|
||||
{
|
||||
fprintf (stderr, " ... and a few more");
|
||||
}
|
||||
fprintf (stderr, ", use default rule '*'");
|
||||
warning_end (names[UNDEFINED_CONTROL_FLOW], e);
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules)
|
||||
{
|
||||
if (mask[UNREACHABLE_RULES] & WARNING)
|
||||
{
|
||||
const bool e = mask[UNREACHABLE_RULES] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning_start (rule.line, e);
|
||||
fprintf (stderr, "unreachable rule %s", incond (cond).c_str ());
|
||||
const size_t shadows = rule.shadow.size ();
|
||||
if (shadows > 0)
|
||||
{
|
||||
const char * pl = shadows > 1
|
||||
? "s"
|
||||
: "";
|
||||
std::set<rule_rank_t>::const_iterator i = rule.shadow.begin ();
|
||||
fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, rules.find (*i)->second.line);
|
||||
for (++i; i != rule.shadow.end (); ++i)
|
||||
{
|
||||
fprintf (stderr, ", %u", rules.find (*i)->second.line);
|
||||
}
|
||||
fprintf (stderr, ")");
|
||||
}
|
||||
warning_end (names[UNREACHABLE_RULES], e);
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::useless_escape (uint32_t line, uint32_t col, char c)
|
||||
{
|
||||
if (mask[USELESS_ESCAPE] & WARNING)
|
||||
{
|
||||
const bool e = mask[USELESS_ESCAPE] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
67
tools/re2c/src/conf/warn.h
Normal file
67
tools/re2c/src/conf/warn.h
Normal file
|
@ -0,0 +1,67 @@
|
|||
#ifndef _RE2C_CONF_WARN_
|
||||
#define _RE2C_CONF_WARN_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/skeleton/way.h"
|
||||
#include "src/parse/rules.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
#define RE2C_WARNING_TYPES \
|
||||
W (CONDITION_ORDER, "condition-order"), \
|
||||
W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \
|
||||
W (MATCH_EMPTY_STRING, "match-empty-string"), \
|
||||
W (SWAPPED_RANGE, "swapped-range"), \
|
||||
W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \
|
||||
W (UNREACHABLE_RULES, "unreachable-rules"), \
|
||||
W (USELESS_ESCAPE, "useless-escape"),
|
||||
|
||||
class Warn
|
||||
{
|
||||
public:
|
||||
enum type_t
|
||||
{
|
||||
#define W(x, y) x
|
||||
RE2C_WARNING_TYPES
|
||||
#undef W
|
||||
TYPES // count
|
||||
};
|
||||
enum option_t
|
||||
{
|
||||
W,
|
||||
WNO,
|
||||
WERROR,
|
||||
WNOERROR
|
||||
};
|
||||
|
||||
private:
|
||||
static const uint32_t SILENT;
|
||||
static const uint32_t WARNING;
|
||||
static const uint32_t ERROR;
|
||||
static const char * names [TYPES];
|
||||
uint32_t mask[TYPES];
|
||||
bool error_accuml;
|
||||
|
||||
public:
|
||||
Warn ();
|
||||
bool error () const;
|
||||
void set (type_t t, option_t o);
|
||||
void set_all ();
|
||||
void set_all_error ();
|
||||
void fail (type_t t, uint32_t line, const char * s);
|
||||
|
||||
void condition_order (uint32_t line);
|
||||
void empty_class (uint32_t line);
|
||||
void match_empty_string (uint32_t line);
|
||||
void swapped_range (uint32_t line, uint32_t l, uint32_t u);
|
||||
void undefined_control_flow (uint32_t line, const std::string & cond, std::vector<way_t> & ways, bool overflow);
|
||||
void unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules);
|
||||
void useless_escape (uint32_t line, uint32_t col, char c);
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CONF_WARN_
|
24
tools/re2c/src/globals.h
Normal file
24
tools/re2c/src/globals.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#ifndef _RE2C_GLOBALS_
|
||||
#define _RE2C_GLOBALS_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/conf/warn.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
extern bool bUsedYYBitmap;
|
||||
extern bool bWroteGetState;
|
||||
extern bool bWroteCondCheck;
|
||||
extern uint32_t last_fill_index;
|
||||
extern std::string yySetupRule;
|
||||
|
||||
extern Opt opts;
|
||||
extern Warn warn;
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_GLOBALS_
|
109
tools/re2c/src/ir/adfa/action.h
Normal file
109
tools/re2c/src/ir/adfa/action.h
Normal file
|
@ -0,0 +1,109 @@
|
|||
#ifndef _RE2C_IR_ADFA_ACTION_
|
||||
#define _RE2C_IR_ADFA_ACTION_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include "src/util/uniq_vector.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct OutputFile;
|
||||
class RuleOp;
|
||||
class State;
|
||||
|
||||
struct Initial
|
||||
{
|
||||
label_t label;
|
||||
bool setMarker;
|
||||
|
||||
inline Initial (label_t l, bool b)
|
||||
: label (l)
|
||||
, setMarker (b)
|
||||
{}
|
||||
};
|
||||
|
||||
typedef uniq_vector_t<const State *> accept_t;
|
||||
|
||||
class Action
|
||||
{
|
||||
public:
|
||||
enum type_t
|
||||
{
|
||||
MATCH,
|
||||
INITIAL,
|
||||
SAVE,
|
||||
MOVE,
|
||||
ACCEPT,
|
||||
RULE
|
||||
} type;
|
||||
union
|
||||
{
|
||||
Initial * initial;
|
||||
uint32_t save;
|
||||
const accept_t * accepts;
|
||||
const RuleOp * rule;
|
||||
} info;
|
||||
|
||||
public:
|
||||
inline Action ()
|
||||
: type (MATCH)
|
||||
, info ()
|
||||
{}
|
||||
~Action ()
|
||||
{
|
||||
clear ();
|
||||
}
|
||||
void set_initial (label_t label, bool used_marker)
|
||||
{
|
||||
clear ();
|
||||
type = INITIAL;
|
||||
info.initial = new Initial (label, used_marker);
|
||||
}
|
||||
void set_save (uint32_t save)
|
||||
{
|
||||
clear ();
|
||||
type = SAVE;
|
||||
info.save = save;
|
||||
}
|
||||
void set_move ()
|
||||
{
|
||||
clear ();
|
||||
type = MOVE;
|
||||
}
|
||||
void set_accept (const accept_t * accepts)
|
||||
{
|
||||
clear ();
|
||||
type = ACCEPT;
|
||||
info.accepts = accepts;
|
||||
}
|
||||
void set_rule (const RuleOp * const rule)
|
||||
{
|
||||
clear ();
|
||||
type = RULE;
|
||||
info.rule = rule;
|
||||
}
|
||||
|
||||
private:
|
||||
void clear ()
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case INITIAL:
|
||||
delete info.initial;
|
||||
break;
|
||||
case MATCH:
|
||||
case SAVE:
|
||||
case MOVE:
|
||||
case ACCEPT:
|
||||
case RULE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_ADFA_ACTION_
|
135
tools/re2c/src/ir/adfa/adfa.cc
Normal file
135
tools/re2c/src/ir/adfa/adfa.cc
Normal file
|
@ -0,0 +1,135 @@
|
|||
#include <assert.h>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/util/allocate.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Builds the DFA with actions from a plain 'dfa_t'.
//   dfa     : source automaton (states with per-character arcs)
//   fill    : per-state fill value, indexed like dfa.states
//   skel    : skeleton; deleted by ~DFA
//   charset : bounds of the character ranges; charset.back () becomes ubChar
//   n, c, l : name, condition and line stored in the DFA
// Arcs of each source state are compressed into spans: runs of consecutive
// characters leading to the same target become one span.
DFA::DFA
    ( const dfa_t &dfa
    , const std::vector<size_t> &fill
    , Skeleton *skel
    , const charset_t &charset
    , const std::string &n
    , const std::string &c
    , uint32_t l
    )
    : accepts ()
    , skeleton (skel)
    , name (n)
    , cond (c)
    , line (l)
    , lbChar(0)
    , ubChar(charset.back())
    , nStates(0)
    , head(NULL)

    // statistics
    , max_fill (0)
    , need_backup (false)
    , need_backupctx (false)
    , need_accept (false)
{
    const size_t nstates = dfa.states.size();
    const size_t nchars = dfa.nchars;

    // index -> state map; all states are created up front because arcs may
    // refer to states with a greater index
    std::vector<State*> index2state(nstates);
    for (size_t k = 0; k < nstates; ++k)
    {
        index2state[k] = new State;
    }

    // 'tail' always points to the link that receives the next state
    State **tail = &head;
    for (size_t k = 0; k < nstates; ++k)
    {
        dfa_state_t *src = dfa.states[k];
        State *dst = index2state[k];

        ++nStates;
        *tail = dst;
        tail = &dst->next;

        dst->isPreCtxt = src->ctx;
        dst->rule = src->rule;
        dst->fill = fill[k];
        dst->go.span = allocate<Span>(nchars);

        uint32_t nspans = 0;
        uint32_t ch = 0;
        while (ch < nchars)
        {
            const size_t to = src->arcs[ch];
            // advance to the first character with a different target
            do
            {
                ++ch;
            }
            while (ch < nchars && src->arcs[ch] == to);

            Span &span = dst->go.span[nspans];
            span.to = to == dfa_t::NIL ? NULL : index2state[to];
            span.ub = charset[ch];
            ++nspans;
        }
        dst->go.nSpans = nspans;
    }
    *tail = NULL;
}
|
||||
|
||||
// Deletes every state of the list starting at 'head', then the skeleton.
DFA::~DFA()
{
    while (head)
    {
        State *dead = head;
        head = dead->next;
        delete dead;
    }

    delete skeleton;
}
|
||||
|
||||
void DFA::reorder()
|
||||
{
|
||||
std::vector<State*> ord;
|
||||
ord.reserve(nStates);
|
||||
|
||||
std::queue<State*> todo;
|
||||
todo.push(head);
|
||||
|
||||
std::set<State*> done;
|
||||
done.insert(head);
|
||||
|
||||
for(;!todo.empty();)
|
||||
{
|
||||
State *s = todo.front();
|
||||
todo.pop();
|
||||
ord.push_back(s);
|
||||
for(uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
State *q = s->go.span[i].to;
|
||||
if(q && done.insert(q).second)
|
||||
{
|
||||
todo.push(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(nStates == ord.size());
|
||||
|
||||
ord.push_back(NULL);
|
||||
for(uint32_t i = 0; i < nStates; ++i)
|
||||
{
|
||||
ord[i]->next = ord[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
// Inserts state 's' into the list right after state 'next' (despite its
// name, 'next' is the predecessor of the inserted state) and bumps the
// state counter.
void DFA::addState(State *s, State *next)
{
    s->next = next->next;
    next->next = s;
    ++nStates;
}
|
||||
|
||||
} // namespace re2c
|
||||
|
101
tools/re2c/src/ir/adfa/adfa.h
Normal file
101
tools/re2c/src/ir/adfa/adfa.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
#ifndef _RE2C_IR_ADFA_ADFA_
|
||||
#define _RE2C_IR_ADFA_ADFA_
|
||||
|
||||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct Skeleton;
|
||||
struct Output;
|
||||
struct OutputFile;
|
||||
struct dfa_t;
|
||||
|
||||
struct State
|
||||
{
|
||||
label_t label;
|
||||
RuleOp * rule;
|
||||
State * next;
|
||||
size_t fill;
|
||||
|
||||
bool isPreCtxt;
|
||||
bool isBase;
|
||||
Go go;
|
||||
Action action;
|
||||
|
||||
State ()
|
||||
: label (label_t::first ())
|
||||
, rule (NULL)
|
||||
, next (0)
|
||||
, fill (0)
|
||||
, isPreCtxt (false)
|
||||
, isBase (false)
|
||||
, go ()
|
||||
, action ()
|
||||
{}
|
||||
~State ()
|
||||
{
|
||||
operator delete (go.span);
|
||||
}
|
||||
|
||||
FORBID_COPY (State);
|
||||
};
|
||||
|
||||
class DFA
|
||||
{
|
||||
accept_t accepts;
|
||||
Skeleton * skeleton;
|
||||
|
||||
public:
|
||||
const std::string name;
|
||||
const std::string cond;
|
||||
const uint32_t line;
|
||||
|
||||
uint32_t lbChar;
|
||||
uint32_t ubChar;
|
||||
uint32_t nStates;
|
||||
State * head;
|
||||
|
||||
// statistics
|
||||
size_t max_fill;
|
||||
bool need_backup;
|
||||
bool need_backupctx;
|
||||
bool need_accept;
|
||||
|
||||
public:
|
||||
DFA ( const dfa_t &dfa
|
||||
, const std::vector<size_t> &fill
|
||||
, Skeleton *skel
|
||||
, const charset_t &charset
|
||||
, const std::string &n
|
||||
, const std::string &c
|
||||
, uint32_t l
|
||||
);
|
||||
~DFA ();
|
||||
void reorder();
|
||||
void prepare();
|
||||
void calc_stats();
|
||||
void emit (Output &, uint32_t &, bool, bool &);
|
||||
|
||||
private:
|
||||
void addState(State*, State *);
|
||||
void split (State *);
|
||||
void findBaseState ();
|
||||
void count_used_labels (std::set<label_t> & used, label_t prolog, label_t start, bool force_start) const;
|
||||
void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
|
||||
|
||||
FORBID_COPY (DFA);
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_ADFA_ADFA_
|
268
tools/re2c/src/ir/adfa/prepare.cc
Normal file
268
tools/re2c/src/ir/adfa/prepare.cc
Normal file
|
@ -0,0 +1,268 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
#include "src/ir/rule_rank.h"
|
||||
#include "src/util/allocate.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Splits state 's' in two: a new MOVE state inserted right after 's' takes
// over the rule, fill value and spans of 's', while 's' itself keeps a
// single span covering everything up to ubChar that leads to the new state.
void DFA::split(State *s)
{
    State *tail = new State;
    addState(tail, s);

    // the new state inherits everything from 's' ...
    tail->action.set_move ();
    tail->rule = s->rule;
    tail->fill = s->fill;
    tail->go = s->go;

    // ... and 's' is reduced to an unconditional transition to it
    Span *only = allocate<Span> (1);
    only->ub = ubChar;
    only->to = tail;

    s->rule = NULL;
    s->go.nSpans = 1;
    s->go.span = only;
}
|
||||
|
||||
static uint32_t merge(Span *x0, State *fg, State *bg)
|
||||
{
|
||||
Span *x = x0, *f = fg->go.span, *b = bg->go.span;
|
||||
uint32_t nf = fg->go.nSpans, nb = bg->go.nSpans;
|
||||
State *prev = NULL, *to;
|
||||
// NB: we assume both spans are for same range
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (f->ub == b->ub)
|
||||
{
|
||||
to = f->to == b->to ? bg : f->to;
|
||||
|
||||
if (to == prev)
|
||||
{
|
||||
--x;
|
||||
}
|
||||
else
|
||||
{
|
||||
x->to = prev = to;
|
||||
}
|
||||
|
||||
x->ub = f->ub;
|
||||
++x;
|
||||
++f;
|
||||
--nf;
|
||||
++b;
|
||||
--nb;
|
||||
|
||||
if (nf == 0 && nb == 0)
|
||||
{
|
||||
return static_cast<uint32_t> (x - x0);
|
||||
}
|
||||
}
|
||||
|
||||
while (f->ub < b->ub)
|
||||
{
|
||||
to = f->to == b->to ? bg : f->to;
|
||||
|
||||
if (to == prev)
|
||||
{
|
||||
--x;
|
||||
}
|
||||
else
|
||||
{
|
||||
x->to = prev = to;
|
||||
}
|
||||
|
||||
x->ub = f->ub;
|
||||
++x;
|
||||
++f;
|
||||
--nf;
|
||||
}
|
||||
|
||||
while (b->ub < f->ub)
|
||||
{
|
||||
to = b->to == f->to ? bg : f->to;
|
||||
|
||||
if (to == prev)
|
||||
{
|
||||
--x;
|
||||
}
|
||||
else
|
||||
{
|
||||
x->to = prev = to;
|
||||
}
|
||||
|
||||
x->ub = b->ub;
|
||||
++x;
|
||||
++b;
|
||||
--nb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DFA::findBaseState()
|
||||
{
|
||||
Span *span = allocate<Span> (ubChar - lbChar);
|
||||
|
||||
for (State *s = head; s; s = s->next)
|
||||
{
|
||||
if (s->fill == 0)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
State *to = s->go.span[i].to;
|
||||
|
||||
if (to->isBase)
|
||||
{
|
||||
to = to->go.span[0].to;
|
||||
uint32_t nSpans = merge(span, s, to);
|
||||
|
||||
if (nSpans < s->go.nSpans)
|
||||
{
|
||||
operator delete (s->go.span);
|
||||
s->go.nSpans = nSpans;
|
||||
s->go.span = allocate<Span> (nSpans);
|
||||
memcpy(s->go.span, span, nSpans*sizeof(Span));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
operator delete (span);
|
||||
}
|
||||
|
||||
void DFA::prepare ()
|
||||
{
|
||||
bUsedYYBitmap = false;
|
||||
|
||||
// create rule states
|
||||
std::map<rule_rank_t, State *> rules;
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (s->rule)
|
||||
{
|
||||
if (rules.find (s->rule->rank) == rules.end ())
|
||||
{
|
||||
State *n = new State;
|
||||
n->action.set_rule (s->rule);
|
||||
rules[s->rule->rank] = n;
|
||||
addState(n, s);
|
||||
}
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (!s->go.span[i].to)
|
||||
{
|
||||
s->go.span[i].to = rules[s->rule->rank];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create default state (if needed)
|
||||
State * default_state = NULL;
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (!s->go.span[i].to)
|
||||
{
|
||||
if (!default_state)
|
||||
{
|
||||
default_state = new State;
|
||||
addState(default_state, s);
|
||||
}
|
||||
s->go.span[i].to = default_state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find backup states and create accept state (if needed)
|
||||
if (default_state)
|
||||
{
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (s->rule)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE)
|
||||
{
|
||||
const uint32_t accept = static_cast<uint32_t> (accepts.find_or_add (rules[s->rule->rank]));
|
||||
s->action.set_save (accept);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
default_state->action.set_accept (&accepts);
|
||||
}
|
||||
|
||||
// split ``base'' states into two parts
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
s->isBase = false;
|
||||
|
||||
if (s->fill != 0)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (s->go.span[i].to == s)
|
||||
{
|
||||
s->isBase = true;
|
||||
split(s);
|
||||
|
||||
if (opts->bFlag)
|
||||
{
|
||||
BitMap::find(&s->next->go, s);
|
||||
}
|
||||
|
||||
s = s->next;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find ``base'' state, if possible
|
||||
findBaseState();
|
||||
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
s->go.init (s);
|
||||
}
|
||||
}
|
||||
|
||||
void DFA::calc_stats ()
|
||||
{
|
||||
// calculate 'YYMAXFILL'
|
||||
max_fill = 0;
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (max_fill < s->fill)
|
||||
{
|
||||
max_fill = s->fill;
|
||||
}
|
||||
}
|
||||
|
||||
// determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used
|
||||
need_backup = accepts.size () > 0;
|
||||
|
||||
// determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (s->isPreCtxt)
|
||||
{
|
||||
need_backupctx = true;
|
||||
}
|
||||
}
|
||||
|
||||
// determine if 'yyaccept' variable is used
|
||||
need_accept = accepts.size () > 1;
|
||||
}
|
||||
|
||||
} // namespace re2c
|
104
tools/re2c/src/ir/compile.cc
Normal file
104
tools/re2c/src/ir/compile.cc
Normal file
|
@ -0,0 +1,104 @@
|
|||
#include <algorithm>
|
||||
#include <ostream>
|
||||
#include <set>
|
||||
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/ir/compile.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/nfa/nfa.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/parse/spec.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Builds the name "line<N>", or "line<N>_<cond>" when a condition name is
// given.
static std::string make_name(const std::string &cond, uint32_t line)
{
    std::ostringstream out;
    out << "line" << line;
    if (!cond.empty ())
    {
        out << "_" << cond;
    }
    return out.str();
}
|
||||
|
||||
// Compiles one specification into a DFA with actions:
// regexp -> NFA -> DFA -> (skeleton) -> minimized DFA -> ADFA.
// Also folds statistics of the new DFA into 'output'.
smart_ptr<DFA> compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits)
{
    const uint32_t line = output.source.get_block_line();
    const std::string name = make_name(cond, line);

    // The original set of code units (charset) might be very large.
    // A common trick is to split charset into disjoint character ranges
    // and choose a representative of each range (we choose lower bound).
    // The set of all representatives is the new (compacted) charset.
    // Don't forget to include zero and upper bound, even if they
    // do not explicitly appear in ranges.
    std::set<uint32_t> bounds;
    spec.re->split(bounds);
    bounds.insert(0);
    bounds.insert(cunits);

    charset_t cs;
    std::set<uint32_t>::const_iterator bound = bounds.begin();
    while (bound != bounds.end())
    {
        cs.push_back(*bound);
        ++bound;
    }

    nfa_t nfa(spec.re);

    dfa_t dfa(nfa, cs, spec.rules);

    // skeleton must be constructed after DFA construction
    // but prior to any other DFA transformations
    Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line);

    minimization(dfa);

    // find YYFILL states and calculate argument to YYFILL
    std::vector<size_t> fill;
    fillpoints(dfa, fill);

    // ADFA stands for 'DFA with actions'
    DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line);

    /*
     * note [reordering DFA states]
     *
     * re2c-generated code depends on the order of states in DFA: simply
     * flipping two states may change the output significantly.
     * The order of states is affected by many factors, e.g.:
     *   - flipping left and right subtrees of alternative when constructing
     *     AST (also applies to iteration and counted repetition)
     *   - changing the order in which graph nodes are visited (applies to
     *     any intermediate representation: bytecode, NFA, DFA, etc.)
     *
     * To make the resulting code independent of such changes, we hereby
     * reorder DFA states. The ordering scheme is very simple:
     *
     * Starting with DFA root, walk DFA nodes in breadth-first order.
     * Child nodes are ordered according to the (alphabetically) first symbol
     * leading to each node. Each node must be visited exactly once.
     * Default state (NULL) is always the last state.
     */
    adfa->reorder();

    // skeleton is constructed, do further DFA transformations
    adfa->prepare();

    // finally gather overall DFA statistics
    adfa->calc_stats();

    // accumulate global statistics from this particular DFA
    if (output.max_fill < adfa->max_fill)
    {
        output.max_fill = adfa->max_fill;
    }
    if (adfa->need_accept)
    {
        output.source.set_used_yyaccept ();
    }

    return make_smart_ptr(adfa);
}
|
||||
|
||||
} // namespace re2c
|
20
tools/re2c/src/ir/compile.h
Normal file
20
tools/re2c/src/ir/compile.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef _RE2C_IR_COMPILE_
|
||||
#define _RE2C_IR_COMPILE_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
|
||||
#include "src/util/smart_ptr.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class DFA;
|
||||
struct Output;
|
||||
struct Spec;
|
||||
|
||||
smart_ptr<DFA> compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_COMPILE_
|
197
tools/re2c/src/ir/dfa/determinization.cc
Normal file
197
tools/re2c/src/ir/dfa/determinization.cc
Normal file
|
@ -0,0 +1,197 @@
|
|||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/nfa/nfa.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
#include "src/ir/rule_rank.h"
|
||||
#include "src/parse/rules.h"
|
||||
#include "src/util/ord_hash_set.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
|
||||
|
||||
/*
|
||||
* note [marking DFA states]
|
||||
*
|
||||
* DFA state is a set of NFA states.
|
||||
* However, DFA state includes not all NFA states that are in
|
||||
* epsilon-closure (NFA states that have only epsilon-transitions
|
||||
* and are not context of final states are omitted).
|
||||
* The included states are called 'kernel' states.
|
||||
*
|
||||
* We mark visited NFA states during closure construction.
|
||||
* These marks serve two purposes:
|
||||
* - avoid loops in NFA
|
||||
* - avoid duplication of NFA states in kernel
|
||||
*
|
||||
* Note that after closure construction:
|
||||
* - all non-kernel states must be unmarked (these states are
|
||||
* not stored in kernel and it is impossible to unmark them
|
||||
* afterwards)
|
||||
* - all kernel states must be marked (because we may later
|
||||
* extend this kernel with epsilon-closure of another NFA
|
||||
* state). Kernel states are unmarked later (before finding
|
||||
* or adding DFA state).
|
||||
*/
|
||||
// Appends the kernel states of the epsilon-closure of 'n' to the buffer at
// 'cP' and returns the new end of the buffer. States that are already
// marked are skipped; ALT states are unmarked again and never stored.
// See note [marking DFA states].
static nfa_state_t **closure(nfa_state_t **cP, nfa_state_t *n)
{
    if (n->mark)
    {
        return cP;
    }

    n->mark = true;
    if (n->type == nfa_state_t::ALT)
    {
        // not a kernel state: follow both branches, then unmark
        cP = closure(cP, n->value.alt.out2);
        cP = closure(cP, n->value.alt.out1);
        n->mark = false;
    }
    else if (n->type == nfa_state_t::CTX)
    {
        // stored in the kernel, and its successor is followed too
        *cP = n;
        ++cP;
        cP = closure(cP, n->value.ctx.out);
    }
    else
    {
        *cP = n;
        ++cP;
    }

    return cP;
}
|
||||
|
||||
static size_t find_state
|
||||
( nfa_state_t **kernel
|
||||
, nfa_state_t **end
|
||||
, ord_hash_set_t &kernels
|
||||
)
|
||||
{
|
||||
// zero-sized kernel corresponds to default state
|
||||
if (kernel == end)
|
||||
{
|
||||
return dfa_t::NIL;
|
||||
}
|
||||
|
||||
// see note [marking DFA states]
|
||||
for (nfa_state_t **p = kernel; p != end; ++p)
|
||||
{
|
||||
(*p)->mark = false;
|
||||
}
|
||||
|
||||
// sort kernel states: we need this to get stable hash
|
||||
// and to compare states with simple 'memcmp'
|
||||
std::sort(kernel, end);
|
||||
const size_t size = static_cast<size_t>(end - kernel) * sizeof(nfa_state_t*);
|
||||
return kernels.insert(kernel, size);
|
||||
}
|
||||
|
||||
// Builds a DFA from 'nfa'. DFA states are discovered incrementally: each
// new kernel added to 'kernels' becomes a state whose arcs are computed on
// a later iteration. For every final state the rule with the smallest rank
// is chosen; the other rules of that state get the chosen rule recorded in
// their 'shadow' set in 'rules'.
dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules)
    : states()
    , nchars(charset.size() - 1) // (n + 1) bounds for n ranges
{
    typedef std::set<RuleOp*> ruleset_t;
    typedef std::vector<nfa_state_t*> targets_t;

    std::map<size_t, ruleset_t> s2rules;
    ord_hash_set_t kernels;
    nfa_state_t **const buffer = new nfa_state_t*[nfa.size];
    std::vector<targets_t> arcs(nchars);

    // the initial state is the closure of NFA root
    find_state(buffer, closure(buffer, nfa.root), kernels);

    // 'kernels' grows while the loop runs
    for (size_t i = 0; i < kernels.size(); ++i)
    {
        dfa_state_t *s = new dfa_state_t;
        states.push_back(s);

        nfa_state_t **kernel;
        const size_t kernel_size = kernels.deref<nfa_state_t*>(i, kernel);
        for (size_t k = 0; k < kernel_size; ++k)
        {
            nfa_state_t *n = kernel[k];
            if (n->type == nfa_state_t::RAN)
            {
                // the target is reachable on every character of each range
                nfa_state_t *target = n->value.ran.out;
                size_t c = 0;
                for (Range *r = n->value.ran.ran; r; r = r->next ())
                {
                    while (charset[c] != r->lower())
                    {
                        ++c;
                    }
                    while (charset[c] != r->upper())
                    {
                        arcs[c].push_back(target);
                        ++c;
                    }
                }
            }
            else if (n->type == nfa_state_t::CTX)
            {
                s->ctx = true;
            }
            else if (n->type == nfa_state_t::FIN)
            {
                s2rules[i].insert(n->value.fin.rule);
            }
        }

        s->arcs = new size_t[nchars];
        for (size_t c = 0; c < nchars; ++c)
        {
            targets_t &targets = arcs[c];
            nfa_state_t **end = buffer;
            for (targets_t::const_iterator t = targets.begin(); t != targets.end(); ++t)
            {
                end = closure(end, *t);
            }
            s->arcs[c] = find_state(buffer, end, kernels);

            // reset for the next DFA state
            targets.clear();
        }
    }
    delete[] buffer;

    const size_t count = states.size();
    for (size_t i = 0; i < count; ++i)
    {
        dfa_state_t *s = states[i];
        ruleset_t &rs = s2rules[i];
        const ruleset_t::const_iterator last = rs.end();

        // for each final state: choose the rule with the smallest rank
        for (ruleset_t::const_iterator r = rs.begin(); r != last; ++r)
        {
            if (!s->rule || (*r)->rank < s->rule->rank)
            {
                s->rule = *r;
            }
        }

        // other rules are shadowed by the chosen rule
        for (ruleset_t::const_iterator r = rs.begin(); r != last; ++r)
        {
            if (*r != s->rule)
            {
                rules[(*r)->rank].shadow.insert(s->rule->rank);
            }
        }
    }
}
|
||||
|
||||
// Deletes all states owned by the DFA.
dfa_t::~dfa_t()
{
    const size_t count = states.size();
    for (size_t k = 0; k < count; ++k)
    {
        delete states[k];
    }
}
|
||||
|
||||
} // namespace re2c
|
||||
|
58
tools/re2c/src/ir/dfa/dfa.h
Normal file
58
tools/re2c/src/ir/dfa/dfa.h
Normal file
|
@ -0,0 +1,58 @@
|
|||
#ifndef _RE2C_IR_DFA_DFA_
|
||||
#define _RE2C_IR_DFA_DFA_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/parse/rules.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct nfa_t;
|
||||
class RuleOp;
|
||||
|
||||
struct dfa_state_t
|
||||
{
|
||||
size_t *arcs;
|
||||
RuleOp *rule;
|
||||
bool ctx;
|
||||
|
||||
dfa_state_t()
|
||||
: arcs(NULL)
|
||||
, rule(NULL)
|
||||
, ctx(false)
|
||||
{}
|
||||
~dfa_state_t()
|
||||
{
|
||||
delete[] arcs;
|
||||
}
|
||||
|
||||
FORBID_COPY(dfa_state_t);
|
||||
};
|
||||
|
||||
struct dfa_t
|
||||
{
|
||||
static const size_t NIL;
|
||||
|
||||
std::vector<dfa_state_t*> states;
|
||||
const size_t nchars;
|
||||
|
||||
dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules);
|
||||
~dfa_t();
|
||||
};
|
||||
|
||||
// Available DFA minimization algorithms.
enum dfa_minimization_t
{
    DFA_MINIMIZATION_TABLE,
    DFA_MINIMIZATION_MOORE
};
|
||||
|
||||
void minimization(dfa_t &dfa);
|
||||
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_DFA_DFA_
|
154
tools/re2c/src/ir/dfa/fillpoints.cc
Normal file
154
tools/re2c/src/ir/dfa/fillpoints.cc
Normal file
|
@ -0,0 +1,154 @@
|
|||
#include <limits>
|
||||
#include <stack>
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static const size_t INFINITY = std::numeric_limits<size_t>::max();
|
||||
static const size_t UNDEFINED = INFINITY - 1;
|
||||
|
||||
// Returns true iff at least one of the first 'narcs' entries of 'arcs'
// equals 'node', i.e. the node has a transition to itself.
static bool loopback(size_t node, size_t narcs, const size_t *arcs)
{
    const size_t *const end = arcs + narcs;
    for (const size_t *arc = arcs; arc != end; ++arc)
    {
        if (*arc == node)
        {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
/*
|
||||
 * note [finding strongly connected components of DFA]
|
||||
*
|
||||
* A slight modification of Tarjan's algorithm.
|
||||
*
|
||||
 * The algorithm walks graph in depth-first order. It maintains a stack
|
||||
* of nodes that have already been visited but haven't been assigned to
|
||||
* SCC yet. For each node the algorithm calculates 'lowlink': index of
|
||||
* the highest ancestor node reachable in one step from a descendant of
|
||||
* the node. Lowlink is used to determine when a set of nodes should be
|
||||
* popped off the stack into a new SCC.
|
||||
*
|
||||
* We use lowlink to hold different kinds of information:
|
||||
* - values in range [0 .. stack size] mean that this node is on stack
|
||||
* (link to a node with the smallest index reachable from this one)
|
||||
* - UNDEFINED means that this node has not been visited yet
|
||||
* - INFINITY means that this node has already been popped off stack
|
||||
*
|
||||
* We use stack size (rather than topological sort index) as unique index
|
||||
* of a node on stack. This is safe because indices of nodes on stack are
|
||||
* still unique and less than indices of nodes that have been popped off
|
||||
* stack (INFINITY).
|
||||
*
|
||||
*/
|
||||
static void scc(
|
||||
const dfa_t &dfa,
|
||||
std::stack<size_t> &stack,
|
||||
std::vector<size_t> &lowlink,
|
||||
std::vector<bool> &trivial,
|
||||
size_t i)
|
||||
{
|
||||
const size_t link = stack.size();
|
||||
lowlink[i] = link;
|
||||
stack.push(i);
|
||||
|
||||
const size_t *arcs = dfa.states[i]->arcs;
|
||||
for (size_t c = 0; c < dfa.nchars; ++c)
|
||||
{
|
||||
const size_t j = arcs[c];
|
||||
if (j != dfa_t::NIL)
|
||||
{
|
||||
if (lowlink[j] == UNDEFINED)
|
||||
{
|
||||
scc(dfa, stack, lowlink, trivial, j);
|
||||
}
|
||||
if (lowlink[j] < lowlink[i])
|
||||
{
|
||||
lowlink[i] = lowlink[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (lowlink[i] == link)
|
||||
{
|
||||
// SCC is non-trivial (has loops) iff it either:
|
||||
// - consists of multiple nodes (they all must be interconnected)
|
||||
// - consists of single node which loops back to itself
|
||||
trivial[i] = i == stack.top()
|
||||
&& !loopback(i, dfa.nchars, arcs);
|
||||
|
||||
size_t j;
|
||||
do
|
||||
{
|
||||
j = stack.top();
|
||||
stack.pop();
|
||||
lowlink[j] = INFINITY;
|
||||
}
|
||||
while (j != i);
|
||||
}
|
||||
}
|
||||
|
||||
static void calc_fill(
|
||||
const dfa_t &dfa,
|
||||
const std::vector<bool> &trivial,
|
||||
std::vector<size_t> &fill,
|
||||
size_t i)
|
||||
{
|
||||
if (fill[i] == UNDEFINED)
|
||||
{
|
||||
fill[i] = 0;
|
||||
const size_t *arcs = dfa.states[i]->arcs;
|
||||
for (size_t c = 0; c < dfa.nchars; ++c)
|
||||
{
|
||||
const size_t j = arcs[c];
|
||||
if (j != dfa_t::NIL)
|
||||
{
|
||||
calc_fill(dfa, trivial, fill, j);
|
||||
size_t max = 1;
|
||||
if (trivial[j])
|
||||
{
|
||||
max += fill[j];
|
||||
}
|
||||
if (max > fill[i])
|
||||
{
|
||||
fill[i] = max;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
|
||||
{
|
||||
const size_t size = dfa.states.size();
|
||||
|
||||
// find DFA states that belong to non-trivial SCC
|
||||
std::stack<size_t> stack;
|
||||
std::vector<size_t> lowlink(size, UNDEFINED);
|
||||
std::vector<bool> trivial(size, false);
|
||||
scc(dfa, stack, lowlink, trivial, 0);
|
||||
|
||||
// for each DFA state, calculate YYFILL argument:
|
||||
// maximal path length to the next YYFILL state
|
||||
fill.resize(size, UNDEFINED);
|
||||
calc_fill(dfa, trivial, fill, 0);
|
||||
|
||||
// The following states must trigger YYFILL:
|
||||
// - inital state
|
||||
// - all states in non-trivial SCCs
|
||||
// for other states, reset YYFILL argument to zero
|
||||
for (size_t i = 1; i < size; ++i)
|
||||
{
|
||||
if (trivial[i])
|
||||
{
|
||||
fill[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
252
tools/re2c/src/ir/dfa/minimization.cc
Normal file
252
tools/re2c/src/ir/dfa/minimization.cc
Normal file
|
@ -0,0 +1,252 @@
|
|||
#include <string.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class RuleOp;
|
||||
|
||||
/*
|
||||
* note [DFA minimization: table filling algorithm]
|
||||
*
|
||||
* This algorithm is simple and slow; it's a reference implementation.
|
||||
*
|
||||
* The algorithm constructs (strictly lower triangular) boolean matrix
|
||||
* indexed by DFA states. Each matrix cell (S1,S2) indicates if states
|
||||
* S1 and S2 are distinguishable. Initially states are distinguished
|
||||
* according to their rule and context. One step of the algorithm
|
||||
* updates the matrix as follows: each pair of states S1 and S2 is
|
||||
* marked as distinguishable iff there exist transitions from S1 and S2 on
|
||||
* the same symbol that go to distinguishable states. The algorithm
|
||||
* loops until the matrix stops changing.
|
||||
*/
|
||||
static void minimization_table(
|
||||
size_t *part,
|
||||
const std::vector<dfa_state_t*> &states,
|
||||
size_t nchars)
|
||||
{
|
||||
const size_t count = states.size();
|
||||
|
||||
bool **tbl = new bool*[count];
|
||||
tbl[0] = new bool[count * (count - 1) / 2];
|
||||
for (size_t i = 0; i < count - 1; ++i)
|
||||
{
|
||||
tbl[i + 1] = tbl[i] + i;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
dfa_state_t *s1 = states[i];
|
||||
for (size_t j = 0; j < i; ++j)
|
||||
{
|
||||
dfa_state_t *s2 = states[j];
|
||||
tbl[i][j] = s1->ctx != s2->ctx
|
||||
|| s1->rule != s2->rule;
|
||||
}
|
||||
}
|
||||
|
||||
for (bool loop = true; loop;)
|
||||
{
|
||||
loop = false;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < i; ++j)
|
||||
{
|
||||
if (!tbl[i][j])
|
||||
{
|
||||
for (size_t k = 0; k < nchars; ++k)
|
||||
{
|
||||
size_t oi = states[i]->arcs[k];
|
||||
size_t oj = states[j]->arcs[k];
|
||||
if (oi < oj)
|
||||
{
|
||||
std::swap(oi, oj);
|
||||
}
|
||||
if (oi != oj &&
|
||||
(oi == dfa_t::NIL ||
|
||||
oj == dfa_t::NIL ||
|
||||
tbl[oi][oj]))
|
||||
{
|
||||
tbl[i][j] = true;
|
||||
loop = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
part[i] = i;
|
||||
for (size_t j = 0; j < i; ++j)
|
||||
{
|
||||
if (!tbl[i][j])
|
||||
{
|
||||
part[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete[] tbl[0];
|
||||
delete[] tbl;
|
||||
}
|
||||
|
||||
/*
|
||||
* note [DFA minimization: Moore algorithm]
|
||||
*
|
||||
* The algorithm maintains partition of DFA states.
|
||||
* Initial partition is coarse: states are distinguished according
|
||||
* to their rule and context. Partition is gradually refined: each
|
||||
* set of states is split into minimal number of subsets such that
|
||||
* for all states in a subset transitions on the same symbol go to
|
||||
* the same set of states.
|
||||
* The algorithm loops until partition stops changing.
|
||||
*/
|
||||
static void minimization_moore(
|
||||
size_t *part,
|
||||
const std::vector<dfa_state_t*> &states,
|
||||
size_t nchars)
|
||||
{
|
||||
const size_t count = states.size();
|
||||
|
||||
size_t *next = new size_t[count];
|
||||
|
||||
std::map<std::pair<RuleOp*, bool>, size_t> init;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
dfa_state_t *s = states[i];
|
||||
std::pair<RuleOp*, bool> key(s->rule, s->ctx);
|
||||
if (init.insert(std::make_pair(key, i)).second)
|
||||
{
|
||||
part[i] = i;
|
||||
next[i] = dfa_t::NIL;
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t j = init[key];
|
||||
part[i] = j;
|
||||
next[i] = next[j];
|
||||
next[j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
size_t *out = new size_t[nchars * count];
|
||||
size_t *diff = new size_t[count];
|
||||
for (bool loop = true; loop;)
|
||||
{
|
||||
loop = false;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
if (i != part[i] || next[i] == dfa_t::NIL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t j = i; j != dfa_t::NIL; j = next[j])
|
||||
{
|
||||
size_t *o = &out[j * nchars];
|
||||
size_t *a = states[j]->arcs;
|
||||
for (size_t c = 0; c < nchars; ++c)
|
||||
{
|
||||
o[c] = a[c] == dfa_t::NIL
|
||||
? dfa_t::NIL
|
||||
: part[a[c]];
|
||||
}
|
||||
}
|
||||
|
||||
size_t diff_count = 0;
|
||||
for (size_t j = i; j != dfa_t::NIL;)
|
||||
{
|
||||
const size_t j_next = next[j];
|
||||
size_t n = 0;
|
||||
for (; n < diff_count; ++n)
|
||||
{
|
||||
size_t k = diff[n];
|
||||
if (memcmp(&out[j * nchars],
|
||||
&out[k * nchars],
|
||||
nchars * sizeof(size_t)) == 0)
|
||||
{
|
||||
part[j] = k;
|
||||
next[j] = next[k];
|
||||
next[k] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (n == diff_count)
|
||||
{
|
||||
diff[diff_count++] = j;
|
||||
part[j] = j;
|
||||
next[j] = dfa_t::NIL;
|
||||
}
|
||||
j = j_next;
|
||||
}
|
||||
loop |= diff_count > 1;
|
||||
}
|
||||
}
|
||||
delete[] out;
|
||||
delete[] diff;
|
||||
delete[] next;
|
||||
}
|
||||
|
||||
/*
 * Minimizes 'dfa' in place: computes a partition of its states with the
 * algorithm selected by 'opts->dfa_minimization', keeps one representative
 * state per set, renumbers transitions accordingly and deletes the
 * remaining states.
 *
 * NOTE(review): the switch has no default branch; for any option value
 * other than the two handled ones 'part' would be read uninitialized.
 */
void minimization(dfa_t &dfa)
{
    const size_t nstates = dfa.states.size();

    // part[i]: representative of the set that state i belongs to
    size_t *part = new size_t[nstates];
    switch (opts->dfa_minimization)
    {
        case DFA_MINIMIZATION_TABLE:
            minimization_table(part, dfa.states, dfa.nchars);
            break;
        case DFA_MINIMIZATION_MOORE:
            minimization_moore(part, dfa.states, dfa.nchars);
            break;
    }

    // compact[i]: new index of representative i (set for representatives only)
    size_t *compact = new size_t[nstates];
    size_t nsets = 0;
    for (size_t i = 0; i < nstates; ++i)
    {
        if (part[i] == i)
        {
            compact[i] = nsets++;
        }
    }

    // keep representatives (in their original order), drop everything else
    size_t kept = 0;
    for (size_t i = 0; i < nstates; ++i)
    {
        dfa_state_t *s = dfa.states[i];
        if (part[i] != i)
        {
            delete s;
            continue;
        }

        // redirect transitions to the new index of the target's representative
        for (size_t c = 0; c < dfa.nchars; ++c)
        {
            size_t &arc = s->arcs[c];
            if (arc != dfa_t::NIL)
            {
                arc = compact[part[arc]];
            }
        }
        dfa.states[kept++] = s;
    }
    dfa.states.resize(kept);

    delete[] compact;
    delete[] part;
}
|
||||
|
||||
} // namespace re2c
|
||||
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue