mirror of
https://github.com/ZDoom/Raze.git
synced 2025-01-18 06:32:37 +00:00
- added compile tools from GZDoom repo:
- updaterevision for automated revision string generation - re2c as a prerequisite to use sc_man in the future - zipdir to automatically generate an engine resource file.
This commit is contained in:
parent
0d98e7f256
commit
8c95516224
161 changed files with 29182 additions and 0 deletions
7
tools/CMakeLists.txt
Normal file
7
tools/CMakeLists.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Build script for the helper tools; each tool is configured from its own subdirectory.
cmake_minimum_required( VERSION 2.8.7 )
|
||||
|
||||
# re2c appends itself to CROSS_EXPORTS when not cross-compiling.
add_subdirectory( re2c )
|
||||
add_subdirectory( updaterevision )
|
||||
add_subdirectory( zipdir )
|
||||
|
||||
# Re-export CROSS_EXPORTS so the value collected in this directory is visible to the parent scope.
set( CROSS_EXPORTS ${CROSS_EXPORTS} PARENT_SCOPE )
|
403
tools/re2c/CHANGELOG
Normal file
403
tools/re2c/CHANGELOG
Normal file
|
@ -0,0 +1,403 @@
|
|||
Version 0.16 (2016-01-21)
|
||||
---------------------------
|
||||
- Fixed bug #127 "code generation error with wide chars and bitmaps (omitted 'goto' statement)"
|
||||
- Added DFA minimization and option '--dfa-minimization <table | moore>'
|
||||
- Fixed bug #128 "very slow DFA construction (resulting in a very large DFA)"
|
||||
- Fixed bug #132 "test failure on big endian archs with 0.15.3"
|
||||
|
||||
Version 0.15.3 (2015-12-02)
|
||||
---------------------------
|
||||
- Fixed bugs and applied patches:
|
||||
#122 "clang does not compile re2c 0.15.x" (reported and fixed by Oleksii Taran).
|
||||
#124 "Get rid of UINT32_MAX and friends" (patch by Sergei Trofimovich, fixes FreeBSD builds).
|
||||
#125 "[OS X] git reports changes not staged for commit in newly cloned repository" (by Oleksii Taran, this fix also applies to Windows).
|
||||
- Added option --no-version that allows omitting version information.
|
||||
- Reduced memory and time consumed with -Wundefined-control-flow.
|
||||
- Improved coverage of input data generated with -S --skeleton.
|
||||
|
||||
Version 0.15.2 (2015-11-23)
|
||||
---------------------------
|
||||
- Fixed build system: lexer depends on bison-generated parser
|
||||
(Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=566620)
|
||||
|
||||
Version 0.15.1 (2015-11-22)
|
||||
---------------------------
|
||||
- Fixed test failures caused by locale-sensitive 'sort'.
|
||||
|
||||
Version 0.15 (2015-11-22)
|
||||
-------------------------
|
||||
- Updated website http://re2c.org:
|
||||
added examples
|
||||
updated docs
|
||||
added news
|
||||
added web feed (Atom 1.0)
|
||||
- Added options:
|
||||
-S, --skeleton
|
||||
--empty-class <match-empty | match-none | error>
|
||||
- Added warnings:
|
||||
-W
|
||||
-Werror
|
||||
-W<warning>
|
||||
-Wno-<warning>
|
||||
-Werror-<warning>
|
||||
-Wno-error-<warning>
|
||||
- Added individual warnings:
|
||||
-Wundefined-control-flow
|
||||
-Wunreachable-rules
|
||||
-Wcondition-order
|
||||
-Wuseless-escape
|
||||
-Wempty-character-class
|
||||
-Wswapped-range
|
||||
-Wmatch-empty-string
|
||||
- Fixed options:
|
||||
-- (interpret remaining arguments as non-options)
|
||||
- Deprecated options:
|
||||
-1 --single-pass (single pass is by default now)
|
||||
- Reduced size of the generated .dot files.
|
||||
- Fixed bugs:
|
||||
#27 re2c crashes reading files containing %{ %} (patch by Rui)
|
||||
#51 default rule doesn't work in reuse mode
|
||||
#52 eliminate multiple passes
|
||||
#59 bogus yyaccept in -c mode
|
||||
#60 redundant use of YYMARKER
|
||||
#61 empty character class [] matches empty string
|
||||
#115 flex-style named definitions cause ambiguity in re2c grammar
|
||||
#119 -f with -b/-g generates incorrect dispatch on fill labels
|
||||
#116 empty string with non-empty trailing context consumes code units
|
||||
- Added test options:
|
||||
-j, -j <N> (run tests in N threads, defaults to the number of CPUs)
|
||||
--wine (test windows builds using wine)
|
||||
--skeleton (generate skeleton programs, compile and execute them)
|
||||
--keep-tmp-files (don't delete intermediate files for successful tests)
|
||||
- Updated build system:
|
||||
support out of source builds
|
||||
support `make distcheck`
|
||||
added `make bootstrap` (rebuild re2c after building with precompiled .re files)
|
||||
added `make tests` (run tests with -j)
|
||||
added `make vtests` (run tests with --valgrind -j)
|
||||
added `make wtests` (run tests with --wine -j 1)
|
||||
added Autoconf tests for CXXFLAGS. By default try the following options:
|
||||
-W -Wall -Wextra -Weffc++ -pedantic -Wformat=2 -Wredundant-decls
|
||||
-Wsuggest-attribute=format -Wconversion -Wsign-conversion -O2 -Weverything),
|
||||
respect user-defined CXXFLAGS
|
||||
support Mingw builds: `configure -host i686-w64-mingw32`
|
||||
structured source files
|
||||
removed old MSVC files
|
||||
- Moved development to github (https://github.com/skvadrik/re2c), keep a mirror on sourceforge.
|
||||
|
||||
Version 0.14.3 (2015-05-20)
|
||||
-----------------------------
|
||||
- applied patch '#27 re2c crashes reading files containing %{ %}' by Rui
|
||||
- dropped distfiles for MSVC (they are broken anyway)
|
||||
|
||||
Version 0.14.2 (2015-03-25)
|
||||
-----------------------------
|
||||
- fixed #57 Wrong result only if another rule is present
|
||||
|
||||
Version 0.14.1 (2015-02-27)
|
||||
-----------------------------
|
||||
- fixed #55 re2c-0.14: re2c -V outputs null byte
|
||||
|
||||
Version 0.14 (2015-02-23)
|
||||
-----------------------------
|
||||
- Added generic input API 21 (#21 Support to configure how re2c code interfaced with the symbol buffer?)
|
||||
- fixed #46 re2c generates an infinite loop, depends on existence of previous parser
|
||||
- fixed #47 Dot output label escaped characters
|
||||
|
||||
Version 0.13.7.5 (2014-08-22)
|
||||
-----------------------------
|
||||
- Fixed Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=518904 (PHP lexer)
|
||||
|
||||
Version 0.13.7.4 (2014-07-29)
|
||||
-----------------------------
|
||||
- Enabled 'make docs' only if configured with '--enable-docs'
|
||||
- Disallowed to use yacc/byacc instead of bison to build parser
|
||||
- Removed non-portable sed feature in script that runs tests
|
||||
|
||||
Version 0.13.7.3 (2014-07-28)
|
||||
-----------------------------
|
||||
- Fixed CXX warning
|
||||
- Got rid of asciidoc build-time dependency
|
||||
|
||||
Version 0.13.7.2 (2014-07-27)
|
||||
-----------------------------
|
||||
- Included man page into dist, respect users CXXFLAGS.
|
||||
|
||||
Version 0.13.7.1 (2014-07-26)
|
||||
-----------------------------
|
||||
- Added missing files to tarball
|
||||
|
||||
Version 0.13.7 (2014-07-25)
|
||||
---------------------------
|
||||
- Added UTF-8 support
|
||||
- Added UTF-16 support
|
||||
- Added default rule
|
||||
- Added option to control ill-formed Unicode
|
||||
|
||||
Version 0.13.6 (2013-07-04)
|
||||
---------------------------
|
||||
- Fixed #2535084 uint problem with Sun C 5.8
|
||||
- #3308400: allow Yacc-style %{code brackets}%
|
||||
- #2506253: allow C++ // comments
|
||||
- Fixed inplace configuration in -e mode.
|
||||
- Applied #2482572 Typos in error messages.
|
||||
- Applied #2482561 Error in manual section on -r mode.
|
||||
- Fixed #2478216 Wrong start_label in -c mode.
|
||||
- Fixed #2186718 Unescaped backslash in file name of #line directive.
|
||||
- Fixed #2102138 Duplicate case labels on EBCDIC.
|
||||
- Fixed #2088583 Compile problem on AIX.
|
||||
- Fixed #2038610 Ebcdic problem.
|
||||
- improve dot support: make char intervals (e.g. [A-Z]) instead of one edge per char
|
||||
|
||||
Version 0.13.5 (2008-05-25)
|
||||
---------------------------
|
||||
- Fixed #1952896 Segfault in re2c::Scanner::scan.
|
||||
- Fixed #1952842 Regression.
|
||||
|
||||
Version 0.13.4 (2008-04-05)
|
||||
---------------------------
|
||||
- Added transparent handling of #line directives in input files.
|
||||
- Added re2c:yyfill:check inplace configuration.
|
||||
- Added re2c:define:YYSETSTATE:naked inplace configuration.
|
||||
- Added re2c:flags:w and re2c:flags:u inplace configurations.
|
||||
- Added the ability to add rules in 'use:re2c' blocks.
|
||||
- Changed -r flag to accept only 'rules:re2c' and 'use:re2c' blocks.
|
||||
|
||||
Version 0.13.3 (2008-03-14)
|
||||
---------------------------
|
||||
- Added -r flag to allow reuse of scanner definitions.
|
||||
- Added -F flag to support flex syntax in rules.
|
||||
- Fixed SEGV in scanner that occurs with very large blocks.
|
||||
- Fixed issue with unused yybm.
|
||||
- Partial support for flex syntax.
|
||||
- Changed to allow /* comments with -c switch.
|
||||
- Added flag -D/--emit-dot.
|
||||
|
||||
Version 0.13.2 (2008-02-14)
|
||||
---------------------------
|
||||
- Added flag --case-inverted.
|
||||
- Added flag --case-insensitive.
|
||||
- Added support for '<!...>' to enable rule setup.
|
||||
- Added support for '=>' style rules.
|
||||
- Added support for ':=' style rules.
|
||||
- Added support for ':=>' style rules.
|
||||
- Added re2c:cond:divider and re2c:cond:goto inplace configuration.
|
||||
- Fixed code generation to emit space after 'if'.
|
||||
|
||||
Version 0.13.1 (2007-08-24)
|
||||
---------------------------
|
||||
- Added custom build rules for Visual Studio 2005 (re2c.rules). (William Swanson)
|
||||
- Fixed issue with some compilers.
|
||||
- Fixed #1776177 Build on AIX.
|
||||
- Fixed #1743180 fwrite with 0 length crashes on OS X.
|
||||
|
||||
Version 0.13.0 (2007-06-24)
|
||||
---------------------------
|
||||
- Added -c and -t to generate scanners with (f)lex-like condition support.
|
||||
- Fixed issue with short form of switches and parameter if not first switch.
|
||||
- Fixed #1708378 segfault in actions.cc.
|
||||
|
||||
Version 0.12.3 (2007-08-24)
|
||||
---------------------------
|
||||
- Fixed issue with some compilers.
|
||||
- Fixed #1776177 Build on AIX.
|
||||
- Fixed #1743180 fwrite with 0 length crashes on OS X.
|
||||
|
||||
Version 0.12.2 (2007-06-26)
|
||||
---------------------------
|
||||
- Fixed #1743180 fwrite with 0 length crashes on OS X.
|
||||
|
||||
Version 0.12.1 (2007-05-23)
|
||||
---------------------------
|
||||
- Fixed #1711240 problem with '"' and 7F on EBCDIC platforms.
|
||||
|
||||
Version 0.12.0 (2007-05-01)
|
||||
---------------------------
|
||||
- Re-release of 0.11.3 as new stable branch.
|
||||
- Fixed issue with short form of switches and parameter if not first switch.
|
||||
- Fixed #1708378 segfault in actions.cc.
|
||||
|
||||
Version 0.11.3 (2007-04-01)
|
||||
---------------------------
|
||||
- Added support for underscores in named definitions.
|
||||
- Added new option --no-generation-date.
|
||||
- Fixed issue with long form of switches.
|
||||
|
||||
Version 0.11.2 (2007-03-01)
|
||||
---------------------------
|
||||
- Added inplace configuration 're2c:yyfill:parameter'.
|
||||
- Added inplace configuration 're2c:yych:conversion'.
|
||||
- Fixed -u switch code generation.
|
||||
- Added ability to avoid defines and overwrite variable and label names.
|
||||
|
||||
Version 0.11.1 (2007-02-20)
|
||||
---------------------------
|
||||
- Applied #1647875 add const to yybm vector.
|
||||
|
||||
Version 0.11.0 (2007-01-01)
|
||||
---------------------------
|
||||
- Added -u switch to support unicode.
|
||||
|
||||
Version 0.10.8 (2007-04-01)
|
||||
---------------------------
|
||||
- Fixed issue with long form of switches.
|
||||
|
||||
Version 0.10.7 (2007-02-20)
|
||||
---------------------------
|
||||
- Applied #1647875 add const to yybm vector.
|
||||
|
||||
Version 0.10.6 (2006-08-05)
|
||||
---------------------------
|
||||
- Fixed #1529351 Segv bug on unterminated code blocks.
|
||||
- Fixed #1528269 Invalid code generation.
|
||||
|
||||
Version 0.10.5 (2006-06-11)
|
||||
---------------------------
|
||||
- Fixed long form of -1 switch to --single-pass as noted in man page and help.
|
||||
- Added MSVC 2003 project files and renamed old 2002 ones.
|
||||
|
||||
Version 0.10.4 (2006-06-01)
|
||||
---------------------------
|
||||
- Fix whitespace in generated code.
|
||||
|
||||
Version 0.10.3 (2006-05-14)
|
||||
---------------------------
|
||||
- Fixed issue with -wb and -ws.
|
||||
- Added -g switch to support gcc's computed goto's.
|
||||
- Changed to use nested if's instead of "switch(yyaccept)" in -s mode.
|
||||
|
||||
Version 0.10.2 (2006-05-01)
|
||||
---------------------------
|
||||
- Changed to generate YYMARKER only when needed or in single pass mode.
|
||||
- Added -1 switch to force single pass generation and make two pass the default.
|
||||
- Fixed -i switch.
|
||||
- Added configuration 'yyfill:enable' to allow suppression of YYFILL() blocks.
|
||||
- Added tutorial like lessons to re2c.
|
||||
- Added /*!ignore:re2c */ to support documenting of re2c source.
|
||||
- Fixed issue with multiline re2c comments (/*!max:re2c ... */ and alike).
|
||||
- Fixed generation of YYDEBUG() when using -d switch.
|
||||
- Added /*!getstate:re2c */ which triggers generation of the YYGETSTATE() block.
|
||||
- Added configuration 'state:abort'.
|
||||
- Changed to not generate yyNext unless configuration 'state:nextlabel' is used.
|
||||
- Changed to not generate yyaccept code unless needed.
|
||||
- Changed to use if- instead of switch-expression when yyaccept has only one case.
|
||||
- Added docu, examples and tests to .src.zip package (0.10.1 zip was repackaged).
|
||||
- Fixed #1479044 incorrect code generated when using -b.
|
||||
- Fixed #1472770 re2c creates an infinite loop.
|
||||
- Fixed #1454253 Piece of code saving a backtracking point not generated.
|
||||
- Fixed #1463639 Missing forward declaration.
|
||||
- Implemented #1187127 savable state support for multiple re2c blocks.
|
||||
|
||||
Version 0.10.1 (2006-02-28)
|
||||
---------------------------
|
||||
- Added support for Solaris and native SUN compiler.
|
||||
- Applied #1438160 expose YYCTXMARKER.
|
||||
|
||||
Version 0.10.0 (2006-02-18)
|
||||
---------------------------
|
||||
- Added make target zip to create windows source packages as zip files.
|
||||
- Added re2c:startlabel configuration.
|
||||
- Fixed code generation to not generate unreachable code for initial state.
|
||||
- Added support for c/c++ compatible \u and \U unicode notation.
|
||||
- Added ability to control indentation.
|
||||
- Made scanner error out in case an ambiguous /* is found.
|
||||
- Fixed indentation of generated code.
|
||||
- Added support for DOS line endings.
|
||||
- Added experimental unicode support.
|
||||
- Added config_w32.h to build out of the box on windows (using msvc 2002+).
|
||||
- Added Microsoft Visual C .NET 2005 build files.
|
||||
- Applied #1411087 variable length trailing context.
|
||||
- Applied #1408326 do not generate goto next state.
|
||||
- Applied #1408282 CharSet initialization fix.
|
||||
- Applied #1408278 readsome with MSVC.
|
||||
- Applied #1307467 Unicode patch for 0.9.7.
|
||||
|
||||
Version 0.9.12 (2005-12-28)
|
||||
---------------------------
|
||||
- Fixed bug #1390174 re2c cannot accept {0,}.
|
||||
|
||||
Version 0.9.11 (2005-12-18)
|
||||
---------------------------
|
||||
- Fixed #1313083 -e (EBCDIC cross compile) broken.
|
||||
- Fixed #1297658 underestimation of n in YYFILL(n).
|
||||
- Applied #1339483 Avoid rebuilds of re2c when running subtargets.
|
||||
- Implemented #1335305 symbol table reimplementation, just slightly modified.
|
||||
|
||||
Version 0.9.10 (2005-09-04)
|
||||
---------------------------
|
||||
- Added -i switch to avoid generating #line information.
|
||||
- Fixed bug #1251653 re2c generate some invalid #line on WIN32.
|
||||
|
||||
Version 0.9.9 (2005-07-21)
|
||||
--------------------------
|
||||
- Implemented #1232777 negated char classes '[^...]' and the dot operator '.'.
|
||||
- Added hexadecimal character definitions.
|
||||
- Added consistency check for octal character definitions.
|
||||
|
||||
Version 0.9.8 (2005-06-26)
|
||||
--------------------------
|
||||
- Fixed code generation for -b switch.
|
||||
- Added Microsoft Visual C .NET build files.
|
||||
|
||||
Version 0.9.7 (2005-04-30)
|
||||
--------------------------
|
||||
- Applied #1181535 storable state patch.
|
||||
- Added -d flag which outputs a debugable parser.
|
||||
- Fixed generation of '#line' directives (according to ISO-C99).
|
||||
- Fixed bug #1187785 Re2c fails to generate valid code.
|
||||
- Fixed bug #1187452 unused variable `yyaccept'.
|
||||
|
||||
Version 0.9.6 (2005-04-14)
|
||||
--------------------------
|
||||
- Fixed build with gcc >= 3.4.
|
||||
|
||||
Version 0.9.5 (2005-04-08)
|
||||
--------------------------
|
||||
- Added /*!max:re2c */ which emits a '#define YYMAXFILL <max>\n' line
|
||||
This allows defining buffers of the minimum required length. Occurrence
|
||||
must follow '/*!re2c */' and cannot precede it.
|
||||
- Changed re2c to two pass generation to output warning free code.
|
||||
- Fixed bug #1163046 re2c hangs when processing valid re-file.
|
||||
- Fixed bug #1022799 re2c scanner has buffering bug.
|
||||
|
||||
Version 0.9.4 (2005-03-12)
|
||||
--------------------------
|
||||
- Added --vernum support.
|
||||
- Fixed bug #1054496 incorrect code generated with -b option.
|
||||
- Fixed bug #1012748 re2c does not emit last line if '\n' missing.
|
||||
- Fixed bug #999104 --output=output option does not work as documented.
|
||||
- Fixed bug #999103 Invalid options prefixed with two dashes cause program crash.
|
||||
|
||||
Version 0.9.3 (2004-05-26)
|
||||
--------------------------
|
||||
- Fixes one small possible bug in the generated output. ych instead of yych is
|
||||
output in certain circumstances
|
||||
|
||||
Version 0.9.2 (2004-05-26)
|
||||
--------------------------
|
||||
- Added -o option to specify the output file which also will set the #line
|
||||
directives to something useful.
|
||||
- Print version to cout instead of cerr.
|
||||
- Added -h and -- style options.
|
||||
- Moved development to http://sourceforge.net/projects/re2c .
|
||||
- Fixed bug #960144 minor cosmetic problem.
|
||||
- Fixed bug #953181 cannot compile with.
|
||||
- Fixed bug #939277 Windows support.
|
||||
- Fixed bug #914462 automake build patch
|
||||
- Fixed bug #891940 braced quantifiers: {\d+(,|,\d+)?} style.
|
||||
- Fixed bug #869298 Add case insensitive string literals.
|
||||
- Fixed bug #869297 Input buffer overrun.
|
||||
|
||||
Version 0.9.1 (2003-12-13)
|
||||
--------------------------
|
||||
- Removed rcs comments in source files.
|
||||
|
||||
Version 0.9
|
||||
-----------
|
||||
- Redistribution based on version 0.5.
|
||||
- Added parentheses to assignment expressions in 'if' statements.
|
||||
- Rearranged class members to match initialization order.
|
||||
- Substr fix.
|
||||
- Use array delete [] when necessary.
|
||||
- Other minor fixes for subduing compiler warnings.
|
||||
|
104
tools/re2c/CMakeLists.txt
Normal file
104
tools/re2c/CMakeLists.txt
Normal file
|
@ -0,0 +1,104 @@
|
|||
cmake_minimum_required( VERSION 2.8.7 )
|
||||
|
||||
if( NOT CMAKE_CROSSCOMPILING )
|
||||
|
||||
include( CheckFunctionExists )
|
||||
include( CheckTypeSize )
|
||||
|
||||
if( MSVC )
|
||||
# Runtime type information is required and don't complain about uint32_t to bool conversions
|
||||
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR /wd4800" )
|
||||
endif()
|
||||
|
||||
set( PACKAGE_NAME re2c )
|
||||
set( PACKAGE_TARNAME re2c )
|
||||
set( PACKAGE_VERSION 0.16 )
|
||||
set( PACKAGE_STRING "re2c 0.16" )
|
||||
set( PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net" )
|
||||
|
||||
CHECK_FUNCTION_EXISTS( strdup HAVE_STRDUP )
|
||||
CHECK_FUNCTION_EXISTS( strndup HAVE_STRNDUP )
|
||||
|
||||
CHECK_TYPE_SIZE( "0i8" SIZEOF_0I8 )
|
||||
CHECK_TYPE_SIZE( "0l" SIZEOF_0L )
|
||||
CHECK_TYPE_SIZE( "0ll" SIZEOF_0LL )
|
||||
CHECK_TYPE_SIZE( char SIZEOF_CHAR )
|
||||
CHECK_TYPE_SIZE( short SIZEOF_SHORT )
|
||||
CHECK_TYPE_SIZE( int SIZEOF_INT )
|
||||
CHECK_TYPE_SIZE( long SIZEOF_LONG )
|
||||
CHECK_TYPE_SIZE( "long long" SIZEOF_LONG_LONG )
|
||||
CHECK_TYPE_SIZE( "void *" SIZEOF_VOID_P )
|
||||
# The result variable must be SIZEOF___INT64: that is the name config.h.in
# substitutes via "#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@".
CHECK_TYPE_SIZE( __int64 SIZEOF___INT64 )
|
||||
|
||||
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h )
|
||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} )
|
||||
add_definitions( -DHAVE_CONFIG_H )
|
||||
|
||||
file( GLOB SRC_HDR
|
||||
src/codegen/*.h
|
||||
src/conf/*.h
|
||||
src/ir/*.h
|
||||
src/*.h
|
||||
src/parse/*.h
|
||||
src/util/*.h )
|
||||
|
||||
add_executable( re2c
|
||||
${SRC_HDR}
|
||||
src/codegen/bitmap.cc
|
||||
src/codegen/emit_action.cc
|
||||
src/codegen/emit_dfa.cc
|
||||
src/codegen/label.cc
|
||||
src/codegen/go_construct.cc
|
||||
src/codegen/go_destruct.cc
|
||||
src/codegen/go_emit.cc
|
||||
src/codegen/go_used_labels.cc
|
||||
src/codegen/input_api.cc
|
||||
src/codegen/output.cc
|
||||
src/codegen/print.cc
|
||||
src/conf/msg.cc
|
||||
src/conf/opt.cc
|
||||
src/conf/parse_opts.cc
|
||||
src/conf/warn.cc
|
||||
src/ir/nfa/calc_size.cc
|
||||
src/ir/nfa/nfa.cc
|
||||
src/ir/nfa/split.cc
|
||||
src/ir/adfa/adfa.cc
|
||||
src/ir/adfa/prepare.cc
|
||||
src/ir/dfa/determinization.cc
|
||||
src/ir/dfa/fillpoints.cc
|
||||
src/ir/dfa/minimization.cc
|
||||
src/ir/regexp/display.cc
|
||||
src/ir/regexp/encoding/enc.cc
|
||||
src/ir/regexp/encoding/range_suffix.cc
|
||||
src/ir/regexp/encoding/utf8/utf8_regexp.cc
|
||||
src/ir/regexp/encoding/utf8/utf8_range.cc
|
||||
src/ir/regexp/encoding/utf8/utf8.cc
|
||||
src/ir/regexp/encoding/utf16/utf16_regexp.cc
|
||||
src/ir/regexp/encoding/utf16/utf16.cc
|
||||
src/ir/regexp/encoding/utf16/utf16_range.cc
|
||||
src/ir/regexp/fixed_length.cc
|
||||
src/ir/regexp/regexp.cc
|
||||
src/ir/compile.cc
|
||||
src/ir/rule_rank.cc
|
||||
src/ir/skeleton/control_flow.cc
|
||||
src/ir/skeleton/generate_code.cc
|
||||
src/ir/skeleton/generate_data.cc
|
||||
src/ir/skeleton/match_empty.cc
|
||||
src/ir/skeleton/maxlen.cc
|
||||
src/ir/skeleton/skeleton.cc
|
||||
src/ir/skeleton/unreachable.cc
|
||||
src/ir/skeleton/way.cc
|
||||
src/main.cc
|
||||
src/parse/code.cc
|
||||
src/parse/input.cc
|
||||
src/parse/lex.cc
|
||||
src/parse/lex_conf.cc
|
||||
src/parse/parser.cc
|
||||
src/parse/scanner.cc
|
||||
src/parse/unescape.cc
|
||||
src/util/s_to_n32_unsafe.cc
|
||||
src/util/range.cc )
|
||||
|
||||
set( CROSS_EXPORTS ${CROSS_EXPORTS} re2c PARENT_SCOPE )
|
||||
|
||||
endif()
|
2
tools/re2c/NO_WARRANTY
Normal file
2
tools/re2c/NO_WARRANTY
Normal file
|
@ -0,0 +1,2 @@
|
|||
re2c is distributed with no warranty whatever. The author and any other
|
||||
contributors take no responsibility for the consequences of its use.
|
159
tools/re2c/README
Normal file
159
tools/re2c/README
Normal file
|
@ -0,0 +1,159 @@
|
|||
re2c
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
--------------------------------------------------------------------------------
|
||||
re2c is a tool for generating C-based recognizers from regular expressions.
|
||||
re2c-based scanners are efficient: for programming languages, given similar
|
||||
specifications, a re2c-based scanner is typically almost twice as fast as a
|
||||
flex-based scanner with little or no increase in size (possibly a decrease
|
||||
on cisc architectures). Indeed, re2c-based scanners are quite competitive with
|
||||
hand-crafted ones.
|
||||
|
||||
Unlike flex, re2c does not generate complete scanners: the user must supply some
|
||||
interface code. While this code is not bulky (about 50-100 lines for a
|
||||
flex-like scanner; see the man page and examples in the distribution) careful
|
||||
coding is required for efficiency (and correctness). One advantage of this
|
||||
arrangement is that the generated code is not tied to any particular input
|
||||
model.
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
DOWNLOAD
|
||||
--------------------------------------------------------------------------------
|
||||
The re2c distribution can be found at:
|
||||
https://sourceforge.net/projects/re2c/
|
||||
|
||||
Download the latest tarball:
|
||||
https://sourceforge.net/projects/re2c/files/latest/download
|
||||
|
||||
Clone git repo:
|
||||
git clone git://git.code.sf.net/p/re2c/code-git
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
BUILD
|
||||
--------------------------------------------------------------------------------
|
||||
Contents:
|
||||
1. simple build
|
||||
2. bootstrap
|
||||
3. out-of-source build
|
||||
4. testing
|
||||
5. rebuild documentation
|
||||
6. build for windows with mingw
|
||||
7. build from git
|
||||
|
||||
1. Simplest possible build:
|
||||
$ ./configure [--prefix=<prefix>]
|
||||
$ make
|
||||
$ make install
|
||||
This will build re2c and install it (binary and man page) to <prefix> (defaults
|
||||
to /usr/local).
|
||||
|
||||
2. Bootstrap and rebuild:
|
||||
$ ./configure [--prefix=<prefix>]
|
||||
$ make bootstrap
|
||||
$ make install
|
||||
Usual bootstrap procedure: re2c uses re2c to compile its lexer.
|
||||
1. build lexer (if make finds re2c binary in build directory, it will build lexer
|
||||
from source, otherwise it will use prebuilt lexer)
|
||||
2. build re2c
|
||||
3. build lexer from source using re2c binary in build directory
|
||||
4. rebuild re2c
|
||||
|
||||
3. Out-of-source build:
|
||||
$ mkdir <build-directory>
|
||||
$ cd <build-directory>
|
||||
$ <path-to-configure>/configure [--prefix=<prefix>]
|
||||
$ make
|
||||
$ make install
|
||||
|
||||
4. Testing:
|
||||
$ make check
|
||||
This will redirect test script output to file. If you want to see progress:
|
||||
$ make tests
|
||||
Testing under valgrind (takes a long time):
|
||||
$ make vtests
|
||||
|
||||
5. Rebuild documentation (requires rst2man.py):
|
||||
$ ./configure --enable-docs [--prefix=<prefix>]
|
||||
$ make docs
|
||||
$ make install
|
||||
|
||||
6. Build for windows using mingw:
|
||||
$ ../configure --host i686-w64-mingw32 [--prefix=<prefix>]
|
||||
$ make
|
||||
This will result in an executable re2c.exe, which can be tested with wine:
|
||||
$ make wtests
|
||||
|
||||
7. If you want to build from git, you'll first need to generate autotools files:
|
||||
$ ./autogen.sh
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
INFO
|
||||
--------------------------------------------------------------------------------
|
||||
$ man re2c
|
||||
|
||||
re2c home page:
|
||||
re2c.org
|
||||
|
||||
re2c manual:
|
||||
re2c.org/manual.html
|
||||
|
||||
Ulya Trofimovich's blog on re2c:
|
||||
skvadrik.github.io/aleph_null/re2c.html
|
||||
|
||||
Original paper on re2c: "RE2C: a More Versatile Parser Generator" (1994, Peter
|
||||
Bumbulis and Donald D. Cowan).
|
||||
|
||||
Examples can be found in 'examples' directory.
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
MAILING LISTS
|
||||
--------------------------------------------------------------------------------
|
||||
re2c-general:
|
||||
re2c-general@lists.sourceforge.net
|
||||
re2c-devel:
|
||||
re2c-devel@lists.sourceforge.net
|
||||
|
||||
You are welcome to ask for help or share your thoughts and ideas about re2c :)
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
BUGS
|
||||
--------------------------------------------------------------------------------
|
||||
Please report any bugs and send feature requests to:
|
||||
https://sourceforge.net/p/re2c/_list/tickets
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
AUTHORS
|
||||
--------------------------------------------------------------------------------
|
||||
Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca)
|
||||
Currently maintained by:
|
||||
Ulya Trofimovich <skvadrik@gmail.com>
|
||||
Dan Nuffer <nuffer@users.sourceforge.net>
|
||||
Marcus Boerger <helly@users.sourceforge.net>
|
||||
Hartmut Kaiser <hkaiser@users.sourceforge.net>
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
|
||||
LICENSE
|
||||
--------------------------------------------------------------------------------
|
||||
re2c is distributed with no warranty whatever. The code is certain to contain
|
||||
errors. Neither the author nor any contributor takes responsibility for any
|
||||
consequences of its use.
|
||||
|
||||
re2c is in the public domain. The data structures and algorithms used in re2c
|
||||
are all either taken from documents available to the general public or are
|
||||
inventions of the authors. Programs generated by re2c may be distributed freely.
|
||||
re2c itself may be distributed freely, in source or binary, unchanged or
|
||||
modified. Distributors may charge whatever fees they can obtain for re2c.
|
||||
|
||||
If you do make use of re2c, or incorporate it into a larger project, an
|
||||
acknowledgement somewhere (documentation, research report, etc.) would be
|
||||
appreciated.
|
||||
--------------------------------------------------------------------------------
|
58
tools/re2c/config.h.in
Normal file
58
tools/re2c/config.h.in
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#cmakedefine HAVE_STDINT_H @HAVE_STDINT_H@
|
||||
|
||||
/* Name of package */
|
||||
#cmakedefine PACKAGE "@PACKAGE_NAME@"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#cmakedefine PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@"
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#cmakedefine PACKAGE_TARNAME "@PACKAGE_TARNAME@"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#cmakedefine PACKAGE_URL "@PACKAGE_URL@"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
|
||||
|
||||
/* The size of `0i8', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_0I8 @SIZEOF_0I8@
|
||||
|
||||
/* The size of `0l', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_0L @SIZEOF_0L@
|
||||
|
||||
/* The size of `0ll', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_0LL @SIZEOF_0LL@
|
||||
|
||||
/* The size of `char', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_CHAR @SIZEOF_CHAR@
|
||||
|
||||
/* The size of `int', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_INT @SIZEOF_INT@
|
||||
|
||||
/* The size of `long', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_LONG @SIZEOF_LONG@
|
||||
|
||||
/* The size of `long long', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_LONG_LONG @SIZEOF_LONG_LONG@
|
||||
|
||||
/* The size of `short', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_SHORT @SIZEOF_SHORT@
|
||||
|
||||
/* The size of `void *', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@
|
||||
|
||||
/* The size of `__int64', as computed by sizeof. */
|
||||
#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@
|
||||
|
||||
/* Version number of package */
|
||||
#cmakedefine VERSION @PACKAGE_VERSION@
|
62
tools/re2c/config.msc.h
Normal file
62
tools/re2c/config.msc.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
/* #undef HAVE_STDINT_H */
|
||||
|
||||
/* Name of package */
|
||||
/* #undef PACKAGE */
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net"
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "re2c"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "re2c 0.16"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "re2c"
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
/* #undef PACKAGE_URL */
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "0.16"
|
||||
|
||||
/* The size of `0i8', as computed by sizeof. */
|
||||
#define SIZEOF_0I8 1
|
||||
|
||||
/* The size of `0l', as computed by sizeof. */
|
||||
#define SIZEOF_0L 4
|
||||
|
||||
/* The size of `0ll', as computed by sizeof. */
|
||||
#define SIZEOF_0LL 8
|
||||
|
||||
/* The size of `char', as computed by sizeof. */
|
||||
#define SIZEOF_CHAR 1
|
||||
|
||||
/* The size of `int', as computed by sizeof. */
|
||||
#define SIZEOF_INT 4
|
||||
|
||||
/* The size of `long', as computed by sizeof. */
|
||||
#define SIZEOF_LONG 4
|
||||
|
||||
/* The size of `long long', as computed by sizeof. */
|
||||
#define SIZEOF_LONG_LONG 8
|
||||
|
||||
/* The size of `short', as computed by sizeof. */
|
||||
#define SIZEOF_SHORT 2
|
||||
|
||||
/* The size of `void *', as computed by sizeof. */
|
||||
#ifdef _M_X64
|
||||
#define SIZEOF_VOID_P 8
|
||||
#else
|
||||
#define SIZEOF_VOID_P 4
|
||||
#endif
|
||||
|
||||
/* The size of `__int64', as computed by sizeof. */
|
||||
#define SIZEOF___INT64 8
|
||||
|
||||
/* Version number of package */
|
||||
/* #undef VERSION */
|
83
tools/re2c/examples/001_upn_calculator/README
Normal file
83
tools/re2c/examples/001_upn_calculator/README
Normal file
|
@ -0,0 +1,83 @@
|
|||
re2c lesson 001_upn_calculator, (c) M. Boerger 2006
|
||||
|
||||
This lesson gets you started with re2c. In the end you will have an easy RPN
|
||||
(reverse polish notation) calculator for use at command line.
|
||||
|
||||
You will learn about the basic interface of re2c when scanning input strings.
|
||||
How to detect the end of the input and use that to stop scanning in order to
|
||||
avoid problems.
|
||||
|
||||
Once you have successfully installed re2c you can use it to generate *.c files
|
||||
from the *.re files presented in this lesson. Actually the expected *.c files
|
||||
are already present. So you should name them *.cc or something alike or just
|
||||
give them a different name like test.c. To do so you simply change into the
|
||||
directory and execute the following command:
|
||||
|
||||
re2c calc_001.re > test.c
|
||||
|
||||
Then use your compiler to compile that code and run it. If you are using gcc
|
||||
you simply do the following:
|
||||
|
||||
gcc -o test.o test.c
|
||||
./test.o <input_file_name>
|
||||
|
||||
If you are using windows you might want to read till the end of this lesson.
|
||||
|
||||
When you want to debug the code it helps to make re2c generate working #line
|
||||
information. To do so you simply specify the output file using the -o switch
|
||||
followed by the output filename:
|
||||
|
||||
re2c -o test.c calc_001.re
|
||||
|
||||
The input files *.re each contain basic step-by-step comments that explain what is
|
||||
going on and what you can see in the examples.
|
||||
|
||||
In order to optimize the generated code we will use the -s command line switch
|
||||
of re2c. This tells re2c to generate code that uses if statements rather
|
||||
than endless switch/case expressions where appropriate. Note that the file name
|
||||
extension is actually '.s.re' to tell the test system to use the -s switch. To
|
||||
invoke re2c you do the following:
|
||||
|
||||
re2c -s -o test.c calc_006.s.re
|
||||
|
||||
Finally we use the -b switch to have the code use a decision table. The -b
|
||||
switch also contains the -s behavior.
|
||||
|
||||
re2c -b -o test.c calc_007.b.re
|
||||
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
For windows users Lynn Allan provided some additional stuff to get you started
|
||||
in the Microsoft world. This addon resides in the windows subdirectory and
|
||||
gives you something to experiment with. The code in that directory is based
|
||||
on the first step and has the following changes:
|
||||
|
||||
* vc6 .dsp/.dsw and vc7/vc8 .sln/.vcproj project files that have "Custom Build
|
||||
Steps" that can tell when main.re changes, and know how to generate main.c
|
||||
from main.re. They assume that you unpacked the zip package and have re2c
|
||||
itself built or installed in Release and Release-2005 directory respectively.
|
||||
If re2c cannot be found you need to modify the custom build step and correct
|
||||
the path to re2c.
|
||||
|
||||
* BuildAndRun.bat to do command line re2c and then cl and then run the
|
||||
executable (discontinues with message if errors).
|
||||
|
||||
* built-in cppunit-like test to confirm it worked as expected.
|
||||
|
||||
* array of test strings "fed" to scan rather than file contents to facilitate
|
||||
testing and also reduce the newbie learning curve.
|
||||
|
||||
* HiResTimer output for 10,000 loops and 100,000 loops. While this might be
|
||||
excessive for this lesson, it illustrates how to do it for subsequent lessons
|
||||
and your own stuff using windows. Also it shows that Release build is as fast
|
||||
as strncmp for this test and can probably be made significantly faster.
|
||||
|
||||
* If you want to build the other steps of this lesson using windows tools
|
||||
simply copy the *.re files into the windows directory as main.re and rebuild.
|
||||
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
Sidenote: UPN is the German translation of RPN, somehow hardcoded into the
|
||||
author's brain :-)
|
84
tools/re2c/examples/001_upn_calculator/calc_001.re
Normal file
84
tools/re2c/examples/001_upn_calculator/calc_001.re
Normal file
|
@ -0,0 +1,84 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_001, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- basic interface for string reading
|
||||
|
||||
. We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL
|
||||
. YYCTYPE is the type re2c operates on or in other words the type that
|
||||
it generates code for. While it is not a big difference when we were
|
||||
using 'unsigned char' here we would need to run re2c with option -w
|
||||
to fully support types with sizeof() > 1.
|
||||
. YYCURSOR is used internally and holds the current scanner position. In
|
||||
expression handlers, the code blocks after re2c expressions, this can be
|
||||
used to identify the end of the token.
|
||||
. YYMARKER is not always being used so we set an initial value to avoid
|
||||
a compiler warning. Here we could also omit it completely.
|
||||
. YYLIMIT stores the end of the input. Unfortunately we have to use strlen()
|
||||
in this lesson. In the next example we see one way to get rid of it.
|
||||
. We use a 'for(;;)'-loop around the scanner block. We could have used a
|
||||
'while(1)'-loop instead but some compilers generate a warning for it.
|
||||
. To make the output more readable we use 're2c:indent:top' scanner
|
||||
configuration that configures re2c to prepend a single tab (the default)
|
||||
to the beginning of each output line.
|
||||
. The following lines are expressions and for each expression we output the
|
||||
token name and continue the scanner loop.
|
||||
. The second last token detects the end of our input, the terminating zero in
|
||||
our input string. In other scanners detecting the end of input may vary.
|
||||
For example binary code may contain \0 as valid input.
|
||||
. The last expression accepts any input character. It tells re2c to accept
|
||||
the opposite of the empty range. This includes numbers and our tokens but
|
||||
as re2c goes from top to bottom when evaluating the expressions this is no
|
||||
problem.
|
||||
. The first three rules show that re2c actually prioritizes the expressions
|
||||
from top to bottom. Octal numbers require a starting "0" and the actual
|
||||
number. Normal numbers start with a digit greater 0. And zero is finally a
|
||||
special case. A single "0" is detected by the last rule of this set. And
|
||||
valid octal number is already being detected by the first rule. This even
|
||||
includes multi "0" sequences that in octal notation also means zero.
|
||||
Another way would be to only use two rules:
|
||||
"0" [0-9]+
|
||||
"0" | ( [1-9] [0-9]* )
|
||||
A full description of re2c rule syntax can be found in the manual.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Tokenize the string s of length l and print one line per recognized token.
 *
 * The re2c block below is expanded into the actual matching code. Returns 0
 * when the terminating zero is matched ("EOF") and 1 when the catch-all rule
 * fires ("ERR").
 */
int scan(char *s, int l)
{
    char *cursor = s;   /* current scanner position, exposed as YYCURSOR */
    char *marker = 0;   /* exposed as YYMARKER; initialized to avoid a compiler warning */
#define YYCTYPE         char
#define YYCURSOR        cursor
#define YYLIMIT         (s+l)
#define YYMARKER        marker
#define YYFILL(n)

    /* Every rule action either continues this loop or returns. */
    for(;;)
    {
/*!re2c
    re2c:indent:top = 2;
    "0"[0-9]+   { printf("Oct\n"); continue; }
    [1-9][0-9]* { printf("Num\n"); continue; }
    "0"         { printf("Num\n"); continue; }
    "+"         { printf("+\n"); continue; }
    "-"         { printf("-\n"); continue; }
    "\000"      { printf("EOF\n"); return 0; }
    [^]         { printf("ERR\n"); return 1; }
*/
    }
}
|
||||
|
||||
/*
 * Entry point: scan the first command line argument.
 *
 * Without an argument a usage line is written to stderr and 1 is returned;
 * otherwise the result of scan() becomes the exit code.
 */
int main(int argc, char **argv)
{
    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 1;
    }

    return scan(argv[1], strlen(argv[1]));
}
|
69
tools/re2c/examples/001_upn_calculator/calc_002.re
Normal file
69
tools/re2c/examples/001_upn_calculator/calc_002.re
Normal file
|
@ -0,0 +1,69 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_002, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- making use of YYFILL
|
||||
|
||||
. Here we modified the scanner to not require strlen() on the call. Instead
|
||||
we compute limit on the fly. That is whenever more input is needed we
|
||||
search for the terminating \0 in the next n chars the scanner needs.
|
||||
. If there is not enough input we quit the scanner.
|
||||
. Note that in lesson_001 YYLIMIT was a character pointer computed only once.
|
||||
Here it is of course also a pointer to YYCTYPE but a variable that gets reevaluated
|
||||
by YYFILL().
|
||||
. To make the code smaller we take advantage of the fact that our loop has no
|
||||
break so far. This allows us to use break here and have the code that is
|
||||
used for YYFILL() not contain the printf in every occurrence. That way the
|
||||
generated code gets smaller.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Advance from p looking for the terminating zero and publish the new limit.
 *
 * p - current scanner position; the search starts at the character after it
 * n - number of characters the scanner asked for
 * l - receives the position where the search stopped
 *
 * Returns non-zero when the request counter was used up (n <= 0 afterwards),
 * zero when the terminating zero was hit first.
 */
int fill(char *p, int n, char **l)
{
    for (;;)
    {
        ++p;
        if (*p == '\0')
        {
            break;          /* end of string: n is left untouched */
        }
        if (n-- == 0)
        {
            break;          /* counter exhausted: n is now -1 */
        }
    }
    *l = p;
    return n <= 0;
}
|
||||
|
||||
/*
 * Tokenize the zero-terminated string s and print one line per token.
 *
 * The limit l is not known up front; YYFILL() calls fill() to move it forward
 * whenever the generated scanner needs more input, and leaves the loop when
 * fill() reports that not enough input is available.
 *
 * Returns 0 on "EOF", 1 on "ERR" and 2 when the loop was left through
 * YYFILL() ("OOD", out of data).
 */
int scan(char *s)
{
    char *p = s;    /* current scanner position (YYCURSOR) */
    char *l = s;    /* current limit (YYLIMIT), advanced by fill() */
    char *q = 0;    /* YYMARKER */
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         l
#define YYMARKER        q
#define YYFILL(n)       { if (!fill(p, n, &l)) break; }

    for(;;)
    {
/*!re2c
    re2c:indent:top = 2;
    "0"[0-9]+   { printf("Oct\n"); continue; }
    [1-9][0-9]* { printf("Num\n"); continue; }
    "0"         { printf("Num\n"); continue; }
    "+"         { printf("+\n"); continue; }
    "-"         { printf("-\n"); continue; }
    "\000"      { printf("EOF\n"); return 0; }
    [^]         { printf("ERR\n"); return 1; }
*/
    }
    /* Only reached through the break inside YYFILL(). */
    printf("OOD\n"); return 2;
}
|
||||
|
||||
/*
 * Entry point: scan the first command line argument.
 *
 * Without an argument a usage line is written to stderr and 0 is returned;
 * otherwise the result of scan() becomes the exit code.
 */
int main(int argc, char **argv)
{
    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 0;
    }

    return scan(argv[1]);
}
|
61
tools/re2c/examples/001_upn_calculator/calc_003.re
Normal file
61
tools/re2c/examples/001_upn_calculator/calc_003.re
Normal file
|
@ -0,0 +1,61 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_003, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- making use of YYFILL
|
||||
|
||||
. Again provide the length of the input to generate the limit only once. Now
|
||||
we can use YYFILL() to detect the end and simply return since YYFILL() is
|
||||
only being used if the next scanner run might use more chars than YYLIMIT
|
||||
allows.
|
||||
. Note that we now use (s+l+2) instead of (s+l) as we did in lesson_001. In
|
||||
the first lesson we did not quit from YYFILL() and used a special rule to
|
||||
detect the end of input. Here we use the fact that we know the exact end
|
||||
of input and that this length does not include the terminating zero. Since
|
||||
YYLIMIT points to the first character behind the used buffer we use "+ 2".
|
||||
If we would use "+1" we could drop the "\000" rule but could no longer
|
||||
distinguish between end of input and out of data.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Tokenize the string s of length l and print one line per token.
 *
 * YYLIMIT is fixed at s+l+2, so YYFILL() is only reached when the scanner
 * would run past the input; it then prints "OOD" and returns 2.
 *
 * Returns 0 on "EOF", 1 on "ERR" and 2 on out of data.
 */
int scan(char *s, int l)
{
    char *p = s;    /* current scanner position (YYCURSOR) */
    char *q = 0;    /* YYMARKER */
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         (s+l+2)
#define YYMARKER        q
#define YYFILL(n)       { printf("OOD\n"); return 2; }

    for(;;)
    {
/*!re2c
    re2c:indent:top = 2;
    "0"[0-9]+   { printf("Oct\n"); continue; }
    [1-9][0-9]* { printf("Num\n"); continue; }
    "0"         { printf("Num\n"); continue; }
    "+"         { printf("+\n"); continue; }
    "-"         { printf("-\n"); continue; }
    "\000"      { printf("EOF\n"); return 0; }
    [^]         { printf("ERR\n"); return 1; }
*/
    }
    return 0;
}
|
||||
|
||||
/*
 * Entry point: scan the first command line argument.
 *
 * Without an argument a usage line is written to stderr and 0 is returned;
 * otherwise the result of scan() becomes the exit code.
 */
int main(int argc, char **argv)
{
    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 0;
    }

    return scan(argv[1], strlen(argv[1]));
}
|
78
tools/re2c/examples/001_upn_calculator/calc_004.re
Normal file
78
tools/re2c/examples/001_upn_calculator/calc_004.re
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_004, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- making use of definitions
|
||||
. We provide complex rules as definitions. We can even have definitions made
|
||||
up from other definitions. And we could also use definitions as part of
|
||||
rules and not only as full rules as shown in this lesson.
|
||||
|
||||
- showing the tokens
|
||||
. re2c does not store the beginning of a token on its own but we can easily
|
||||
do this by providing a variable, in our case t, that is set to YYCURSOR on
|
||||
every loop. If we were not using a loop here, we could have used
|
||||
s instead of a new variable.
|
||||
. As we use the token for an output function that requires a terminating zero
|
||||
we copy the token. Alternatively we could store the end of the token, then
|
||||
replace it with a zero character and replace it after the token has been
|
||||
used. However that approach is not always acceptable.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Copy the token [t, l) into a freshly allocated, zero-terminated string.
 *
 * t - first character of the token
 * l - one past the last character of the token
 *
 * Returns the copy, which the caller must free(), or NULL if the allocation
 * failed.
 */
char * tokendup(const char *t, const char *l)
{
    size_t n = l - t + 1;   /* token length plus the terminating zero */
    char *r = (char*)malloc(n);

    if (!r)
    {
        return NULL;
    }
    memmove(r, t, n-1);
    r[n-1] = '\0';          /* last valid index is n-1; r[n] would be out of bounds */
    return r;
}
|
||||
|
||||
/*
 * Tokenize the string s of length l and print each token, including the
 * matched text for numbers.
 *
 * t is set to the scanner position at the start of every iteration, so
 * [t, p) is the text of the token that was just matched.
 *
 * Returns 0 on "EOF", 1 on "ERR" and 2 when YYFILL() is reached ("OOD").
 */
int scan(char *s, int l)
{
    char *p = s;    /* current scanner position (YYCURSOR) */
    char *q = 0;    /* YYMARKER */
    char *t;        /* start of the current token */
#define YYCTYPE         char
#define YYCURSOR        p
#define YYLIMIT         (s+l+2)
#define YYMARKER        q
#define YYFILL(n)       { printf("OOD\n"); return 2; }

    for(;;)
    {
        t = p;
/*!re2c
    re2c:indent:top = 2;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;

    OCT     { t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; }
    INT     { t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; }
    "+"     { printf("+\n"); continue; }
    "-"     { printf("-\n"); continue; }
    "\000"  { printf("EOF\n"); return 0; }
    [^]     { printf("ERR\n"); return 1; }
*/
    }
    return 0;
}
|
||||
|
||||
/*
 * Entry point: scan the first command line argument.
 *
 * Without an argument a usage line is written to stderr and 0 is returned;
 * otherwise the result of scan() becomes the exit code.
 */
int main(int argc, char **argv)
{
    if (argc <= 1)
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 0;
    }

    return scan(argv[1], strlen(argv[1]));
}
|
144
tools/re2c/examples/001_upn_calculator/calc_005.re
Normal file
144
tools/re2c/examples/001_upn_calculator/calc_005.re
Normal file
|
@ -0,0 +1,144 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_005, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- turning this lesson into an easy calculator
|
||||
. We are going to write an UPN calculator so we need an additional rule to
|
||||
ignore white space.
|
||||
. Then we need to store the scanned input somewhere and do our math on it.
|
||||
. Also we need to scan all arguments since the main c code gets the input
|
||||
split up into chunks.
|
||||
. In contrast to what we did before we now add a variable res that holds the
|
||||
scanner state. We initialize that variable to 0 and quit the loop when it
|
||||
is non zero. This will also be our return value so that we can use it in
|
||||
function main to generate error information.
|
||||
. To support operating systems where ' and " get passed in program arguments
|
||||
we check for them being first and last input character. If so we correct
|
||||
input pointer and input length. Since now our scanner might not see a
|
||||
terminating zero we change YYLIMIT again and drop the special zero rule.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Expands to the statement itself, so the trace output is always compiled in. */
#define DEBUG(stmt) stmt

/* Operand stack of the calculator and the number of slots currently in use. */
int stack[4];
int depth = 0;

/*
 * Convert the digits in [t, l) to an integer and push it onto the stack.
 *
 * t     - first character of the number
 * l     - one past the last character of the number
 * radix - base used to weight the digits
 *
 * Returns 0 on success or 3 when the stack is already full.
 */
int push_num(const char *t, const char *l, int radix)
{
    int num = 0;

    /* Compare against the element count: sizeof(stack) alone is the size in
       bytes and would let depth run past the end of the array. */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - '0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}

/*
 * Replace the two topmost stack values by their sum.
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_add()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] + stack[depth];
    return 0;
}

/*
 * Replace the two topmost stack values by their difference (lower - upper).
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_sub()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] - stack[depth];
    return 0;
}
|
||||
|
||||
/*
 * Scan the string s of length l and feed numbers and operators to the
 * calculator stack.
 *
 * The loop runs until a rule action stores a non-zero status. When the
 * generated scanner reaches YYFILL() the function returns immediately:
 * 0 if exactly one value is on the stack, 2 otherwise.
 *
 * Returns the status: 0 for success, 1 for an illegal character, or the
 * error code reported by push_num(), stack_add() or stack_sub().
 */
int scan(char *s, int l)
{
    char *cursor = s;   /* current scanner position (YYCURSOR) */
    char *marker = 0;   /* YYMARKER */
    char *token;        /* start of the token being matched */
    int status = 0;

#define YYCTYPE         char
#define YYCURSOR        cursor
#define YYLIMIT         (s+l+1)
#define YYMARKER        marker
#define YYFILL(n)       { return depth == 1 ? 0 : 2; }

    while (status == 0)
    {
        token = cursor;
/*!re2c
    re2c:indent:top = 2;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(token, cursor, 8); continue; }
    INT     { status = push_num(token, cursor, 10); continue; }
    "+"     { status = stack_add(); continue; }
    "-"     { status = stack_sub(); continue; }
    [^]     { status = 1; continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = argv[argp];
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
++inp;
|
||||
len -=2;
|
||||
}
|
||||
res = scan(inp, len);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
162
tools/re2c/examples/001_upn_calculator/calc_006.s.re
Normal file
162
tools/re2c/examples/001_upn_calculator/calc_006.s.re
Normal file
|
@ -0,0 +1,162 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_006, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- avoiding YYFILL()
|
||||
. We use the inplace configuration re2c:yyfill to suppress generation of
|
||||
YYFILL() blocks. This of course means we no longer have to provide the
|
||||
macro.
|
||||
. We also drop the YYMARKER stuff since we know that re2c does not generate
|
||||
it for this example.
|
||||
. Since re2c does no longer check for out of data situations we must do this.
|
||||
For that reason we first reintroduce our zero rule and second we need to
|
||||
ensure that the scanner does not take more than one byte in one go.
|
||||
|
||||
In the example suppose "0" is passed. The scanner reads the first "0" and
|
||||
then is in an undecided state. The scanner can earliest decide on the next
|
||||
char what the token is. In case of a zero the input ends and it was a
|
||||
number, 0 to be precise. In case of a digit it is an octal number and the
|
||||
next character needs to be read. In case of any other character the scanner
|
||||
will detect an error with the any rule [^].
|
||||
|
||||
Now the above shows that the scanner may read two characters directly. But
|
||||
only if the first is a "0". So we could easily check that if the first char
|
||||
is "0" and the next char is a digit then yet another character is present.
|
||||
But we require our input to be zero terminated. And that means we do not
|
||||
have to check anything for this scanner.
|
||||
|
||||
However with other rule sets re2c might read more than one character in a
|
||||
row. In those cases it is normally hard to impossible to avoid YYFILL.
|
||||
|
||||
- optimizing the generated code by using -s command line switch of re2c
|
||||
. This tells re2c to generate code that uses if statements rather
|
||||
than endless switch/case expressions where appropriate. Note that the
|
||||
generated code now requires the input to be unsigned char rather than char
|
||||
due to the way comparisons are generated.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Expands to the statement itself, so the trace output is always compiled in. */
#define DEBUG(stmt) stmt

/* Operand stack of the calculator and the number of slots currently in use. */
int stack[4];
int depth = 0;

/*
 * Convert the digits in [t, l) to an integer and push it onto the stack.
 *
 * t     - first character of the number
 * l     - one past the last character of the number
 * radix - base used to weight the digits
 *
 * Returns 0 on success or 3 when the stack is already full.
 */
int push_num(const unsigned char *t, const unsigned char *l, int radix)
{
    int num = 0;

    /* Compare against the element count: sizeof(stack) alone is the size in
       bytes and would let depth run past the end of the array. */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - (unsigned char)'0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}

/*
 * Replace the two topmost stack values by their sum.
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_add()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] + stack[depth];
    DEBUG(printf("+\n"));
    return 0;
}

/*
 * Replace the two topmost stack values by their difference (lower - upper).
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_sub()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] - stack[depth];
    DEBUG(printf("-\n"));
    return 0;
}
|
||||
|
||||
/*
 * Scan the zero-terminated string s and feed numbers and operators to the
 * calculator stack.
 *
 * YYFILL() generation is disabled, so the end of input is detected by the
 * "\000" rule, which sets the status to 0 if exactly one value is on the
 * stack and to 2 otherwise.
 *
 * Returns the status: 0 for success, 1 for an illegal character, 2 for a
 * premature end of input, or the error code reported by push_num(),
 * stack_add() or stack_sub().
 */
int scan(char *s)
{
    unsigned char *cursor = (unsigned char*)s;  /* current scanner position (YYCURSOR) */
    unsigned char *token;                       /* start of the token being matched */
    int status = 0;

#define YYCTYPE         unsigned char
#define YYCURSOR        cursor

    while (status == 0)
    {
        token = cursor;
/*!re2c
    re2c:indent:top = 2;
    re2c:yyfill:enable = 0;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(token, cursor, 8); continue; }
    INT     { status = push_num(token, cursor, 10); continue; }
    "+"     { status = stack_add(); continue; }
    "-"     { status = stack_sub(); continue; }
    "\000"  { status = depth == 1 ? 0 : 2; break; }
    [^]     { status = 1; continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = strdup(argv[argp]);
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
inp[len - 1] = '\0';
|
||||
++inp;
|
||||
}
|
||||
res = scan(inp);
|
||||
free(inp);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
135
tools/re2c/examples/001_upn_calculator/calc_007.b.re
Normal file
135
tools/re2c/examples/001_upn_calculator/calc_007.b.re
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_007, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- optimizing the generated code by using -b command line switch of re2c
|
||||
. This tells re2c to generate code that uses a decision table. The -b switch
|
||||
also contains the -s behavior. And -b also requires the input to be
|
||||
unsigned chars.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Expands to the statement itself, so the trace output is always compiled in. */
#define DEBUG(stmt) stmt

/* Operand stack of the calculator and the number of slots currently in use. */
int stack[4];
int depth = 0;

/*
 * Convert the digits in [t, l) to an integer and push it onto the stack.
 *
 * t     - first character of the number
 * l     - one past the last character of the number
 * radix - base used to weight the digits
 *
 * Returns 0 on success or 3 when the stack is already full.
 */
int push_num(const unsigned char *t, const unsigned char *l, int radix)
{
    int num = 0;

    /* Compare against the element count: sizeof(stack) alone is the size in
       bytes and would let depth run past the end of the array. */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - (unsigned char)'0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}

/*
 * Replace the two topmost stack values by their sum.
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_add()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] + stack[depth];
    DEBUG(printf("+\n"));
    return 0;
}

/*
 * Replace the two topmost stack values by their difference (lower - upper).
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_sub()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] - stack[depth];
    DEBUG(printf("-\n"));   /* trace the operator that was actually applied */
    return 0;
}
|
||||
|
||||
/*
 * Scan the zero-terminated string s and feed numbers and operators to the
 * calculator stack.
 *
 * YYFILL() generation is disabled, so the end of input is detected by the
 * "\000" rule, which sets the status to 0 if exactly one value is on the
 * stack and to 2 otherwise.
 *
 * Returns the status: 0 for success, 1 for an illegal character, 2 for a
 * premature end of input, or the error code reported by push_num(),
 * stack_add() or stack_sub().
 */
int scan(char *s)
{
    unsigned char *cursor = (unsigned char*)s;  /* current scanner position (YYCURSOR) */
    unsigned char *token;                       /* start of the token being matched */
    int status = 0;

#define YYCTYPE         unsigned char
#define YYCURSOR        cursor

    while (status == 0)
    {
        token = cursor;
/*!re2c
    re2c:indent:top = 2;
    re2c:yyfill:enable = 0;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(token, cursor, 8); continue; }
    INT     { status = push_num(token, cursor, 10); continue; }
    "+"     { status = stack_add(); continue; }
    "-"     { status = stack_sub(); continue; }
    "\000"  { status = depth == 1 ? 0 : 2; break; }
    [^]     { status = 1; continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = strdup(argv[argp]);
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
inp[len - 1] = '\0';
|
||||
++inp;
|
||||
}
|
||||
res = scan(inp);
|
||||
free(inp);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
158
tools/re2c/examples/001_upn_calculator/calc_008.b.re
Normal file
158
tools/re2c/examples/001_upn_calculator/calc_008.b.re
Normal file
|
@ -0,0 +1,158 @@
|
|||
/* re2c lesson 001_upn_calculator, calc_008, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- using -b with signed character input
|
||||
. Since the code is being generated with -b switch re2c requires the internal
|
||||
character variable yych to use an unsigned character type. For that reason
|
||||
the previous lessons had a conversion at the beginning of their scan()
|
||||
function. Other re2c generated code often have the scanners work completely
|
||||
on unsigned input. Thus requesting a conversion.
|
||||
|
||||
To avoid the conversion on input, re2c allows to do the conversion when
|
||||
reading the internal yych variable. To enable that conversion you need to
|
||||
use the inplace configuration 're2c:yych:conversion' and set it to 1. This
|
||||
will change the generated code to insert conversions to YYCTYPE whenever
|
||||
yych is being read.
|
||||
|
||||
- More inplace configurations for better/nicer code
|
||||
. re2c allows to overwrite the generation of any define, label or variable
|
||||
used in the generated code. For example we overwrite the 'yych' variable
|
||||
name to 'curr' using inplace configuration 're2c:variable:yych = curr;'.
|
||||
|
||||
. We furthermore use inplace configurations instead of defines. This allows
|
||||
to use correct conversions to 'unsigned char' instead of having to convert
|
||||
to 'YYCTYPE' when placing 're2c:define:YYCTYPE = "unsigned char";' in front
|
||||
of 're2c:yych:conversion'. Note that we have to use quotation marks for the
|
||||
first setting as it contains a space.
|
||||
|
||||
. Last but not least we use 're2c:labelprefix = scan' to change the prefix
|
||||
of generated labels.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Expands to the statement itself, so the trace output is always compiled in. */
#define DEBUG(stmt) stmt

/* Operand stack of the calculator and the number of slots currently in use. */
int stack[4];
int depth = 0;

/*
 * Convert the digits in [t, l) to an integer and push it onto the stack.
 *
 * t     - first character of the number
 * l     - one past the last character of the number
 * radix - base used to weight the digits
 *
 * Returns 0 on success or 3 when the stack is already full.
 */
int push_num(const char *t, const char *l, int radix)
{
    int num = 0;

    /* Compare against the element count: sizeof(stack) alone is the size in
       bytes and would let depth run past the end of the array. */
    if (depth >= (int)(sizeof(stack) / sizeof(stack[0])))
    {
        return 3;
    }

    for (; t < l; ++t)
    {
        num = num * radix + (*t - '0');
    }
    DEBUG(printf("Num: %d\n", num));

    stack[depth++] = num;
    return 0;
}

/*
 * Replace the two topmost stack values by their sum.
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_add()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] + stack[depth];
    DEBUG(printf("+\n"));
    return 0;
}

/*
 * Replace the two topmost stack values by their difference (lower - upper).
 *
 * Returns 0 on success or 4 when fewer than two values are on the stack.
 */
int stack_sub()
{
    if (depth < 2) return 4;

    --depth;
    stack[depth-1] = stack[depth-1] - stack[depth];
    DEBUG(printf("-\n"));   /* trace the operator that was actually applied */
    return 0;
}
|
||||
|
||||
/*
 * Scan the zero-terminated string p and feed numbers and operators to the
 * calculator stack.
 *
 * All scanner settings are given as inplace configurations inside the re2c
 * block; p itself serves as YYCURSOR. The end of input is detected by the
 * "\000" rule, which sets the status to 0 if exactly one value is on the
 * stack and to 2 otherwise.
 *
 * Returns the status: 0 for success, 1 for an illegal character, 2 for a
 * premature end of input, or the error code reported by push_num(),
 * stack_add() or stack_sub().
 */
int scan(char *p)
{
    char *token;        /* start of the token being matched */
    int status = 0;

    while (status == 0)
    {
        token = p;
/*!re2c
    re2c:define:YYCTYPE = "unsigned char";
    re2c:define:YYCURSOR = p;
    re2c:variable:yych = curr;
    re2c:indent:top = 2;
    re2c:yyfill:enable = 0;
    re2c:yych:conversion = 1;
    re2c:labelprefix = scan;

    DIGIT   = [0-9] ;
    OCT     = "0" DIGIT+ ;
    INT     = "0" | ( [1-9] DIGIT* ) ;
    WS      = [ \t]+ ;

    WS      { continue; }
    OCT     { status = push_num(token, p, 8); continue; }
    INT     { status = push_num(token, p, 10); continue; }
    "+"     { status = stack_add(); continue; }
    "-"     { status = stack_sub(); continue; }
    "\000"  { status = depth == 1 ? 0 : 2; break; }
    [^]     { status = 1; continue; }
*/
    }
    return status;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc > 1)
|
||||
{
|
||||
char *inp;
|
||||
int res = 0, argp = 0, len;
|
||||
|
||||
while(!res && ++argp < argc)
|
||||
{
|
||||
inp = strdup(argv[argp]);
|
||||
len = strlen(inp);
|
||||
if (inp[0] == '\"' && inp[len-1] == '\"')
|
||||
{
|
||||
inp[len - 1] = '\0';
|
||||
++inp;
|
||||
}
|
||||
res = scan(inp);
|
||||
free(inp);
|
||||
}
|
||||
switch(res)
|
||||
{
|
||||
case 0:
|
||||
printf("Result: %d\n", stack[0]);
|
||||
return 0;
|
||||
case 1:
|
||||
fprintf(stderr, "Illegal character in input.\n");
|
||||
return 1;
|
||||
case 2:
|
||||
fprintf(stderr, "Premature end of input.\n");
|
||||
return 2;
|
||||
case 3:
|
||||
fprintf(stderr, "Stack overflow.\n");
|
||||
return 3;
|
||||
case 4:
|
||||
fprintf(stderr, "Stack underflow.\n");
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "%s <expr>\n", argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
54
tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h
Normal file
54
tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h
Normal file
|
@ -0,0 +1,54 @@
|
|||
/**
|
||||
* @file HiResTimer.h
|
||||
* @brief
|
||||
* @note
|
||||
*/
|
||||
|
||||
#ifndef _HI_RES_TIMER_H_
|
||||
#define _HI_RES_TIMER_H_
|
||||
|
||||
#ifdef WIN32
|
||||
#include <windows.h> // probably already done in stdafx.h
|
||||
static LARGE_INTEGER start;
|
||||
static LARGE_INTEGER stop;
|
||||
static LARGE_INTEGER freq;
|
||||
static _int64 elapsedCounts;
|
||||
static double elapsedMillis;
|
||||
static double elapsedMicros;
|
||||
static HANDLE processHandle;
|
||||
static DWORD prevPriorityClass;
|
||||
|
||||
void HrtInit()
|
||||
{
|
||||
processHandle = GetCurrentProcess();
|
||||
prevPriorityClass = GetPriorityClass(processHandle);
|
||||
QueryPerformanceFrequency(&freq);
|
||||
}
|
||||
|
||||
/**
 * @brief Store the current performance counter value in `start`; it is the
 *        reference point HrtElapsedMillis() subtracts from.
 */
void HrtStart()
|
||||
{
|
||||
QueryPerformanceCounter(&start);
|
||||
}
|
||||
|
||||
/**
 * @brief Remember the current priority class of the process and switch it
 *        to the requested one.
 * @param priority  priority class passed on to SetPriorityClass()
 */
void HrtSetPriority(DWORD priority)
{
    prevPriorityClass = GetPriorityClass(processHandle);
    /* The result was only ever stored in a write-only local; discard it
     * explicitly instead. */
    (void)SetPriorityClass(processHandle, priority);
}
|
||||
|
||||
void HrtResetPriority(void)
|
||||
{
|
||||
int flag = SetPriorityClass(processHandle, prevPriorityClass);
|
||||
}
|
||||
|
||||
double HrtElapsedMillis()
|
||||
{
|
||||
QueryPerformanceCounter(&stop);
|
||||
elapsedCounts = (stop.QuadPart - start.QuadPart);
|
||||
elapsedMillis = ((elapsedCounts * 1000.0) / freq.QuadPart);
|
||||
return elapsedMillis;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
291
tools/re2c/examples/001_upn_calculator/windows/main.b.re
Normal file
291
tools/re2c/examples/001_upn_calculator/windows/main.b.re
Normal file
|
@ -0,0 +1,291 @@
|
|||
/* re2c lesson 001_upn_calculator, main.b.re, (c) M. Boerger, L. Allan 2006 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- basic interface for string reading
|
||||
|
||||
. We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL
|
||||
. YYCTYPE is the type re2c operates on or in other words the type that
|
||||
it generates code for. While it is not a big difference when we were
|
||||
using 'unsigned char' here we would need to run re2c with option -w
|
||||
to fully support types with sizeof() > 1.
|
||||
. YYCURSOR is used internally and holds the current scanner position. In
|
||||
expression handlers, the code blocks after re2c expressions, this can be
|
||||
used to identify the end of the token.
|
||||
. YYMARKER is not always being used so we set an initial value to avoid
|
||||
a compiler warning.
|
||||
. YYLIMIT stores the end of the input. Unfortunately we have to use strlen()
|
||||
in this lesson. In the next example we see one way to get rid of it.
|
||||
. We use a 'for(;;)'-loop around the scanner block. We could have used a
|
||||
'while(1)'-loop instead but some compilers generate a warning for it.
|
||||
. To make the output more readable we use 're2c:indent:top' scanner
|
||||
configuration that configures re2c to prepend a single tab (the default)
|
||||
to the beginning of each output line.
|
||||
. The following lines are expressions and for each expression we output the
|
||||
token name and continue the scanner loop.
|
||||
. The second last token detects the end of our input, the terminating zero in
|
||||
our input string. In other scanners detecting the end of input may vary.
|
||||
For example binary code may contain \0 as valid input.
|
||||
. The last expression accepts any input character. It tells re2c to accept
|
||||
the opposite of the empty range. This includes numbers and our tokens but
|
||||
as re2c goes from top to bottom when evaluating the expressions this is no
|
||||
problem.
|
||||
. The first three rules show that re2c actually prioritizes the expressions
|
||||
from top to bottom. Octal number require a starting "0" and the actual
|
||||
number. Normal numbers start with a digit greater 0. And zero is finally a
|
||||
special case. A single "0" is detected by the last rule of this set. And
|
||||
valid octal number is already being detected by the first rule. This even
|
||||
includes multi "0" sequences that in octal notation also means zero.
|
||||
Another way would be to only use two rules:
|
||||
"0" [0-9]+
|
||||
"0" | ( [1-9] [0-9]* )
|
||||
A full description of re2c rule syntax can be found in the manual.
|
||||
*/
|
||||
|
||||
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
|
||||
|
||||
#if _MSC_VER > 1200
|
||||
#define WINVER 0x0400 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later.
|
||||
#endif // Prevents warning from vc7.1 complaining about redefinition
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <windows.h>
|
||||
#include "HiResTimer.h"
|
||||
|
||||
static char gTestBuf[1000] = "";
|
||||
|
||||
/**
|
||||
* @brief Setup HiResolution timer and confirm it is working ok
|
||||
*/
|
||||
void InitHiResTimerAndVerifyWorking(void)
|
||||
{
|
||||
double elapsed;
|
||||
HrtInit();
|
||||
HrtSetPriority(ABOVE_NORMAL_PRIORITY_CLASS);
|
||||
HrtStart();
|
||||
Sleep(100);
|
||||
elapsed = HrtElapsedMillis();
|
||||
if ((elapsed < 90) || (elapsed > 110)) {
|
||||
printf("HiResTimer misbehaving: %f\n", elapsed);
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Scan for numbers in different formats
 *
 * Timing variant of the scanner: the rules only continue or return, nothing
 * is printed or recorded.
 *
 * @param pzStrToScan   zero terminated string to scan
 * @param lenStrToScan  length of the string, used to derive YYLIMIT
 * @return 0 when the terminating zero is reached, 1 for a character that no
 *         other rule accepts
 */
int ScanFullSpeed(char *pzStrToScan, size_t lenStrToScan)
{
    unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan;
    unsigned char *pzBacktrackInfo = 0;
#define YYCTYPE unsigned char
#define YYCURSOR pzCurScanPos
    /* YYLIMIT has to be a YYCTYPE pointer like YYCURSOR, so convert the
     * char pointer before adding the length. */
#undef YYLIMIT
#define YYLIMIT ((unsigned char*)pzStrToScan+lenStrToScan)
#define YYMARKER pzBacktrackInfo
#define YYFILL(n)

    for(;;)
    {
/*!re2c
re2c:indent:top = 2;
[1-9][0-9]* { continue; }
[0][0-9]+ { continue; }
"+" { continue; }
"-" { continue; }
"\000" { return 0; }
[^] { return 1; }
*/
    }
}
/* Remove the definition again so that a textually different YYLIMIT further
 * down in this file is not an incompatible macro redefinition. */
#undef YYLIMIT
|
||||
|
||||
/**
 * @brief Scan for numbers in different formats
 *
 * Prints the name of every recognized token and appends it to gTestBuf
 * (the end of input is printed as "EOF" but not appended).
 *
 * @param pzStrToScan   zero terminated string to scan
 * @param lenStrToScan  length of the string, used to derive YYLIMIT
 * @return 0 when the terminating zero is reached, 1 for a character that no
 *         other rule accepts
 */
int scan(char *pzStrToScan, size_t lenStrToScan)
{
    unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan;
    unsigned char *pzBacktrackInfo = 0;
#define YYCTYPE unsigned char
#define YYCURSOR pzCurScanPos
    /* YYLIMIT has to be a YYCTYPE pointer like YYCURSOR, so convert the
     * char pointer before adding the length. Any earlier definition is
     * dropped first because it may differ textually. */
#undef YYLIMIT
#define YYLIMIT ((unsigned char*)pzStrToScan+lenStrToScan)
#define YYMARKER pzBacktrackInfo
#define YYFILL(n)

    for(;;)
    {
/*!re2c
re2c:indent:top = 2;
[1-9][0-9]* { printf("Num\n"); strcat(gTestBuf, "Num "); continue; }
[0][0-9]+ { printf("Oct\n"); strcat(gTestBuf, "Oct "); continue; }
"+" { printf("+\n"); strcat(gTestBuf, "+ "); continue; }
"-" { printf("-\n"); strcat(gTestBuf, "- "); continue; }
"\000" { printf("EOF\n"); return 0; }
[^] { printf("ERR\n"); strcat(gTestBuf, "ERR "); return 1; }
*/
    }
}
|
||||
|
||||
/**
 * @brief Show high resolution elapsed time for 10,000 and 100,000 loops
 *
 * Each pass times repeated strncmp() calls and prints the elapsed time and
 * the match counters. The total match counter keeps running across both
 * passes.
 */
void DoTimingsOfStrnCmp(void)
{
    static const int passLoops[] = { 10000, 100000 };
    const int progressAnd = 0xFFFFF000;
    char testStr[] = "Hello, world";
    int totFoundCount = 0;
    int foundCount = 0;
    int pass;

    for (pass = 0; pass < 2; ++pass) {
        const int totLoops = passLoops[pass];
        double elapsed;
        int loop;

        if (pass == 0) {
            /* only the first pass announces its loop count */
            printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
        }

        HrtStart();
        for (loop = 0; loop < totLoops; ++loop) {
            foundCount = 0;
            if (strncmp(testStr, "Hello", 5) != 0) {
                continue;
            }
            foundCount++;
            totFoundCount++;
            /* true whenever the low 12 bits of the running total are clear */
            if ((totFoundCount & progressAnd) == totFoundCount) {
                printf("*");
            }
        }
        elapsed = HrtElapsedMillis();

        printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
        printf("FoundCount each loop: %d\n", foundCount);
        printf("TotalFoundCount for all loops: %d\n", totFoundCount);
    }
}
|
||||
|
||||
/**
|
||||
* @brief Show high resolution elapsed time for 10,000 and 100,000 loops
|
||||
*/
|
||||
void DoTimingsOfRe2c(void)
|
||||
{
|
||||
char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" };
|
||||
const int testCount = sizeof(testStrings) / sizeof(testStrings[0]);
|
||||
int i;
|
||||
int totLoops = 10000 / testCount; // Doing more than one per loop
|
||||
int totFoundCount = 0;
|
||||
int foundCount = 0;
|
||||
int loop;
|
||||
int rc;
|
||||
const int progressAnd = 0xFFFFF000;
|
||||
double elapsed;
|
||||
|
||||
printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
|
||||
|
||||
HrtStart();
|
||||
for (loop = 0; loop < totLoops; ++loop) {
|
||||
foundCount = 0;
|
||||
strcpy(gTestBuf, "");
|
||||
for (i = 0; i < testCount; ++i) {
|
||||
char* pzCurStr = testStrings[i];
|
||||
size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string?
|
||||
rc = ScanFullSpeed(pzCurStr, len);
|
||||
if (rc == 0) {
|
||||
foundCount++;
|
||||
totFoundCount++;
|
||||
if ((totFoundCount & progressAnd) == totFoundCount) {
|
||||
printf("*");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elapsed = HrtElapsedMillis();
|
||||
printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
|
||||
printf("FoundCount each loop: %d\n", foundCount);
|
||||
printf("TotalFoundCount for all loops: %d\n", totFoundCount);
|
||||
|
||||
totLoops = 100000 / testCount;
|
||||
printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1));
|
||||
|
||||
HrtStart();
|
||||
for (loop = 0; loop < totLoops; ++loop) {
|
||||
foundCount = 0;
|
||||
strcpy(gTestBuf, "");
|
||||
for (i = 0; i < testCount; ++i) {
|
||||
char* pzCurStr = testStrings[i];
|
||||
size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string?
|
||||
rc = ScanFullSpeed(pzCurStr, len);
|
||||
if (rc == 0) {
|
||||
foundCount++;
|
||||
totFoundCount++;
|
||||
if ((totFoundCount & progressAnd) == totFoundCount) {
|
||||
printf("*");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elapsed = HrtElapsedMillis();
|
||||
printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed);
|
||||
printf("FoundCount each loop: %d\n", foundCount);
|
||||
printf("TotalFoundCount for all loops: %d\n", totFoundCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Entry point for console app
|
||||
*/
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char testStr_A[] = "123";
|
||||
char* testStr_B = "456";
|
||||
char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" };
|
||||
const int testCount = sizeof(testStrings) / sizeof(testStrings[0]);
|
||||
int i;
|
||||
|
||||
int rc = scan(testStr_A, 3);
|
||||
printf("rc: %d\n", rc);
|
||||
|
||||
rc = scan(testStr_B, 3);
|
||||
printf("rc: %d\n", rc);
|
||||
|
||||
rc = scan("789", 3);
|
||||
printf("rc: %d\n", rc);
|
||||
|
||||
strcpy(gTestBuf, "");
|
||||
for (i = 0; i < testCount; ++i) {
|
||||
char* pzCurStr = testStrings[i];
|
||||
size_t len = strlen(pzCurStr);
|
||||
scan(pzCurStr, len);
|
||||
}
|
||||
printf("%s\n", gTestBuf);
|
||||
rc = strcmp(gTestBuf, "Num Num + Num Oct - Oct ERR Num ERR ");
|
||||
if (rc == 0) {
|
||||
printf("Success\n");
|
||||
}
|
||||
else {
|
||||
printf("Failure\n");
|
||||
}
|
||||
assert(0 == rc); // Doesn't work with Release build
|
||||
|
||||
InitHiResTimerAndVerifyWorking();
|
||||
|
||||
DoTimingsOfStrnCmp();
|
||||
|
||||
DoTimingsOfRe2c();
|
||||
|
||||
return 0;
|
||||
}
|
21
tools/re2c/examples/002_strip_comments/README
Normal file
21
tools/re2c/examples/002_strip_comments/README
Normal file
|
@ -0,0 +1,21 @@
|
|||
re2c lesson 002_strip_comments, (c) M. Boerger 2006
|
||||
|
||||
In this lesson you will learn how to use multiple scanner blocks and how to
|
||||
read the input from a file instead of a zero terminated string. In the end you
|
||||
will have a scanner that filters comments out of c source files but keeps re2c
|
||||
comments.
|
||||
|
||||
The first scanner can be generated with:
|
||||
|
||||
re2c -s -o t.c strip_001.s.re
|
||||
|
||||
In the second step we will learn about YYMARKER that stores backtracking
|
||||
information.
|
||||
|
||||
re2c -s -o t.c strip_002.s.re
|
||||
|
||||
The third step brings trailing contexts that are stored in YYCTXMARKER. We also
|
||||
change to use -b instead of -s option since the scanner gets more and more
|
||||
complex.
|
||||
|
||||
re2c -b -o t.c strip_003.b.re
|
147
tools/re2c/examples/002_strip_comments/strip_001.s.re
Normal file
147
tools/re2c/examples/002_strip_comments/strip_001.s.re
Normal file
|
@ -0,0 +1,147 @@
|
|||
/* re2c lesson 002_strip_comments, strip_001.s, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- basic interface for file reading
|
||||
. This scanner will read chunks of input from a file. The easiest way would
|
||||
be to read the whole file into a memory buffer and use that a zero
|
||||
terminated string.
|
||||
. Instead we want to read input chunks of a reasonable size as they are needed
|
||||
by the scanner. Thus we basically need YYFILL(n) to call fread(n).
|
||||
. Before we provide a buffer that we constantly reallocate we instead use
|
||||
one buffer that we get from the stack or global memory just once. When we
|
||||
reach the end of the buffer we simply move the beginning of our input
|
||||
that is somewhere in our buffer to the beginning of our buffer and then
|
||||
append the next chunk of input to the correct end inside our buffer.
|
||||
. As re2c scanners might read more than one character we need to ensure our
|
||||
buffer is long enough. We can use re2c to inform about the maximum size
|
||||
by placing a "!max:re2c" comment somewhere. This gets translated to a
|
||||
"#define YYMAXFILL <n>" line where <n> is the maximum length value. This
|
||||
define can be used as precompiler condition.
|
||||
|
||||
- multiple scanner blocks
|
||||
. We use a main scanner block that outputs every input character unless the
|
||||
input is two /s or a / followed by a *. In the latter two cases we switch
|
||||
to a special c++ comment and a comment block respectively.
|
||||
. Both special blocks simply detect their end and ignore any other character.
|
||||
. The c++ block is a bit special. Since the terminating new line needs to
|
||||
be output and that can either be a new line or a carriage return followed
|
||||
by a new line.
|
||||
. In order to ensure that we do not read behind our buffer we reset the token
|
||||
pointer to the cursor on every scanner run.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!max:re2c */
|
||||
#define BSIZE 128
|
||||
|
||||
#if BSIZE < YYMAXFILL
|
||||
# error BSIZE must be greater YYMAXFILL
|
||||
#endif
|
||||
|
||||
#define YYCTYPE unsigned char
|
||||
#define YYCURSOR s.cur
|
||||
#define YYLIMIT s.lim
|
||||
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
|
||||
|
||||
typedef struct Scanner
|
||||
{
|
||||
FILE *fp;
|
||||
unsigned char *cur, *tok, *lim, *eof;
|
||||
unsigned char buffer[BSIZE];
|
||||
} Scanner;
|
||||
|
||||
/*
 * Make more input available to the scanner.
 *
 * s    scanner state; the data from s->tok up to s->lim is kept and moved to
 *      the start of s->buffer, then the free tail is filled from s->fp
 * len  number of characters the scanner asked for; 0 requests the initial
 *      setup of the buffer pointers
 *
 * Returns -1 when scanning can go on, or 0 when the end of the file was
 * already reached and fewer than len characters remain behind s->cur.
 */
int fill(Scanner *s, int len)
{
	if (!len)
	{
		s->cur = s->tok = s->lim = s->buffer;
		s->eof = 0;
	}
	if (!s->eof)
	{
		size_t got, room;
		int shift = s->tok - s->buffer;

		if (shift > 0)
		{
			/* source and destination overlap, which memcpy() does not allow */
			memmove(s->buffer, s->tok, s->lim - s->tok);
			s->tok -= shift;
			s->cur -= shift;
			s->lim -= shift;
		}
		/* only the space behind the retained data may be overwritten */
		room = (size_t)(s->buffer + BSIZE - s->lim);
		if ((got = fread(s->lim, 1, room, s->fp)) != room)
		{
			s->eof = &s->lim[got];
		}
		s->lim += got;
	}
	else if (s->cur + len > s->eof)
	{
		return 0; /* not enough input data */
	}
	return -1;
}
|
||||
|
||||
/*
 * Copy the contents of fp to stdout while dropping C block comments and C++
 * line comments; the line break that ends a C++ comment is still written.
 *
 * Returns 1 when fp is NULL. Otherwise the scanner loop runs until YYFILL
 * stores a fill() result >= 0 in res and breaks; that value is returned.
 * fp is closed before returning unless it is stdin.
 */
int scan(FILE *fp)
|
||||
{
|
||||
int res = 0;
|
||||
Scanner s;
|
||||
|
||||
if (!fp)
|
||||
{
|
||||
return 1; /* no file was opened */
|
||||
}
|
||||
|
||||
s.fp = fp;
|
||||
|
||||
fill(&s, 0);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
re2c:indent:top = 2;
|
||||
|
||||
NL = "\r"? "\n" ;
|
||||
ANY = [^] ;
|
||||
|
||||
"/" "/" { goto cppcomment; }
|
||||
"/" "*" { goto comment; }
|
||||
ANY { fputc(*s.tok, stdout); continue; }
|
||||
*/
|
||||
comment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
"*" "/" { continue; }
|
||||
ANY { goto comment; }
|
||||
*/
|
||||
cppcomment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { fwrite(s.tok, 1, s.cur - s.tok, stdout); continue; }
|
||||
ANY { goto cppcomment; }
|
||||
*/
|
||||
}
|
||||
|
||||
if (fp != stdin)
|
||||
{
|
||||
fclose(fp); /* close only if not stdin */
|
||||
}
|
||||
return res; /* return result */
|
||||
}
|
||||
|
||||
/*
 * Strip the comments from the file named by the first argument, or from
 * stdin when that argument is "-". Returns the result of scan(), or 1 after
 * printing a usage line when no argument was given.
 */
int main(int argc, char **argv)
{
	if (argc > 1)
	{
		return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"));
	}
	else
	{
		/* the argument is an input file (or "-"), not an expression */
		fprintf(stderr, "%s <file>|-\n", argv[0]);
		return 1;
	}
}
|
162
tools/re2c/examples/002_strip_comments/strip_002.s.re
Normal file
162
tools/re2c/examples/002_strip_comments/strip_002.s.re
Normal file
|
@ -0,0 +1,162 @@
|
|||
/* re2c lesson 002_strip_comments, strip_002.s, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- complexity
|
||||
. When a comment is preceded by a new line and followed by whitespace and a
|
||||
new line then we can drop the trailing whitespace and new line.
|
||||
. Additional to what we strip out already what about two consecutive comment
|
||||
blocks? When two comments are only separated by whitespace we want to drop
|
||||
both. In other words when detecting the end of a comment block we need to
|
||||
check whether it is followed by only whitespace and then a new comment in
|
||||
which case we continue ignoring the input. If it is followed only by white
|
||||
space and a new line we strip out the new white space and new line. In any
|
||||
other case we start outputting all that follows.
|
||||
But we cannot simply use the following two rules:
|
||||
"*" "/" WS* "/" "*" { continue; }
|
||||
"*" "/" WS* NL { continue; }
|
||||
The main problem is that WS* can get bigger than our buffer, so we need a
|
||||
new scanner.
|
||||
. Meanwhile our scanner gets a bit more complex and we have to add two more
|
||||
things. First the scanner code now uses a YYMARKER to store backtracking
|
||||
information.
|
||||
|
||||
- backtracking information
|
||||
. When the scanner has two rules that can have the same beginning but a
|
||||
different ending then it needs to store the position that identifies the
|
||||
common part. This is called backtracking. As mentioned above re2c expects
|
||||
you to provide compiler define YYMARKER and a pointer variable.
|
||||
. When shifting buffer contents as done in our fill function the marker needs
|
||||
to be corrected, too.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!max:re2c */
|
||||
#define BSIZE 128
|
||||
|
||||
#if BSIZE < YYMAXFILL
|
||||
# error BSIZE must be greater YYMAXFILL
|
||||
#endif
|
||||
|
||||
#define YYCTYPE unsigned char
|
||||
#define YYCURSOR s.cur
|
||||
#define YYLIMIT s.lim
|
||||
#define YYMARKER s.mrk
|
||||
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
|
||||
|
||||
typedef struct Scanner
|
||||
{
|
||||
FILE *fp;
|
||||
unsigned char *cur, *tok, *lim, *eof, *mrk;
|
||||
unsigned char buffer[BSIZE];
|
||||
} Scanner;
|
||||
|
||||
/*
 * Make more input available to the scanner.
 *
 * s    scanner state; the data from s->tok up to s->lim is kept and moved to
 *      the start of s->buffer, then the free tail is filled from s->fp. The
 *      backtracking marker s->mrk is moved along with the data.
 * len  number of characters the scanner asked for; 0 requests the initial
 *      setup of the buffer pointers
 *
 * Returns -1 when scanning can go on, or 0 when the end of the file was
 * already reached and fewer than len characters remain behind s->cur.
 */
int fill(Scanner *s, int len)
{
	if (!len)
	{
		s->cur = s->tok = s->lim = s->mrk = s->buffer;
		s->eof = 0;
	}
	if (!s->eof)
	{
		size_t got, room;
		int shift = s->tok - s->buffer;

		if (shift > 0)
		{
			/* source and destination overlap, which memcpy() does not allow */
			memmove(s->buffer, s->tok, s->lim - s->tok);
			s->tok -= shift;
			s->cur -= shift;
			s->lim -= shift;
			s->mrk -= shift;
		}
		/* only the space behind the retained data may be overwritten */
		room = (size_t)(s->buffer + BSIZE - s->lim);
		if ((got = fread(s->lim, 1, room, s->fp)) != room)
		{
			s->eof = &s->lim[got];
		}
		s->lim += got;
	}
	else if (s->cur + len > s->eof)
	{
		return 0; /* not enough input data */
	}
	return -1;
}
|
||||
|
||||
/* Write the current token, i.e. the bytes from s->tok up to s->cur, to stdout. */
void echo(Scanner *s)
|
||||
{
|
||||
fwrite(s->tok, 1, s->cur - s->tok, stdout);
|
||||
}
|
||||
|
||||
/*
 * Copy the contents of fp to stdout while dropping C block comments and C++
 * line comments. Whitespace that directly follows a block comment is dropped
 * as well until a line break or any other character is found, which is
 * written out again. The line break that ends a C++ comment is written too.
 *
 * Returns 1 when fp is NULL. Otherwise the scanner loop runs until YYFILL
 * stores a fill() result >= 0 in res and breaks; that value is returned.
 * fp is closed before returning unless it is stdin.
 */
int scan(FILE *fp)
|
||||
{
|
||||
int res = 0;
|
||||
Scanner s;
|
||||
|
||||
if (!fp)
|
||||
{
|
||||
return 1; /* no file was opened */
|
||||
}
|
||||
|
||||
s.fp = fp;
|
||||
|
||||
fill(&s, 0);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
re2c:indent:top = 2;
|
||||
|
||||
NL = "\r"? "\n" ;
|
||||
WS = [\r\n\t ] ;
|
||||
ANY = [^] ;
|
||||
|
||||
"/" "/" { goto cppcomment; }
|
||||
"/" "*" { goto comment; }
|
||||
ANY { fputc(*s.tok, stdout); continue; }
|
||||
*/
|
||||
comment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
"*" "/" { goto commentws; }
|
||||
ANY { goto comment; }
|
||||
*/
|
||||
commentws:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { echo(&s); continue; }
|
||||
WS { goto commentws; }
|
||||
ANY { echo(&s); continue; }
|
||||
*/
|
||||
cppcomment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { echo(&s); continue; }
|
||||
ANY { goto cppcomment; }
|
||||
*/
|
||||
}
|
||||
|
||||
if (fp != stdin)
|
||||
{
|
||||
fclose(fp); /* close only if not stdin */
|
||||
}
|
||||
return res; /* return result */
|
||||
}
|
||||
|
||||
/*
 * Strip the comments from the file named by the first argument, or from
 * stdin when that argument is "-". Returns the result of scan(), or 1 after
 * printing a usage line when no argument was given.
 */
int main(int argc, char **argv)
{
	if (argc > 1)
	{
		return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"));
	}
	else
	{
		/* the argument is an input file (or "-"), not an expression */
		fprintf(stderr, "%s <file>|-\n", argv[0]);
		return 1;
	}
}
|
179
tools/re2c/examples/002_strip_comments/strip_003.b.re
Normal file
179
tools/re2c/examples/002_strip_comments/strip_003.b.re
Normal file
|
@ -0,0 +1,179 @@
|
|||
/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 - 2007 */
|
||||
/*!ignore:re2c
|
||||
|
||||
- more complexity
|
||||
. Right now we strip out trailing white space and new lines after a comment
|
||||
block. This can be a problem when the comment block was not preceded by
|
||||
a new line.
|
||||
. The solution is to use trailing contexts.
|
||||
|
||||
- trailing contexts
|
||||
. Re2c allows to check for a portion of input and only recognize it when it
|
||||
is followed by another portion. This is called a trailing context.
|
||||
. The trailing context is not part of the identified input. That means that
|
||||
it follows exactly at the cursor. A consequence is that the scanner has
|
||||
already read more input and on the next run you need to restore beginning
|
||||
of input, in our case s.tok, from the cursor, here s.cur, rather than
|
||||
restoring to the beginning of the buffer. This way the scanner can reuse
|
||||
the portion it has already read.
|
||||
. The position of the trailing context is stored in YYCTXMARKER for which
|
||||
a pointer variable needs to be provided.
|
||||
. As with YYMARKER the corresponding variable needs to be corrected if we
|
||||
shift in some buffer.
|
||||
. Still this is not all we need to solve the problem. What is left is that
|
||||
the information whether a trailing context was detected has to
|
||||
be stored somewhere. This is done by the new variable nlcomment.
|
||||
|
||||
- formatting
|
||||
. Until now we only used single line expression code and we always had the
|
||||
opening { on the same line as the rule itself. If we have multiline rule
|
||||
code and care for formatting we can no longer rely on re2c. Now we have
|
||||
to indent the rule code ourself. Also we need to take care of the opening
|
||||
{. If we keep it on the same line as the rule then re2c will indent it
|
||||
correctly and the emitted #line informations will be correct. If we place
|
||||
it on the next line then the #line directive will also point to that line
|
||||
and not to the rule.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!max:re2c */
|
||||
#define BSIZE 128
|
||||
|
||||
#if BSIZE < YYMAXFILL
|
||||
# error BSIZE must be greater YYMAXFILL
|
||||
#endif
|
||||
|
||||
#define YYCTYPE unsigned char
|
||||
#define YYCURSOR s.cur
|
||||
#define YYLIMIT s.lim
|
||||
#define YYMARKER s.mrk
|
||||
#define YYCTXMARKER s.ctx
|
||||
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }
|
||||
|
||||
typedef struct Scanner
|
||||
{
|
||||
FILE *fp;
|
||||
unsigned char *cur, *tok, *lim, *eof, *ctx, *mrk;
|
||||
unsigned char buffer[BSIZE];
|
||||
} Scanner;
|
||||
|
||||
/*
 * Make more input available to the scanner.
 *
 * s    scanner state; the data from s->tok up to s->lim is kept and moved to
 *      the start of s->buffer, then the free tail is filled from s->fp. The
 *      markers s->mrk and s->ctx are moved along with the data.
 * len  number of characters the scanner asked for; 0 requests the initial
 *      setup of the buffer pointers
 *
 * Returns -1 when scanning can go on, or 0 when the end of the file was
 * already reached and fewer than len characters remain behind s->cur.
 */
int fill(Scanner *s, int len)
{
	if (!len)
	{
		/* s->ctx is adjusted below on every shift, so it needs a defined
		 * start value just like the other pointers */
		s->cur = s->tok = s->lim = s->mrk = s->ctx = s->buffer;
		s->eof = 0;
	}
	if (!s->eof)
	{
		size_t got, room;
		int shift = s->tok - s->buffer;

		if (shift > 0)
		{
			/* source and destination overlap, which memcpy() does not allow */
			memmove(s->buffer, s->tok, s->lim - s->tok);
			s->tok -= shift;
			s->cur -= shift;
			s->lim -= shift;
			s->mrk -= shift;
			s->ctx -= shift;
		}
		/* only the space behind the retained data may be overwritten */
		room = (size_t)(s->buffer + BSIZE - s->lim);
		if ((got = fread(s->lim, 1, room, s->fp)) != room)
		{
			s->eof = &s->lim[got];
		}
		s->lim += got;
	}
	else if (s->cur + len > s->eof)
	{
		return 0; /* not enough input data */
	}
	return -1;
}
|
||||
|
||||
/* Write the current token, i.e. the bytes from s->tok up to s->cur, to stdout. */
void echo(Scanner *s)
|
||||
{
|
||||
fwrite(s->tok, 1, s->cur - s->tok, stdout);
|
||||
}
|
||||
|
||||
/*
 * Copy the contents of fp to stdout while dropping C block comments and C++
 * line comments.
 *
 * nlcomment is set when a line break is matched with a following block
 * comment start as trailing context; that line break is written out. After
 * a block comment, a directly adjacent further block comment (optionally
 * preceded by a line break) is skipped too, other whitespace is dropped,
 * and the line break behind the comment is only written when nlcomment is
 * not set.
 *
 * Returns 1 when fp is NULL. Otherwise the scanner loop runs until YYFILL
 * stores a fill() result >= 0 in res and breaks; that value is returned.
 * fp is closed before returning unless it is stdin.
 */
int scan(FILE *fp)
|
||||
{
|
||||
int res = 0;
|
||||
int nlcomment = 0;
|
||||
Scanner s;
|
||||
|
||||
if (!fp)
|
||||
{
|
||||
return 1; /* no file was opened */
|
||||
}
|
||||
|
||||
s.fp = fp;
|
||||
|
||||
fill(&s, 0);
|
||||
|
||||
for(;;)
|
||||
{
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
re2c:indent:top = 2;
|
||||
|
||||
NL = "\r"? "\n" ;
|
||||
WS = [\r\n\t ] ;
|
||||
ANY = [^] ;
|
||||
|
||||
"/" "/" { goto cppcomment; }
|
||||
NL / "/""*" { echo(&s); nlcomment = 1; continue; }
|
||||
"/" "*" { goto comment; }
|
||||
ANY { fputc(*s.tok, stdout); continue; }
|
||||
*/
|
||||
comment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
"*" "/" { goto commentws; }
|
||||
ANY { goto comment; }
|
||||
*/
|
||||
commentws:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL? "/" "*" { goto comment; }
|
||||
NL {
|
||||
if (!nlcomment)
|
||||
{
|
||||
echo(&s);
|
||||
}
|
||||
nlcomment = 0;
|
||||
continue;
|
||||
}
|
||||
WS { goto commentws; }
|
||||
ANY { echo(&s); nlcomment = 0; continue; }
|
||||
*/
|
||||
cppcomment:
|
||||
s.tok = s.cur;
|
||||
/*!re2c
|
||||
NL { echo(&s); continue; }
|
||||
ANY { goto cppcomment; }
|
||||
*/
|
||||
}
|
||||
|
||||
if (fp != stdin)
|
||||
{
|
||||
fclose(fp); /* close only if not stdin */
|
||||
}
|
||||
return res; /* return result */
|
||||
}
|
||||
|
||||
/*
 * Strip the comments from the file named by the first argument, or from
 * stdin when that argument is "-". Returns the result of scan(), or 1 after
 * printing a usage line when no argument was given.
 */
int main(int argc, char **argv)
{
	if (argc > 1)
	{
		return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"));
	}
	else
	{
		/* the argument is an input file (or "-"), not an expression */
		fprintf(stderr, "%s <file>|-\n", argv[0]);
		return 1;
	}
}
|
35
tools/re2c/examples/input_custom/fixed.re
Normal file
35
tools/re2c/examples/input_custom/fixed.re
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Build with "--input custom" re2c switch.
|
||||
//
|
||||
// This is an example of handling fixed-length buffer with "--input custom":
|
||||
// on each YYPEEK we check for the end of input, thus YYFILL generation
|
||||
// can be safely suppressed.
|
||||
//
|
||||
// Note that YYLIMIT points not to terminating NULL, but to the previous
|
||||
// character: we emulate the case when input has no terminating NULL.
|
||||
//
|
||||
// For a real-life example see https://github.com/sopyer/mjson
|
||||
// or mjson.re from re2c test collection.
|
||||
|
||||
// Checks whether the input at `cursor` starts with "int buffer " followed by
// "[", one or more digits and "]" (the bracket part is a trailing context).
// Returns true on a match and false otherwise.
// YYPEEK yields 0 once `cursor` has reached `limit`, so nothing at or behind
// `limit` is dereferenced; YYFILL generation is switched off.
bool lex (const char * cursor, const char * const limit)
|
||||
{
|
||||
const char * marker;
|
||||
const char * ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() (cursor >= limit ? 0 : *cursor)
|
||||
# define YYSKIP() ++cursor
|
||||
# define YYBACKUP() marker = cursor
|
||||
# define YYBACKUPCTX() ctxmarker = cursor
|
||||
# define YYRESTORE() cursor = marker
|
||||
# define YYRESTORECTX() cursor = ctxmarker
|
||||
/*!re2c
|
||||
re2c:yyfill:enable = 0;
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
char buffer [] = "int buffer [1024]";
|
||||
return !lex (buffer, buffer + sizeof (buffer) - 1);
|
||||
}
|
20
tools/re2c/examples/input_custom/simple/README
Normal file
20
tools/re2c/examples/input_custom/simple/README
Normal file
|
@ -0,0 +1,20 @@
|
|||
Build with "--input custom" re2c switch.
|
||||
|
||||
These are three examples of "--input custom" usage:
|
||||
|
||||
- input_custom_default.re:
|
||||
implements default re2c input model (pointers to plain buffer)
|
||||
|
||||
- input_custom_fgetc:
|
||||
implements C-style file input (using <stdio.h>)
|
||||
|
||||
- input_custom_istringstream:
|
||||
implements std::istringstream input
|
||||
|
||||
Note that these examples are very simple and don't need
|
||||
to implement YYFILL; the only reason they don't use
|
||||
"re2c:yyfill:enable = 0;" is to keep YYLESSTHAN and YYLIMIT
|
||||
(for the sake of example).
|
||||
|
||||
In real-life programs one will need to care for correct
|
||||
end-of-input handling.
|
24
tools/re2c/examples/input_custom/simple/default.re
Normal file
24
tools/re2c/examples/input_custom/simple/default.re
Normal file
|
@ -0,0 +1,24 @@
|
|||
// Checks whether the input at `cursor` starts with "int buffer " followed by
// "[", one or more digits and "]" (the bracket part is a trailing context).
// Returns true on a match and false otherwise.
// The input primitives operate on plain pointers; YYLESSTHAN compares the
// distance to `limit` and YYFILL is an empty block.
bool lex (const char * cursor, const char * const limit)
|
||||
{
|
||||
const char * marker;
|
||||
const char * ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() *cursor
|
||||
# define YYSKIP() ++cursor
|
||||
# define YYBACKUP() marker = cursor
|
||||
# define YYBACKUPCTX() ctxmarker = cursor
|
||||
# define YYRESTORE() cursor = marker
|
||||
# define YYRESTORECTX() cursor = ctxmarker
|
||||
# define YYLESSTHAN(n) limit - cursor < n
|
||||
# define YYFILL(n) {}
|
||||
/*!re2c
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
char buffer [] = "int buffer [1024]";
|
||||
return !lex (buffer, buffer + sizeof (buffer));
|
||||
}
|
43
tools/re2c/examples/input_custom/simple/fgetc.re
Normal file
43
tools/re2c/examples/input_custom/simple/fgetc.re
Normal file
|
@ -0,0 +1,43 @@
|
|||
#include <stdio.h>
|
||||
|
||||
/*
 * Return the next character of f without consuming it.
 *
 * The result of fgetc() is kept in an int so that end-of-file can be told
 * apart from a valid byte such as 0xFF; a character is pushed back only
 * when one was actually read.  At end of input the value returned is EOF
 * narrowed to char.
 */
char peek (FILE * f)
{
    const int c = fgetc (f);
    if (c != EOF)
    {
        ungetc (c, f);
    }
    return (char) c;
}
|
||||
|
||||
bool lex (FILE * f, const long limit)
|
||||
{
|
||||
long marker;
|
||||
long ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() peek (f)
|
||||
# define YYSKIP() fgetc (f)
|
||||
# define YYBACKUP() marker = ftell (f)
|
||||
# define YYBACKUPCTX() ctxmarker = ftell (f)
|
||||
# define YYRESTORE() fseek (f, marker, SEEK_SET)
|
||||
# define YYRESTORECTX() fseek (f, ctxmarker, SEEK_SET)
|
||||
# define YYLESSTHAN(n) limit - ftell (f) < n
|
||||
# define YYFILL(n) {}
|
||||
/*!re2c
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
const char buffer [] = "int buffer [1024]";
|
||||
const char fn [] = "input.txt";
|
||||
|
||||
FILE * f = fopen (fn, "w");
|
||||
fwrite (buffer, 1, sizeof (buffer), f);
|
||||
fclose (f);
|
||||
|
||||
f = fopen (fn, "rb");
|
||||
int result = !lex (f, sizeof (buffer));
|
||||
fclose (f);
|
||||
|
||||
return result;
|
||||
}
|
27
tools/re2c/examples/input_custom/simple/istringstream.re
Normal file
27
tools/re2c/examples/input_custom/simple/istringstream.re
Normal file
|
@ -0,0 +1,27 @@
|
|||
#include <sstream>
|
||||
|
||||
bool lex (std::istringstream & is, const std::streampos limit)
|
||||
{
|
||||
std::streampos marker;
|
||||
std::streampos ctxmarker;
|
||||
# define YYCTYPE char
|
||||
# define YYPEEK() is.peek ()
|
||||
# define YYSKIP() is.ignore ()
|
||||
# define YYBACKUP() marker = is.tellg ()
|
||||
# define YYBACKUPCTX() ctxmarker = is.tellg ()
|
||||
# define YYRESTORE() is.seekg (marker)
|
||||
# define YYRESTORECTX() is.seekg (ctxmarker)
|
||||
# define YYLESSTHAN(n) limit - is.tellg () < n
|
||||
# define YYFILL(n) {}
|
||||
/*!re2c
|
||||
"int buffer " / "[" [0-9]+ "]" { return true; }
|
||||
* { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
const char buffer [] = "int buffer [1024]";
|
||||
std::istringstream is (buffer);
|
||||
return !lex (is, sizeof (buffer));
|
||||
}
|
272
tools/re2c/examples/langs/c.re
Normal file
272
tools/re2c/examples/langs/c.re
Normal file
|
@ -0,0 +1,272 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define ADDEQ 257
|
||||
#define ANDAND 258
|
||||
#define ANDEQ 259
|
||||
#define ARRAY 260
|
||||
#define ASM 261
|
||||
#define AUTO 262
|
||||
#define BREAK 263
|
||||
#define CASE 264
|
||||
#define CHAR 265
|
||||
#define CONST 266
|
||||
#define CONTINUE 267
|
||||
#define DECR 268
|
||||
#define DEFAULT 269
|
||||
#define DEREF 270
|
||||
#define DIVEQ 271
|
||||
#define DO 272
|
||||
#define DOUBLE 273
|
||||
#define ELLIPSIS 274
|
||||
#define ELSE 275
|
||||
#define ENUM 276
|
||||
#define EQL 277
|
||||
#define EXTERN 278
|
||||
#define FCON 279
|
||||
#define FLOAT 280
|
||||
#define FOR 281
|
||||
#define FUNCTION 282
|
||||
#define GEQ 283
|
||||
#define GOTO 284
|
||||
#define ICON 285
|
||||
#define ID 286
|
||||
#define IF 287
|
||||
#define INCR 288
|
||||
#define INT 289
|
||||
#define LEQ 290
|
||||
#define LONG 291
|
||||
#define LSHIFT 292
|
||||
#define LSHIFTEQ 293
|
||||
#define MODEQ 294
|
||||
#define MULEQ 295
|
||||
#define NEQ 296
|
||||
#define OREQ 297
|
||||
#define OROR 298
|
||||
#define POINTER 299
|
||||
#define REGISTER 300
|
||||
#define RETURN 301
|
||||
#define RSHIFT 302
|
||||
#define RSHIFTEQ 303
|
||||
#define SCON 304
|
||||
#define SHORT 305
|
||||
#define SIGNED 306
|
||||
#define SIZEOF 307
|
||||
#define STATIC 308
|
||||
#define STRUCT 309
|
||||
#define SUBEQ 310
|
||||
#define SWITCH 311
|
||||
#define TYPEDEF 312
|
||||
#define UNION 313
|
||||
#define UNSIGNED 314
|
||||
#define VOID 315
|
||||
#define VOLATILE 316
|
||||
#define WHILE 317
|
||||
#define XOREQ 318
|
||||
#define EOI 319
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
#define BSIZE 8192
|
||||
|
||||
#define YYCTYPE uchar
|
||||
#define YYCURSOR cursor
|
||||
#define YYLIMIT s->lim
|
||||
#define YYMARKER s->ptr
|
||||
#define YYFILL(n) {cursor = fill(s, cursor);}
|
||||
|
||||
#define RET(i) {s->cur = cursor; return i;}
|
||||
|
||||
typedef struct Scanner {
|
||||
int fd;
|
||||
uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof;
|
||||
uint line;
|
||||
} Scanner;
|
||||
|
||||
/*
 * Refill the scanner buffer from s->fd.
 *
 * The bytes of the token in progress (s->tok .. s->lim) are moved to the
 * bottom of the buffer, the buffer is replaced by a larger one when fewer
 * than BSIZE bytes are free past s->lim, and up to BSIZE new bytes are
 * read.  A short read (or a read error) marks end of input: a '\n'
 * sentinel is stored right after the data and s->eof is set to point one
 * past it.  Once s->eof is set the function does nothing.
 *
 * Returns the cursor, adjusted for any move of the buffer contents.
 */
uchar *fill(Scanner *s, uchar *cursor){
    if(!s->eof) {
        uint cnt = s->tok - s->bot;
        long got;
        if(cnt){
            /* source and destination overlap whenever the live part is
               longer than the consumed prefix, so memcpy is not allowed */
            memmove(s->bot, s->tok, s->lim - s->tok);
            s->tok = s->bot;
            s->ptr -= cnt;
            cursor -= cnt;
            s->pos -= cnt;
            s->lim -= cnt;
        }
        if((s->top - s->lim) < BSIZE){
            uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
            if(!buf){
                fprintf(stderr, "out of memory\n");
                exit(1);
            }
            /* nothing to copy on the very first call (all pointers NULL) */
            if(s->lim != s->tok)
                memcpy(buf, s->tok, s->lim - s->tok);
            s->tok = buf;
            s->ptr = &buf[s->ptr - s->bot];
            cursor = &buf[cursor - s->bot];
            s->pos = &buf[s->pos - s->bot];
            s->lim = &buf[s->lim - s->bot];
            s->top = &s->lim[BSIZE];
            free(s->bot);
            s->bot = buf;
        }
        got = read(s->fd, (char*) s->lim, BSIZE);
        if(got < 0)
            got = 0;    /* a read error is handled like end of input */
        cnt = (uint) got;
        if(cnt != BSIZE){
            s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
        }
        s->lim += cnt;
    }
    return cursor;
}
|
||||
|
||||
int scan(Scanner *s){
|
||||
uchar *cursor = s->cur;
|
||||
std:
|
||||
s->tok = cursor;
|
||||
/*!re2c
|
||||
any = [\000-\377];
|
||||
O = [0-7];
|
||||
D = [0-9];
|
||||
L = [a-zA-Z_];
|
||||
H = [a-fA-F0-9];
|
||||
E = [Ee] [+-]? D+;
|
||||
FS = [fFlL];
|
||||
IS = [uUlL]*;
|
||||
ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+);
|
||||
*/
|
||||
|
||||
/*!re2c
|
||||
"/*" { goto comment; }
|
||||
|
||||
"auto" { RET(AUTO); }
|
||||
"break" { RET(BREAK); }
|
||||
"case" { RET(CASE); }
|
||||
"char" { RET(CHAR); }
|
||||
"const" { RET(CONST); }
|
||||
"continue" { RET(CONTINUE); }
|
||||
"default" { RET(DEFAULT); }
|
||||
"do" { RET(DO); }
|
||||
"double" { RET(DOUBLE); }
|
||||
"else" { RET(ELSE); }
|
||||
"enum" { RET(ENUM); }
|
||||
"extern" { RET(EXTERN); }
|
||||
"float" { RET(FLOAT); }
|
||||
"for" { RET(FOR); }
|
||||
"goto" { RET(GOTO); }
|
||||
"if" { RET(IF); }
|
||||
"int" { RET(INT); }
|
||||
"long" { RET(LONG); }
|
||||
"register" { RET(REGISTER); }
|
||||
"return" { RET(RETURN); }
|
||||
"short" { RET(SHORT); }
|
||||
"signed" { RET(SIGNED); }
|
||||
"sizeof" { RET(SIZEOF); }
|
||||
"static" { RET(STATIC); }
|
||||
"struct" { RET(STRUCT); }
|
||||
"switch" { RET(SWITCH); }
|
||||
"typedef" { RET(TYPEDEF); }
|
||||
"union" { RET(UNION); }
|
||||
"unsigned" { RET(UNSIGNED); }
|
||||
"void" { RET(VOID); }
|
||||
"volatile" { RET(VOLATILE); }
|
||||
"while" { RET(WHILE); }
|
||||
|
||||
L (L|D)* { RET(ID); }
|
||||
|
||||
("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) |
|
||||
(['] (ESC|any\[\n\\'])* ['])
|
||||
{ RET(ICON); }
|
||||
|
||||
(D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?)
|
||||
{ RET(FCON); }
|
||||
|
||||
(["] (ESC|any\[\n\\"])* ["])
|
||||
{ RET(SCON); }
|
||||
|
||||
"..." { RET(ELLIPSIS); }
|
||||
">>=" { RET(RSHIFTEQ); }
|
||||
"<<=" { RET(LSHIFTEQ); }
|
||||
"+=" { RET(ADDEQ); }
|
||||
"-=" { RET(SUBEQ); }
|
||||
"*=" { RET(MULEQ); }
|
||||
"/=" { RET(DIVEQ); }
|
||||
"%=" { RET(MODEQ); }
|
||||
"&=" { RET(ANDEQ); }
|
||||
"^=" { RET(XOREQ); }
|
||||
"|=" { RET(OREQ); }
|
||||
">>" { RET(RSHIFT); }
|
||||
"<<" { RET(LSHIFT); }
|
||||
"++" { RET(INCR); }
|
||||
"--" { RET(DECR); }
|
||||
"->" { RET(DEREF); }
|
||||
"&&" { RET(ANDAND); }
|
||||
"||" { RET(OROR); }
|
||||
"<=" { RET(LEQ); }
|
||||
">=" { RET(GEQ); }
|
||||
"==" { RET(EQL); }
|
||||
"!=" { RET(NEQ); }
|
||||
";" { RET(';'); }
|
||||
"{" { RET('{'); }
|
||||
"}" { RET('}'); }
|
||||
"," { RET(','); }
|
||||
":" { RET(':'); }
|
||||
"=" { RET('='); }
|
||||
"(" { RET('('); }
|
||||
")" { RET(')'); }
|
||||
"[" { RET('['); }
|
||||
"]" { RET(']'); }
|
||||
"." { RET('.'); }
|
||||
"&" { RET('&'); }
|
||||
"!" { RET('!'); }
|
||||
"~" { RET('~'); }
|
||||
"-" { RET('-'); }
|
||||
"+" { RET('+'); }
|
||||
"*" { RET('*'); }
|
||||
"/" { RET('/'); }
|
||||
"%" { RET('%'); }
|
||||
"<" { RET('<'); }
|
||||
">" { RET('>'); }
|
||||
"^" { RET('^'); }
|
||||
"|" { RET('|'); }
|
||||
"?" { RET('?'); }
|
||||
|
||||
|
||||
[ \t\v\f]+ { goto std; }
|
||||
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->pos = cursor; s->line++;
|
||||
goto std;
|
||||
}
|
||||
|
||||
any
|
||||
{
|
||||
printf("unexpected character: %c\n", *s->tok);
|
||||
goto std;
|
||||
}
|
||||
*/
|
||||
|
||||
comment:
|
||||
/*!re2c
|
||||
"*/" { goto std; }
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RET(EOI);
|
||||
s->tok = s->pos = cursor; s->line++;
|
||||
goto comment;
|
||||
}
|
||||
any { goto comment; }
|
||||
*/
|
||||
}
|
||||
|
||||
main(){
|
||||
Scanner in;
|
||||
int t;
|
||||
memset((char*) &in, 0, sizeof(in));
|
||||
in.fd = 0;
|
||||
while((t = scan(&in)) != EOI){
|
||||
/*
|
||||
printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok);
|
||||
printf("%d\n", t);
|
||||
*/
|
||||
}
|
||||
close(in.fd);
|
||||
}
|
203
tools/re2c/examples/langs/modula.re
Normal file
203
tools/re2c/examples/langs/modula.re
Normal file
|
@ -0,0 +1,203 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
#define BSIZE 8192
|
||||
|
||||
#define YYCTYPE uchar
|
||||
#define YYCURSOR cursor
|
||||
#define YYLIMIT s->lim
|
||||
#define YYMARKER s->ptr
|
||||
#define YYCTXMARKER s->ctx
|
||||
#define YYFILL {cursor = fill(s, cursor);}
|
||||
|
||||
#define RETURN(i) {s->cur = cursor; return i;}
|
||||
|
||||
typedef struct Scanner {
|
||||
int fd;
|
||||
uchar *bot, *tok, *ptr, *ctx, *cur, *pos, *lim, *top, *eof;
|
||||
uint line;
|
||||
} Scanner;
|
||||
|
||||
/*
 * Refill the scanner buffer from s->fd.
 *
 * The bytes of the token in progress (s->tok .. s->lim) are moved to the
 * bottom of the buffer, the buffer is replaced by a larger one when fewer
 * than BSIZE bytes are free past s->lim, and up to BSIZE new bytes are
 * read.  Every saved position, including the trailing-context marker
 * s->ctx, is relocated together with the data.  A short read (or a read
 * error) marks end of input: a '\n' sentinel is stored right after the
 * data and s->eof is set to point one past it.  Once s->eof is set the
 * function does nothing.
 *
 * Returns the cursor, adjusted for any move of the buffer contents.
 */
uchar *fill(Scanner *s, uchar *cursor){
    if(!s->eof){
        uint cnt = s->tok - s->bot;
        long got;
        if(cnt){
            /* source and destination overlap whenever the live part is
               longer than the consumed prefix, so memcpy is not allowed */
            memmove(s->bot, s->tok, s->lim - s->tok);
            s->tok = s->bot;
            s->ptr -= cnt;
            s->ctx -= cnt;
            cursor -= cnt;
            s->pos -= cnt;
            s->lim -= cnt;
        }
        if((s->top - s->lim) < BSIZE){
            uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar));
            if(!buf){
                fprintf(stderr, "out of memory\n");
                exit(1);
            }
            /* nothing to copy on the very first call (all pointers NULL) */
            if(s->lim != s->tok)
                memcpy(buf, s->tok, s->lim - s->tok);
            s->tok = buf;
            s->ptr = &buf[s->ptr - s->bot];
            s->ctx = &buf[s->ctx - s->bot];
            cursor = &buf[cursor - s->bot];
            s->pos = &buf[s->pos - s->bot];
            s->lim = &buf[s->lim - s->bot];
            s->top = &s->lim[BSIZE];
            free(s->bot);
            s->bot = buf;
        }
        got = read(s->fd, (char*) s->lim, BSIZE);
        if(got < 0)
            got = 0;    /* a read error is handled like end of input */
        cnt = (uint) got;
        if(cnt != BSIZE){
            s->eof = &s->lim[cnt]; *(s->eof)++ = '\n';
        }
        s->lim += cnt;
    }
    return cursor;
}
|
||||
|
||||
int scan(Scanner *s){
|
||||
uchar *cursor = s->cur;
|
||||
uint depth;
|
||||
std:
|
||||
s->tok = cursor;
|
||||
/*!re2c
|
||||
any = [\000-\377];
|
||||
digit = [0-9];
|
||||
letter = [a-zA-Z];
|
||||
*/
|
||||
|
||||
/*!re2c
|
||||
"(*" { depth = 1; goto comment; }
|
||||
|
||||
digit + {RETURN(1);}
|
||||
digit + / ".." {RETURN(1);}
|
||||
[0-7] + "B" {RETURN(2);}
|
||||
[0-7] + "C" {RETURN(3);}
|
||||
digit [0-9A-F] * "H" {RETURN(4);}
|
||||
digit + "." digit * ("E" ([+-]) ? digit +) ? {RETURN(5);}
|
||||
['] (any\[\n']) * ['] | ["] (any\[\n"]) * ["] {RETURN(6);}
|
||||
|
||||
"#" {RETURN(7);}
|
||||
"&" {RETURN(8);}
|
||||
"(" {RETURN(9);}
|
||||
")" {RETURN(10);}
|
||||
"*" {RETURN(11);}
|
||||
"+" {RETURN(12);}
|
||||
"," {RETURN(13);}
|
||||
"-" {RETURN(14);}
|
||||
"." {RETURN(15);}
|
||||
".." {RETURN(16);}
|
||||
"/" {RETURN(17);}
|
||||
":" {RETURN(18);}
|
||||
":=" {RETURN(19);}
|
||||
";" {RETURN(20);}
|
||||
"<" {RETURN(21);}
|
||||
"<=" {RETURN(22);}
|
||||
"<>" {RETURN(23);}
|
||||
"=" {RETURN(24);}
|
||||
">" {RETURN(25);}
|
||||
">=" {RETURN(26);}
|
||||
"[" {RETURN(27);}
|
||||
"]" {RETURN(28);}
|
||||
"^" {RETURN(29);}
|
||||
"{" {RETURN(30);}
|
||||
"|" {RETURN(31);}
|
||||
"}" {RETURN(32);}
|
||||
"~" {RETURN(33);}
|
||||
|
||||
"AND" {RETURN(34);}
|
||||
"ARRAY" {RETURN(35);}
|
||||
"BEGIN" {RETURN(36);}
|
||||
"BY" {RETURN(37);}
|
||||
"CASE" {RETURN(38);}
|
||||
"CONST" {RETURN(39);}
|
||||
"DEFINITION" {RETURN(40);}
|
||||
"DIV" {RETURN(41);}
|
||||
"DO" {RETURN(42);}
|
||||
"ELSE" {RETURN(43);}
|
||||
"ELSIF" {RETURN(44);}
|
||||
"END" {RETURN(45);}
|
||||
"EXIT" {RETURN(46);}
|
||||
"EXPORT" {RETURN(47);}
|
||||
"FOR" {RETURN(48);}
|
||||
"FROM" {RETURN(49);}
|
||||
"IF" {RETURN(50);}
|
||||
"IMPLEMENTATION" {RETURN(51);}
|
||||
"IMPORT" {RETURN(52);}
|
||||
"IN" {RETURN(53);}
|
||||
"LOOP" {RETURN(54);}
|
||||
"MOD" {RETURN(55);}
|
||||
"MODULE" {RETURN(56);}
|
||||
"NOT" {RETURN(57);}
|
||||
"OF" {RETURN(58);}
|
||||
"OR" {RETURN(59);}
|
||||
"POINTER" {RETURN(60);}
|
||||
"PROCEDURE" {RETURN(61);}
|
||||
"QUALIFIED" {RETURN(62);}
|
||||
"RECORD" {RETURN(63);}
|
||||
"REPEAT" {RETURN(64);}
|
||||
"RETURN" {RETURN(65);}
|
||||
"SET" {RETURN(66);}
|
||||
"THEN" {RETURN(67);}
|
||||
"TO" {RETURN(68);}
|
||||
"TYPE" {RETURN(69);}
|
||||
"UNTIL" {RETURN(70);}
|
||||
"VAR" {RETURN(71);}
|
||||
"WHILE" {RETURN(72);}
|
||||
"WITH" {RETURN(73);}
|
||||
|
||||
letter (letter | digit) * {RETURN(74);}
|
||||
|
||||
[ \t]+ { goto std; }
|
||||
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RETURN(0);
|
||||
s->pos = cursor; s->line++;
|
||||
goto std;
|
||||
}
|
||||
|
||||
any
|
||||
{
|
||||
printf("unexpected character: %c\n", *s->tok);
|
||||
goto std;
|
||||
}
|
||||
*/
|
||||
comment:
|
||||
/*!re2c
|
||||
"*)"
|
||||
{
|
||||
if(--depth == 0)
|
||||
goto std;
|
||||
else
|
||||
goto comment;
|
||||
}
|
||||
"(*" { ++depth; goto comment; }
|
||||
"\n"
|
||||
{
|
||||
if(cursor == s->eof) RETURN(0);
|
||||
s->tok = s->pos = cursor; s->line++;
|
||||
goto comment;
|
||||
}
|
||||
any { goto comment; }
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
void putStr(FILE *o, char *s, uint l){
|
||||
while(l-- > 0)
|
||||
putc(*s++, o);
|
||||
}
|
||||
*/
|
||||
|
||||
main(){
|
||||
Scanner in;
|
||||
memset((char*) &in, 0, sizeof(in));
|
||||
in.fd = 0;
|
||||
while(scan(&in)){
|
||||
/*
|
||||
putc('<', stdout);
|
||||
putStr(stdout, (char*) in.tok, in.cur - in.tok);
|
||||
putc('>', stdout);
|
||||
putc('\n', stdout);
|
||||
*/
|
||||
}
|
||||
}
|
319
tools/re2c/examples/langs/rexx.re
Normal file
319
tools/re2c/examples/langs/rexx.re
Normal file
|
@ -0,0 +1,319 @@
|
|||
#include "scanio.h"
|
||||
#include "scanner.h"
|
||||
|
||||
#define CURSOR ch
|
||||
#define LOADCURSOR ch = *cursor;
|
||||
#define ADVANCE cursor++;
|
||||
#define BACK(n) cursor -= (n);
|
||||
#define CHECK(n) if((ScanCB.lim - cursor) < (n)){cursor = ScanFill(cursor);}
|
||||
#define MARK(n) ScanCB.ptr = cursor; sel = (n);
|
||||
#define REVERT cursor = ScanCB.ptr;
|
||||
#define MARKER sel
|
||||
|
||||
#define RETURN(i) {ScanCB.cur = cursor; return i;}
|
||||
|
||||
int ScanToken(){
|
||||
uchar *cursor = ScanCB.cur;
|
||||
unsigned sel;
|
||||
uchar ch;
|
||||
ScanCB.tok = cursor;
|
||||
ScanCB.eot = NULL;
|
||||
/*!re2c
|
||||
all = [\000-\377];
|
||||
eof = [\000];
|
||||
any = all\eof;
|
||||
letter = [a-z]|[A-Z];
|
||||
digit = [0-9];
|
||||
symchr = letter|digit|[.!?_];
|
||||
const = (digit|[.])symchr*([eE][+-]?digit+)?;
|
||||
simple = (symchr\(digit|[.]))(symchr\[.])*;
|
||||
stem = simple [.];
|
||||
symbol = symchr*;
|
||||
sqstr = ['] ((any\['\n])|(['][']))* ['];
|
||||
dqstr = ["] ((any\["\n])|(["]["]))* ["];
|
||||
str = sqstr|dqstr;
|
||||
ob = [ \t]*;
|
||||
not = [\\~];
|
||||
A = [aA];
|
||||
B = [bB];
|
||||
C = [cC];
|
||||
D = [dD];
|
||||
E = [eE];
|
||||
F = [fF];
|
||||
G = [gG];
|
||||
H = [hH];
|
||||
I = [iI];
|
||||
J = [jJ];
|
||||
K = [kK];
|
||||
L = [lL];
|
||||
M = [mM];
|
||||
N = [nN];
|
||||
O = [oO];
|
||||
P = [pP];
|
||||
Q = [qQ];
|
||||
R = [rR];
|
||||
S = [sS];
|
||||
T = [tT];
|
||||
U = [uU];
|
||||
V = [vV];
|
||||
W = [wW];
|
||||
X = [xX];
|
||||
Y = [yY];
|
||||
Z = [zZ];
|
||||
*/
|
||||
|
||||
scan:
|
||||
/*!re2c
|
||||
"\n"
|
||||
{
|
||||
++(ScanCB.lineNum);
|
||||
ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
|
||||
RETURN(SU_EOL);
|
||||
}
|
||||
"|" ob "|"
|
||||
{ RETURN(OP_CONCAT); }
|
||||
"+"
|
||||
{ RETURN(OP_PLUS); }
|
||||
"-"
|
||||
{ RETURN(OP_MINUS); }
|
||||
"*"
|
||||
{ RETURN(OP_MULT); }
|
||||
"/"
|
||||
{ RETURN(OP_DIV); }
|
||||
"%"
|
||||
{ RETURN(OP_IDIV); }
|
||||
"/" ob "/"
|
||||
{ RETURN(OP_REMAIN); }
|
||||
"*" ob "*"
|
||||
{ RETURN(OP_POWER); }
|
||||
"="
|
||||
{ RETURN(OP_EQUAL); }
|
||||
not ob "=" | "<" ob ">" | ">" ob "<"
|
||||
{ RETURN(OP_EQUAL_N); }
|
||||
">"
|
||||
{ RETURN(OP_GT); }
|
||||
"<"
|
||||
{ RETURN(OP_LT); }
|
||||
">" ob "=" | not ob "<"
|
||||
{ RETURN(OP_GE); }
|
||||
"<" ob "=" | not ob ">"
|
||||
{ RETURN(OP_LE); }
|
||||
"=" ob "="
|
||||
{ RETURN(OP_EQUAL_EQ); }
|
||||
not ob "=" ob "="
|
||||
{ RETURN(OP_EQUAL_EQ_N); }
|
||||
">" ob ">"
|
||||
{ RETURN(OP_GT_STRICT); }
|
||||
"<" ob "<"
|
||||
{ RETURN(OP_LT_STRICT); }
|
||||
">" ob ">" ob "=" | not ob "<" ob "<"
|
||||
{ RETURN(OP_GE_STRICT); }
|
||||
"<" ob "<" ob "=" | not ob ">" ob ">"
|
||||
{ RETURN(OP_LE_STRICT); }
|
||||
"&"
|
||||
{ RETURN(OP_AND); }
|
||||
"|"
|
||||
{ RETURN(OP_OR); }
|
||||
"&" ob "&"
|
||||
{ RETURN(OP_XOR); }
|
||||
not
|
||||
{ RETURN(OP_NOT); }
|
||||
|
||||
":"
|
||||
{ RETURN(SU_COLON); }
|
||||
","
|
||||
{ RETURN(SU_COMMA); }
|
||||
"("
|
||||
{ RETURN(SU_POPEN); }
|
||||
")"
|
||||
{ RETURN(SU_PCLOSE); }
|
||||
";"
|
||||
{ RETURN(SU_EOC); }
|
||||
|
||||
A D D R E S S
|
||||
{ RETURN(RX_ADDRESS); }
|
||||
A R G
|
||||
{ RETURN(RX_ARG); }
|
||||
C A L L
|
||||
{ RETURN(RX_CALL); }
|
||||
D O
|
||||
{ RETURN(RX_DO); }
|
||||
D R O P
|
||||
{ RETURN(RX_DROP); }
|
||||
E L S E
|
||||
{ RETURN(RX_ELSE); }
|
||||
E N D
|
||||
{ RETURN(RX_END); }
|
||||
E X I T
|
||||
{ RETURN(RX_EXIT); }
|
||||
I F
|
||||
{ RETURN(RX_IF); }
|
||||
I N T E R P R E T
|
||||
{ RETURN(RX_INTERPRET); }
|
||||
I T E R A T E
|
||||
{ RETURN(RX_ITERATE); }
|
||||
L E A V E
|
||||
{ RETURN(RX_LEAVE); }
|
||||
N O P
|
||||
{ RETURN(RX_NOP); }
|
||||
N U M E R I C
|
||||
{ RETURN(RX_NUMERIC); }
|
||||
O P T I O N S
|
||||
{ RETURN(RX_OPTIONS); }
|
||||
O T H E R W I S E
|
||||
{ RETURN(RX_OTHERWISE); }
|
||||
P A R S E
|
||||
{ RETURN(RX_PARSE); }
|
||||
P R O C E D U R E
|
||||
{ RETURN(RX_PROCEDURE); }
|
||||
P U L L
|
||||
{ RETURN(RX_PULL); }
|
||||
P U S H
|
||||
{ RETURN(RX_PUSH); }
|
||||
Q U E U E
|
||||
{ RETURN(RX_QUEUE); }
|
||||
R E T U R N
|
||||
{ RETURN(RX_RETURN); }
|
||||
S A Y
|
||||
{ RETURN(RX_SAY); }
|
||||
S E L E C T
|
||||
{ RETURN(RX_SELECT); }
|
||||
S I G N A L
|
||||
{ RETURN(RX_SIGNAL); }
|
||||
T H E N
|
||||
{ RETURN(RX_THEN); }
|
||||
T R A C E
|
||||
{ RETURN(RX_TRACE); }
|
||||
W H E N
|
||||
{ RETURN(RX_WHEN); }
|
||||
O F F
|
||||
{ RETURN(RXS_OFF); }
|
||||
O N
|
||||
{ RETURN(RXS_ON); }
|
||||
B Y
|
||||
{ RETURN(RXS_BY); }
|
||||
D I G I T S
|
||||
{ RETURN(RXS_DIGITS); }
|
||||
E N G I N E E R I N G
|
||||
{ RETURN(RXS_ENGINEERING); }
|
||||
E R R O R
|
||||
{ RETURN(RXS_ERROR); }
|
||||
E X P O S E
|
||||
{ RETURN(RXS_EXPOSE); }
|
||||
F A I L U R E
|
||||
{ RETURN(RXS_FAILURE); }
|
||||
F O R
|
||||
{ RETURN(RXS_FOR); }
|
||||
F O R E V E R
|
||||
{ RETURN(RXS_FOREVER); }
|
||||
F O R M
|
||||
{ RETURN(RXS_FORM); }
|
||||
F U Z Z
|
||||
{ RETURN(RXS_FUZZ); }
|
||||
H A L T
|
||||
{ RETURN(RXS_HALT); }
|
||||
L I N E I N
|
||||
{ RETURN(RXS_LINEIN); }
|
||||
N A M E
|
||||
{ RETURN(RXS_NAME); }
|
||||
N O T R E A D Y
|
||||
{ RETURN(RXS_NOTREADY); }
|
||||
N O V A L U E
|
||||
{ RETURN(RXS_NOVALUE); }
|
||||
S C I E N T I F I C
|
||||
{ RETURN(RXS_SCIENTIFIC); }
|
||||
S O U R C E
|
||||
{ RETURN(RXS_SOURCE); }
|
||||
S Y N T A X
|
||||
{ RETURN(RXS_SYNTAX); }
|
||||
T O
|
||||
{ RETURN(RXS_TO); }
|
||||
U N T I L
|
||||
{ RETURN(RXS_UNTIL); }
|
||||
U P P E R
|
||||
{ RETURN(RXS_UPPER); }
|
||||
V A L U E
|
||||
{ RETURN(RXS_VALUE); }
|
||||
V A R
|
||||
{ RETURN(RXS_VAR); }
|
||||
V E R S I O N
|
||||
{ RETURN(RXS_VERSION); }
|
||||
W H I L E
|
||||
{ RETURN(RXS_WHILE); }
|
||||
W I T H
|
||||
{ RETURN(RXS_WITH); }
|
||||
|
||||
const
|
||||
{ RETURN(SU_CONST); }
|
||||
simple
|
||||
{ RETURN(SU_SYMBOL); }
|
||||
stem
|
||||
{ RETURN(SU_SYMBOL_STEM); }
|
||||
symbol
|
||||
{ RETURN(SU_SYMBOL_COMPOUND); }
|
||||
str
|
||||
{ RETURN(SU_LITERAL); }
|
||||
str [bB] / (all\symchr)
|
||||
{ RETURN(SU_LITERAL_BIN); }
|
||||
str [xX] / (all\symchr)
|
||||
{ RETURN(SU_LITERAL_HEX); }
|
||||
|
||||
eof
|
||||
{ RETURN(SU_EOF); }
|
||||
any
|
||||
{ RETURN(SU_ERROR); }
|
||||
*/
|
||||
}
|
||||
|
||||
bool StripToken(){
|
||||
uchar *cursor = ScanCB.cur;
|
||||
unsigned depth;
|
||||
uchar ch;
|
||||
bool blanks = FALSE;
|
||||
ScanCB.eot = cursor;
|
||||
strip:
|
||||
/*!re2c
|
||||
"/*"
|
||||
{
|
||||
depth = 1;
|
||||
goto comment;
|
||||
}
|
||||
"\r"
|
||||
{ goto strip; }
|
||||
[ \t]
|
||||
{
|
||||
blanks = TRUE;
|
||||
goto strip;
|
||||
}
|
||||
[] / all
|
||||
{ RETURN(blanks); }
|
||||
*/
|
||||
|
||||
comment:
|
||||
/*!re2c
|
||||
"*/"
|
||||
{
|
||||
if(--depth == 0)
|
||||
goto strip;
|
||||
else
|
||||
goto comment;
|
||||
}
|
||||
"\n"
|
||||
{
|
||||
++(ScanCB.lineNum);
|
||||
ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk);
|
||||
goto comment;
|
||||
}
|
||||
"/*"
|
||||
{
|
||||
++depth;
|
||||
goto comment;
|
||||
}
|
||||
eof
|
||||
{ RETURN(blanks); }
|
||||
any
|
||||
{
|
||||
goto comment;
|
||||
}
|
||||
*/
|
||||
}
|
340
tools/re2c/examples/push_model/push.re
Normal file
340
tools/re2c/examples/push_model/push.re
Normal file
|
@ -0,0 +1,340 @@
|
|||
/*
|
||||
* A push-model scanner example for re2c -f
|
||||
* Written Mon Apr 11 2005 by mgix@mgix.com
|
||||
* This file is in the public domain.
|
||||
*
|
||||
*/
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(WIN32)
|
||||
|
||||
typedef signed char int8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef signed int int32_t;
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
|
||||
#else
|
||||
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifndef O_BINARY
|
||||
#define O_BINARY 0
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
#define TOKENS \
|
||||
\
|
||||
TOK(kEOF) \
|
||||
TOK(kEOL) \
|
||||
TOK(kUnknown) \
|
||||
TOK(kIdentifier) \
|
||||
TOK(kDecimalConstant) \
|
||||
\
|
||||
TOK(kEqual) \
|
||||
TOK(kLeftParen) \
|
||||
TOK(kRightParen) \
|
||||
TOK(kMinus) \
|
||||
TOK(kPlus) \
|
||||
TOK(kStar) \
|
||||
TOK(kSlash) \
|
||||
\
|
||||
TOK(kIf) \
|
||||
TOK(kFor) \
|
||||
TOK(kElse) \
|
||||
TOK(kGoto) \
|
||||
TOK(kBreak) \
|
||||
TOK(kWhile) \
|
||||
TOK(kReturn) \
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
static const char *tokenNames[] =
|
||||
{
|
||||
#define TOK(x) #x,
|
||||
TOKENS
|
||||
#undef TOK
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
class PushScanner
|
||||
{
|
||||
public:
|
||||
|
||||
enum Token
|
||||
{
|
||||
#define TOK(x) x,
|
||||
TOKENS
|
||||
#undef TOK
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
bool eof;
|
||||
int32_t state;
|
||||
|
||||
uint8_t *limit;
|
||||
uint8_t *start;
|
||||
uint8_t *cursor;
|
||||
uint8_t *marker;
|
||||
|
||||
uint8_t *buffer;
|
||||
uint8_t *bufferEnd;
|
||||
|
||||
uint8_t yych;
|
||||
uint32_t yyaccept;
|
||||
|
||||
public:
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
PushScanner()
|
||||
{
|
||||
limit = 0;
|
||||
start = 0;
|
||||
state = -1;
|
||||
cursor = 0;
|
||||
marker = 0;
|
||||
buffer = 0;
|
||||
eof = false;
|
||||
bufferEnd = 0;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
~PushScanner()
|
||||
{
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
void send(
|
||||
Token token
|
||||
)
|
||||
{
|
||||
size_t tokenSize = cursor-start;
|
||||
const char *tokenName = tokenNames[token];
|
||||
printf(
|
||||
"scanner is pushing out a token of type %d (%s)",
|
||||
token,
|
||||
tokenName
|
||||
);
|
||||
|
||||
if(token==kEOF) putchar('\n');
|
||||
else
|
||||
{
|
||||
size_t tokenNameSize = strlen(tokenNames[token]);
|
||||
size_t padSize = 20-(20<tokenNameSize ? 20 : tokenNameSize);
|
||||
for(size_t i=0; i<padSize; ++i) putchar(' ');
|
||||
printf(" : ---->");
|
||||
|
||||
fwrite(
|
||||
start,
|
||||
tokenSize,
|
||||
1,
|
||||
stdout
|
||||
);
|
||||
|
||||
printf("<----\n");
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
uint32_t push(
|
||||
const void *input,
|
||||
ssize_t inputSize
|
||||
)
|
||||
{
|
||||
printf(
|
||||
"scanner is receiving a new data batch of length %d\n"
|
||||
"scanner continues with saved state = %d\n",
|
||||
inputSize,
|
||||
state
|
||||
);
|
||||
|
||||
/*
|
||||
* Data source is signaling end of file when batch size
|
||||
* is less than maxFill. This is slightly annoying because
|
||||
* maxFill is a value that can only be known after re2c does
|
||||
* its thing. Practically though, maxFill is never bigger than
|
||||
* the longest keyword, so given our grammar, 32 is a safe bet.
|
||||
*/
|
||||
uint8_t null[64];
|
||||
const ssize_t maxFill = 32;
|
||||
if(inputSize<maxFill)
|
||||
{
|
||||
eof = true;
|
||||
input = null;
|
||||
inputSize = sizeof(null);
|
||||
memset(null, 0, sizeof(null));
|
||||
}
|
||||
|
||||
/*
|
||||
* When we get here, we have a partially
|
||||
* consumed buffer which is in the following state:
|
||||
* last valid char last valid buffer spot
|
||||
* v v
|
||||
* +-------------------+-------------+---------------+-------------+----------------------+
|
||||
* ^ ^ ^ ^ ^ ^
|
||||
* buffer start marker cursor limit bufferEnd
|
||||
*
|
||||
* We need to stretch the buffer and concatenate the new chunk of input to it
|
||||
*
|
||||
*/
|
||||
size_t used = limit-buffer;
|
||||
size_t needed = used+inputSize;
|
||||
size_t allocated = bufferEnd-buffer;
|
||||
if(allocated<needed)
|
||||
{
|
||||
size_t limitOffset = limit-buffer;
|
||||
size_t startOffset = start-buffer;
|
||||
size_t markerOffset = marker-buffer;
|
||||
size_t cursorOffset = cursor-buffer;
|
||||
|
||||
buffer = (uint8_t*)realloc(buffer, needed);
|
||||
bufferEnd = needed+buffer;
|
||||
|
||||
marker = markerOffset + buffer;
|
||||
cursor = cursorOffset + buffer;
|
||||
start = buffer + startOffset;
|
||||
limit = limitOffset + buffer;
|
||||
}
|
||||
memcpy(limit, input, inputSize);
|
||||
limit += inputSize;
|
||||
|
||||
// The scanner starts here
|
||||
#define YYLIMIT limit
|
||||
#define YYCURSOR cursor
|
||||
#define YYMARKER marker
|
||||
#define YYCTYPE uint8_t
|
||||
|
||||
#define SKIP(x) { start = cursor; goto yy0; }
|
||||
#define SEND(x) { send(x); SKIP(); }
|
||||
#define YYFILL(n) { goto fill; }
|
||||
|
||||
#define YYGETSTATE() state
|
||||
#define YYSETSTATE(x) { state = (x); }
|
||||
|
||||
start:
|
||||
|
||||
/*!re2c
|
||||
re2c:startlabel = 1;
|
||||
eol = "\n";
|
||||
eof = "\000";
|
||||
digit = [0-9];
|
||||
integer = digit+;
|
||||
alpha = [A-Za-z_];
|
||||
any = [\000-\377];
|
||||
space = [ \h\t\v\f\r];
|
||||
|
||||
"if" { SEND(kIf); }
|
||||
"for" { SEND(kFor); }
|
||||
"else" { SEND(kElse); }
|
||||
"goto" { SEND(kGoto); }
|
||||
"break" { SEND(kBreak); }
|
||||
"while" { SEND(kWhile); }
|
||||
"return" { SEND(kReturn); }
|
||||
alpha (alpha|digit)* { SEND(kIdentifier); }
|
||||
integer { SEND(kDecimalConstant);}
|
||||
|
||||
"=" { SEND(kEqual); }
|
||||
"(" { SEND(kLeftParen); }
|
||||
")" { SEND(kRightParen); }
|
||||
"-" { SEND(kMinus); }
|
||||
"+" { SEND(kPlus); }
|
||||
"*" { SEND(kStar); }
|
||||
"/" { SEND(kSlash); }
|
||||
|
||||
eol { SKIP(); }
|
||||
space { SKIP(); }
|
||||
eof { send(kEOF); return 1; }
|
||||
any { SEND(kUnknown); }
|
||||
*/
|
||||
|
||||
fill:
|
||||
ssize_t unfinishedSize = cursor-start;
|
||||
printf(
|
||||
"scanner needs a refill. Exiting for now with:\n"
|
||||
" saved fill state = %d\n"
|
||||
" unfinished token size = %d\n",
|
||||
state,
|
||||
unfinishedSize
|
||||
);
|
||||
|
||||
if(0<unfinishedSize && start<limit)
|
||||
{
|
||||
printf(" unfinished token is :");
|
||||
fwrite(start, 1, cursor-start, stdout);
|
||||
putchar('\n');
|
||||
}
|
||||
putchar('\n');
|
||||
|
||||
/*
|
||||
* Once we get here, we can get rid of
|
||||
* everything before start and after limit.
|
||||
*/
|
||||
if(eof==true) goto start;
|
||||
if(buffer<start)
|
||||
{
|
||||
size_t startOffset = start-buffer;
|
||||
memmove(buffer, start, limit-start);
|
||||
marker -= startOffset;
|
||||
cursor -= startOffset;
|
||||
limit -= startOffset;
|
||||
start -= startOffset;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
int main(
|
||||
int argc,
|
||||
char **argv
|
||||
)
|
||||
{
|
||||
// Parse cmd line
|
||||
int input = 0;
|
||||
if(1<argc)
|
||||
{
|
||||
input = open(argv[1], O_RDONLY | O_BINARY);
|
||||
if(input<0)
|
||||
{
|
||||
fprintf(
|
||||
stderr,
|
||||
"could not open file %s\n",
|
||||
argv[1]
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Tokenize input file by pushing batches
|
||||
* of data one by one into the scanner.
|
||||
*/
|
||||
const size_t batchSize = 256;
|
||||
uint8_t buffer[batchSize];
|
||||
PushScanner scanner;
|
||||
while(1)
|
||||
{
|
||||
ssize_t n = read(input, buffer, batchSize);
|
||||
scanner.push(buffer, n);
|
||||
if(n<batchSize) break;
|
||||
}
|
||||
scanner.push(0, -1);
|
||||
close(input);
|
||||
|
||||
// Done
|
||||
return 0;
|
||||
}
|
||||
|
168
tools/re2c/src/codegen/bitmap.cc
Normal file
168
tools/re2c/src/codegen/bitmap.cc
Normal file
|
@ -0,0 +1,168 @@
|
|||
#include <algorithm> // min
|
||||
#include <string.h> // memset
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
BitMap *BitMap::first = NULL;
|
||||
|
||||
BitMap::BitMap(const Go *g, const State *x)
|
||||
: go(g)
|
||||
, on(x)
|
||||
, next(first)
|
||||
, i(0)
|
||||
, m(0)
|
||||
{
|
||||
first = this;
|
||||
}
|
||||
|
||||
BitMap::~BitMap()
|
||||
{
|
||||
delete next;
|
||||
}
|
||||
|
||||
// Return the first registered bitmap whose spans leading to its own state
// are the same as the spans of g leading to x (as decided by matches()).
// When no such bitmap is registered, a new one is allocated for (g, x) and
// returned.
const BitMap *BitMap::find(const Go *g, const State *x)
{
	const BitMap *hit = first;
	while (hit != NULL
		&& !matches(hit->go->span, hit->go->nSpans, hit->on, g->span, g->nSpans, x))
	{
		hit = hit->next;
	}
	return hit != NULL ? hit : new BitMap(g, x);
}
|
||||
|
||||
// Return the first registered bitmap built for state x, or NULL when there
// is none.
const BitMap *BitMap::find(const State *x)
{
	const BitMap *hit = first;
	while (hit != NULL && hit->on != x)
	{
		hit = hit->next;
	}
	return hit;
}
|
||||
|
||||
static void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m)
|
||||
{
|
||||
Span *b = g->span, *e = &b[g->nSpans];
|
||||
uint32_t lb = 0;
|
||||
|
||||
for (; b < e; ++b)
|
||||
{
|
||||
if (b->to == s)
|
||||
{
|
||||
for (; lb < b->ub && lb < 256; ++lb)
|
||||
{
|
||||
bm[lb-f] |= m;
|
||||
}
|
||||
}
|
||||
|
||||
lb = b->ub;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the `static const unsigned char <yybm>[]` table for all bitmaps in
// the global list, covering characters lb .. ub-1.
//
// Bitmaps are packed eight per row of (ub - lb) entries: within a row each
// bitmap gets its own bit (0x80, 0x40, ... 0x01). While emitting, every
// bitmap records the row offset (`i`) and the bit mask (`m`) assigned to it.
// Nothing is written when the list is empty or bUsedYYBitmap is not set.
void BitMap::gen(OutputFile & o, uint32_t ind, uint32_t lb, uint32_t ub)
{
	if (!first || !bUsedYYBitmap)
	{
		return;
	}

	o.wind(ind).ws("static const unsigned char ").wstring(opts->yybm).ws("[] = {");

	const uint32_t width = ub - lb;

	// total number of bitmaps in the list
	uint32_t total = 0;
	for (const BitMap *p = first; p != NULL; p = p->next)
	{
		++total;
	}

	uint32_t *row = new uint32_t[width];

	BitMap *cur = first;
	uint32_t offset = 0;   // index of the current row's first entry in the table
	uint32_t rowFirst = 1; // 1-based number of the first bitmap in the current row
	while (cur != NULL)
	{
		memset(row, 0, width * sizeof(uint32_t));

		// hand out one bit per bitmap, at most eight per row
		for (uint32_t mask = 0x80; cur != NULL && mask != 0; mask >>= 1)
		{
			cur->i = offset;
			cur->m = mask;
			doGen(cur->go, cur->on, row, lb, mask);
			cur = const_cast<BitMap*>(cur->next);
		}

		// a row header is only useful when there is more than one row
		if (total > 8)
		{
			o.ws("\n").wind(ind+1).ws("/* table ").wu32(rowFirst).ws(" .. ").wu32(std::min(total, rowFirst+7)).ws(": ").wu32(offset).ws(" */");
		}

		for (uint32_t col = 0; col < width; ++col)
		{
			// eight values per output line
			if (col % 8 == 0)
			{
				o.ws("\n").wind(ind+1);
			}

			if (opts->yybmHexTable)
			{
				o.wu32_hex(row[col]);
			}
			else
			{
				o.wu32_width(row[col], 3);
			}
			o.ws(", ");
		}

		offset += width;
		rowFirst += 8;
	}

	o.ws("\n").wind(ind).ws("};\n");

	delete[] row;
}
|
||||
|
||||
// All spans in b1 that lead to s1 are pairwise equal to that in b2 leading to s2
|
||||
bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2)
|
||||
{
|
||||
const Span * e1 = &b1[n1];
|
||||
uint32_t lb1 = 0;
|
||||
const Span * e2 = &b2[n2];
|
||||
uint32_t lb2 = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
for (; b1 < e1 && b1->to != s1; ++b1)
|
||||
{
|
||||
lb1 = b1->ub;
|
||||
}
|
||||
for (; b2 < e2 && b2->to != s2; ++b2)
|
||||
{
|
||||
lb2 = b2->ub;
|
||||
}
|
||||
if (b1 == e1)
|
||||
{
|
||||
return b2 == e2;
|
||||
}
|
||||
if (b2 == e2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (lb1 != lb2 || b1->ub != b2->ub)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
++b1;
|
||||
++b2;
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
45
tools/re2c/src/codegen/bitmap.h
Normal file
45
tools/re2c/src/codegen/bitmap.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
#ifndef _RE2C_CODEGEN_BITMAP_
|
||||
#define _RE2C_CODEGEN_BITMAP_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct Go;
|
||||
struct Span;
|
||||
struct State;
|
||||
struct OutputFile;
|
||||
|
||||
class BitMap
|
||||
{
|
||||
public:
|
||||
static BitMap *first;
|
||||
|
||||
const Go *go;
|
||||
const State *on;
|
||||
const BitMap *next;
|
||||
uint32_t i;
|
||||
uint32_t m;
|
||||
|
||||
public:
|
||||
static const BitMap *find(const Go*, const State*);
|
||||
static const BitMap *find(const State*);
|
||||
static void gen(OutputFile &, uint32_t ind, uint32_t, uint32_t);
|
||||
BitMap(const Go*, const State*);
|
||||
~BitMap();
|
||||
|
||||
FORBID_COPY (BitMap);
|
||||
};
|
||||
|
||||
bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */
|
||||
#endif
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_BITMAP_
|
43
tools/re2c/src/codegen/emit.h
Normal file
43
tools/re2c/src/codegen/emit.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
#ifndef _RE2C_CODEGEN_EMIT_
|
||||
#define _RE2C_CODEGEN_EMIT_
|
||||
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
typedef std::vector<std::string> RegExpIndices;
|
||||
|
||||
void emit_action
|
||||
( const Action & action
|
||||
, OutputFile & o
|
||||
, uint32_t ind
|
||||
, bool & readCh
|
||||
, const State * const s
|
||||
, const std::string & condName
|
||||
, const Skeleton * skeleton
|
||||
, const std::set<label_t> & used_labels
|
||||
, bool save_yyaccept
|
||||
);
|
||||
|
||||
// helpers
|
||||
void genGoTo (OutputFile & o, uint32_t ind, const State * from, const State * to, bool & readCh);
|
||||
|
||||
// Returns a copy of `str` in which every occurrence of `param` is replaced
// by `value` formatted through operator<<.
//
// An empty `param` leaves the string untouched. The scan resumes right after
// each inserted replacement, so the function terminates even when the
// formatted value itself contains `param`, and inserted text is never
// substituted again.
template<typename _Ty> std::string replaceParam (std::string str, const std::string & param, const _Ty & value)
{
	if (param.empty ())
	{
		return str;
	}

	std::ostringstream strValue;
	strValue << value;
	// format once; the text is the same for every occurrence
	const std::string replacement = strValue.str ();

	std::string::size_type pos = 0;
	while ((pos = str.find (param, pos)) != std::string::npos)
	{
		str.replace (pos, param.length (), replacement);
		// skip past what was just inserted
		pos += replacement.length ();
	}
	return str;
}
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_EMIT_
|
388
tools/re2c/src/codegen/emit_action.cc
Normal file
388
tools/re2c/src/codegen/emit_action.cc
Normal file
|
@ -0,0 +1,388 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "src/codegen/emit.h"
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/parse/code.h"
|
||||
#include "src/parse/loc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class label_t;
|
||||
|
||||
static void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker);
|
||||
static void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s);
|
||||
static void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & init, const std::set<label_t> & used_labels);
|
||||
static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept);
|
||||
static void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept, size_t l, size_t r);
|
||||
static void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept);
|
||||
static void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton);
|
||||
static void genYYFill (OutputFile & o, size_t need);
|
||||
static void genSetCondition (OutputFile & o, uint32_t ind, const std::string & newcond);
|
||||
static void genSetState (OutputFile & o, uint32_t ind, uint32_t fillIndex);
|
||||
|
||||
void emit_action
|
||||
( const Action & action
|
||||
, OutputFile & o
|
||||
, uint32_t ind
|
||||
, bool & readCh
|
||||
, const State * const s
|
||||
, const std::string & condName
|
||||
, const Skeleton * skeleton
|
||||
, const std::set<label_t> & used_labels
|
||||
, bool save_yyaccept
|
||||
)
|
||||
{
|
||||
switch (action.type)
|
||||
{
|
||||
case Action::MATCH:
|
||||
emit_match (o, ind, readCh, s);
|
||||
break;
|
||||
case Action::INITIAL:
|
||||
emit_initial (o, ind, readCh, s, * action.info.initial, used_labels);
|
||||
break;
|
||||
case Action::SAVE:
|
||||
emit_save (o, ind, readCh, s, action.info.save, save_yyaccept);
|
||||
break;
|
||||
case Action::MOVE:
|
||||
break;
|
||||
case Action::ACCEPT:
|
||||
emit_accept (o, ind, readCh, s, * action.info.accepts);
|
||||
break;
|
||||
case Action::RULE:
|
||||
emit_rule (o, ind, s, action.info.rule, condName, skeleton);
|
||||
break;
|
||||
}
|
||||
if (s->isPreCtxt && opts->target != opt_t::DOT)
|
||||
{
|
||||
o.wstring(opts->input_api.stmt_backupctx (ind));
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the input-advancing code of a MATCH state (nothing for DOT output).
//
// With a non-zero `fill` the state skips one character and then goes through
// need(). Otherwise it either skips and peeks in one statement (when the
// following state exists and is not a rule), or only skips and flags through
// `readCh` that the character still has to be read.
void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s)
{
	if (opts->target == opt_t::DOT)
	{
		return;
	}

	if (s->fill != 0)
	{
		o.wstring(opts->input_api.stmt_skip (ind));
		need(o, ind, readCh, s->fill, false);
		return;
	}

	const bool peek_next = s->next
		&& s->next->action.type != Action::RULE;
	if (peek_next)
	{
		o.wstring(opts->input_api.stmt_skip_peek (ind));
		readCh = false;
	}
	else
	{
		/* do not read next char if match */
		o.wstring(opts->input_api.stmt_skip (ind));
		readCh = true;
	}
}
|
||||
|
||||
// Emits the code of the initial state (nothing for DOT output):
//  - a skip (or skip-and-peek) statement, if the state's own label is used;
//  - the initial label, if it is used;
//  - a debug call when opts->dFlag is set;
//  - either the fill check (non-zero `fill`) or an optional marker backup.
void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & initial, const std::set<label_t> & used_labels)
{
	if (opts->target == opt_t::DOT)
	{
		return;
	}

	const bool has_fill = s->fill != 0;

	if (used_labels.count(s->label))
	{
		if (has_fill)
		{
			o.wstring(opts->input_api.stmt_skip (ind));
		}
		else
		{
			o.wstring(opts->input_api.stmt_skip_peek (ind));
		}
	}

	if (used_labels.count(initial.label))
	{
		o.wstring(opts->labelPrefix).wlabel(initial.label).ws(":\n");
	}

	if (opts->dFlag)
	{
		o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(initial.label).ws(", *").wstring(opts->yycursor).ws(");\n");
	}

	if (has_fill)
	{
		need(o, ind, readCh, s->fill, initial.setMarker);
		return;
	}

	if (initial.setMarker)
	{
		o.wstring(opts->input_api.stmt_backup (ind));
	}
	readCh = false;
}
|
||||
|
||||
// Emits the code of a SAVE state (nothing for DOT output): optionally stores
// `save` into the yyaccept variable, then writes the skip-and-backup
// statement, followed by the fill check when the state has a non-zero `fill`.
void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept)
{
	if (opts->target == opt_t::DOT)
	{
		return;
	}

	if (save_yyaccept)
	{
		o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu32(save).ws(";\n");
	}

	if (s->fill == 0)
	{
		o.wstring(opts->input_api.stmt_skip_backup_peek (ind));
		readCh = false;
		return;
	}

	o.wstring(opts->input_api.stmt_skip_backup (ind));
	need(o, ind, readCh, s->fill, false);
}
|
||||
|
||||
// Emits a binary decision tree of nested if/else statements over the
// yyaccept variable for the accept indices l .. r (inclusive). A single
// remaining index becomes a jump to the corresponding accepting state.
void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts, size_t l, size_t r)
{
	if (l >= r)
	{
		genGoTo(o, ind, s, accepts[l], readCh);
		return;
	}

	const size_t mid = (l + r) / 2;
	// with exactly two candidates an equality test is enough
	const char *cmp = (r == l + 1) ? " == " : " <= ";

	o.wind(ind).ws("if (").wstring(opts->yyaccept).ws(cmp).wu64(mid).ws(") {\n");
	emit_accept_binary (o, ind + 1, readCh, s, accepts, l, mid);
	o.wind(ind).ws("} else {\n");
	emit_accept_binary (o, ind + 1, readCh, s, accepts, mid + 1, r);
	o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Emits the dispatch performed by an ACCEPT state: the restore statement (not
// for DOT), a pending peek if `readCh` is set, and then a jump selected by
// the yyaccept variable. Nothing is emitted when `accepts` is empty.
//
// Dispatch shapes, in order of precedence:
//  - a single accepting state: plain goto;
//  - opts->gFlag and at least opts->cGotoThreshold states: computed goto table;
//  - opts->sFlag, or exactly two states outside DOT: nested if/else;
//  - DOT target: one labelled edge per accepting state;
//  - otherwise: switch statement, the last state being the default case.
void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts)
{
	const size_t accepts_size = accepts.size ();
	if (accepts_size == 0)
	{
		return;
	}

	if (opts->target != opt_t::DOT)
	{
		o.wstring(opts->input_api.stmt_restore (ind));
	}

	if (readCh) // shouldn't be necessary, but might become at some point
	{
		o.wstring(opts->input_api.stmt_peek (ind));
		readCh = false;
	}

	if (accepts_size == 1)
	{
		// no need to write if statement here since there is only case 0.
		genGoTo(o, ind, s, accepts[0], readCh);
		return;
	}

	if (opts->gFlag && accepts_size >= opts->cGotoThreshold)
	{
		o.wind(ind).ws("{\n");
		o.wind(ind + 1).ws("static void *").wstring(opts->yytarget).ws("[").wu64(accepts_size).ws("] = {\n");
		for (uint32_t k = 0; k < accepts_size; ++k)
		{
			o.wind(ind + 2).ws("&&").wstring(opts->labelPrefix).wlabel(accepts[k]->label).ws(",\n");
		}
		o.wind(ind + 1).ws("};\n");
		o.wind(ind + 1).ws("goto *").wstring(opts->yytarget).ws("[").wstring(opts->yyaccept).ws("];\n");
		o.wind(ind).ws("}\n");
		return;
	}

	if (opts->sFlag || (accepts_size == 2 && opts->target != opt_t::DOT))
	{
		emit_accept_binary (o, ind, readCh, s, accepts, 0, accepts_size - 1);
		return;
	}

	if (opts->target == opt_t::DOT)
	{
		for (uint32_t k = 0; k < accepts_size; ++k)
		{
			o.wlabel(s->label).ws(" -> ").wlabel(accepts[k]->label);
			o.ws(" [label=\"yyaccept=").wu32(k).ws("\"]\n");
		}
		return;
	}

	const size_t last = accepts_size - 1;
	o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n");
	for (uint32_t k = 0; k < last; ++k)
	{
		o.wind(ind).ws("case ").wu32(k).ws(": \t");
		genGoTo(o, 0, s, accepts[k], readCh);
	}
	o.wind(ind).ws("default:\t");
	genGoTo(o, 0, s, accepts[last], readCh);
	o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Emits the code executed when `rule` matches in state `s`.
//
// DOT target: only the node, labelled with the location of the rule's code
// when it has any. Otherwise: an optional context restore (when the rule's
// context has a non-zero fixed length), then either the skeleton action or
// the user's action -- condition change, setup line, the code itself with
// line information -- or, lacking code, the goto into the new condition.
void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton)
{
	if (opts->target == opt_t::DOT)
	{
		o.wlabel(s->label);
		if (rule->code)
		{
			o.ws(" [label=\"").wstring(rule->code->loc.filename).ws(":").wu32(rule->code->loc.line).ws("\"]");
		}
		o.ws("\n");
		return;
	}

	const uint32_t ctx_len = rule->ctx->fixedLength();
	if (ctx_len != 0u && opts->target != opt_t::DOT)
	{
		o.wstring(opts->input_api.stmt_restorectx (ind));
	}

	if (opts->target == opt_t::SKELETON)
	{
		skeleton->emit_action (o, ind, rule->rank);
		return;
	}

	const bool has_newcond = !rule->newcond.empty ();

	if (has_newcond && condName != rule->newcond)
	{
		genSetCondition(o, ind, rule->newcond);
	}

	if (rule->code)
	{
		if (!yySetupRule.empty ())
		{
			o.wind(ind).wstring(yySetupRule).ws("\n");
		}
		o.wline_info(rule->code->loc.line, rule->code->loc.filename.c_str ())
			.wind(ind).wstring(rule->code->text).ws("\n")
			.wdelay_line_info ();
	}
	else if (has_newcond)
	{
		o.wind(ind).wstring(replaceParam(opts->condGoto, opts->condGotoParam, opts->condPrefix + rule->newcond)).ws("\n");
	}
}
|
||||
|
||||
// Emits the code that makes sure `n` characters are available (nothing for
// DOT output):
//  - with opts->fFlag, a state-set statement before and a fill label after
//    the check, both numbered with the current value of last_fill_index,
//    which is then incremented;
//  - with opts->fill_use and n > 0, the fill invocation, guarded by a
//    "less than" test when opts->fill_check is set;
//  - with n > 0, a peek (or backup-and-peek when `bSetMarker`) statement,
//    after which `readCh` is cleared.
void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker)
{
	if (opts->target == opt_t::DOT)
	{
		return;
	}

	const uint32_t fillIndex = last_fill_index;

	if (opts->fFlag)
	{
		last_fill_index++;
		genSetState (o, ind, fillIndex);
	}

	if (opts->fill_use && n > 0)
	{
		o.wind(ind);
		if (opts->fill_check)
		{
			// a dedicated expression is used when a single character is needed
			if (n == 1)
			{
				o.ws("if (").wstring(opts->input_api.expr_lessthan_one ()).ws(") ");
			}
			else
			{
				o.ws("if (").wstring(opts->input_api.expr_lessthan (n)).ws(") ");
			}
		}
		genYYFill(o, n);
	}

	if (opts->fFlag)
	{
		o.wstring(opts->yyfilllabel).wu32(fillIndex).ws(":\n");
	}

	if (n == 0)
	{
		return;
	}

	if (bSetMarker)
	{
		o.wstring(opts->input_api.stmt_backup_peek (ind));
	}
	else
	{
		o.wstring(opts->input_api.stmt_peek (ind));
	}
	readCh = false;
}
|
||||
|
||||
// Writes the fill invocation: opts->fill with opts->fill_arg substituted by
// `need`. Unless opts->fill_naked is set, it is followed by "(<need>)" (only
// with opts->fill_arg_use) and a semicolon. The line always ends with "\n".
void genYYFill (OutputFile & o, size_t need)
{
	o.wstring(replaceParam (opts->fill, opts->fill_arg, need));

	const bool decorate = !opts->fill_naked;
	if (decorate && opts->fill_arg_use)
	{
		o.ws("(").wu64(need).ws(")");
	}
	if (decorate)
	{
		o.ws(";");
	}

	o.ws("\n");
}
|
||||
|
||||
// Writes the statement that switches to condition `newcond`: opts->cond_set
// with opts->cond_set_arg substituted by the prefixed condition name,
// followed by "(<prefix><newcond>);" unless opts->cond_set_naked is set.
void genSetCondition(OutputFile & o, uint32_t ind, const std::string& newcond)
{
	const std::string stmt = replaceParam (opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + newcond);
	o.wind(ind).wstring(stmt);

	if (opts->cond_set_naked)
	{
		o.ws("\n");
		return;
	}

	o.ws("(").wstring(opts->condEnumPrefix).wstring(newcond).ws(");");
	o.ws("\n");
}
|
||||
|
||||
// Writes the statement that stores `fillIndex` as the current state:
// opts->state_set with opts->state_set_arg substituted by the index,
// followed by "(<fillIndex>);" unless opts->state_set_naked is set.
void genSetState(OutputFile & o, uint32_t ind, uint32_t fillIndex)
{
	const std::string stmt = replaceParam (opts->state_set, opts->state_set_arg, fillIndex);
	o.wind(ind).wstring(stmt);

	if (opts->state_set_naked)
	{
		o.ws("\n");
		return;
	}

	o.ws("(").wu32(fillIndex).ws(");");
	o.ws("\n");
}
|
||||
|
||||
} // namespace re2c
|
348
tools/re2c/src/codegen/emit_dfa.cc
Normal file
348
tools/re2c/src/codegen/emit_dfa.cc
Normal file
|
@ -0,0 +1,348 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/emit.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/util/counter.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static std::string genGetCondition ();
|
||||
static void genCondGotoSub (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax);
|
||||
static void genCondTable (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
|
||||
static void genCondGoto (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
|
||||
static void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label);
|
||||
|
||||
std::string genGetCondition()
|
||||
{
|
||||
return opts->cond_get_naked
|
||||
? opts->cond_get
|
||||
: opts->cond_get + "()";
|
||||
}
|
||||
|
||||
// Emits a transition from state `from` to state `to`.
// DOT target: an edge "from -> to". Otherwise a goto statement, preceded by
// a peek when a character read is pending (`readCh`) and `to` is not the
// state that directly follows `from`.
void genGoTo(OutputFile & o, uint32_t ind, const State *from, const State *to, bool & readCh)
{
	if (opts->target == opt_t::DOT)
	{
		o.wlabel(from->label).ws(" -> ").wlabel(to->label).ws("\n");
		return;
	}

	const bool falls_through = (from->next == to);
	if (readCh && !falls_through)
	{
		o.wstring(opts->input_api.stmt_peek (ind));
		readCh = false;
	}

	o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(to->label).ws(";\n");
}
|
||||
|
||||
// Emits the header of state `s` (nothing for DOT output): its label when
// `used_label` is set, and a debug call when opts->dFlag is set and the
// state's action is not INITIAL.
void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label)
{
	if (opts->target == opt_t::DOT)
	{
		return;
	}

	if (used_label)
	{
		o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n");
	}

	if (opts->dFlag && (s->action.type != Action::INITIAL))
	{
		o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(opts->input_api.expr_peek ()).ws(");\n");
	}
}
|
||||
|
||||
void DFA::count_used_labels (std::set<label_t> & used, label_t start, label_t initial, bool force_start) const
|
||||
{
|
||||
// In '-f' mode, default state is always state 0
|
||||
if (opts->fFlag)
|
||||
{
|
||||
used.insert (label_t::first ());
|
||||
}
|
||||
if (force_start)
|
||||
{
|
||||
used.insert (start);
|
||||
}
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
s->go.used_labels (used);
|
||||
}
|
||||
for (uint32_t i = 0; i < accepts.size (); ++i)
|
||||
{
|
||||
used.insert (accepts[i]->label);
|
||||
}
|
||||
// must go last: it needs the set of used labels
|
||||
if (used.count (head->label))
|
||||
{
|
||||
used.insert (initial);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the code of every state of the DFA in list order: state header,
// action, then outgoing transitions. `readCh` starts out false for each state.
void DFA::emit_body (OutputFile & o, uint32_t& ind, const std::set<label_t> & used_labels, label_t initial) const
{
	// If DFA has transitions to initial state, then initial state
	// has a piece of code that advances input position. We must
	// skip it when entering DFA.
	if (used_labels.count(head->label) != 0)
	{
		o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(initial).ws(";\n");
	}

	// yyaccept only has to be stored when there is more than one accepting state
	const bool save_yyaccept = accepts.size () > 1;

	State * st = head;
	while (st)
	{
		bool readCh = false;
		const bool label_used = used_labels.count (st->label) != 0;
		emit_state (o, ind, st, label_used);
		emit_action (st->action, o, ind, readCh, st, cond, skeleton, used_labels, save_yyaccept);
		st->go.emit(o, ind, readCh);
		st = st->next;
	}
}
|
||||
|
||||
// Emits the whole DFA into `output.source`.
//
// Steps: allocate labels (start, initial, one per state), collect the used
// labels, run the skeleton warnings, then either emit the skeleton program
// (SKELETON target, once per DFA name) or the regular output: prolog, bitmap
// table, condition dispatch, condition label, states and epilog. Finally the
// global bitmap list is destroyed.
//
// `ind` is the current indentation level and is updated in place;
// `bPrologBrace` is set when a block (or the DOT graph) was opened by the
// prolog and therefore has to be closed by the epilog.
void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBrace)
{
	OutputFile & o = output.source;

	const bool bProlog = !opts->cFlag || !bWroteCondCheck;

	// start_label points to the beginning of current re2c block
	// (prior to condition dispatch in '-c' mode)
	// it can be forced by configuration 're2c:startlabel = <integer>;'
	label_t start_label = o.label_counter.next ();
	// initial_label points to the beginning of DFA
	// in '-c' mode this is NOT equal to start_label
	label_t initial_label = (bProlog && opts->cFlag)
		? o.label_counter.next ()
		: start_label;

	State * st = head;
	while (st)
	{
		st->label = o.label_counter.next ();
		st = st->next;
	}

	std::set<label_t> used_labels;
	count_used_labels (used_labels, start_label, initial_label, o.get_force_start_label ());

	head->action.set_initial (initial_label, head->action.type == Action::SAVE);

	skeleton->warn_undefined_control_flow ();
	skeleton->warn_unreachable_rules ();
	skeleton->warn_match_empty ();

	if (opts->target == opt_t::SKELETON)
	{
		// each skeleton is emitted only once, keyed by the DFA name
		const bool is_new = output.skeletons.insert (name).second;
		if (is_new)
		{
			skeleton->emit_data (o.file_name);
			skeleton->emit_start (o, max_fill, need_backup, need_backupctx, need_accept);
			uint32_t body_ind = 2;
			emit_body (o, body_ind, used_labels, initial_label);
			skeleton->emit_end (o, need_backup, need_backupctx);
		}
	}
	else
	{
		// Generate prolog
		if (bProlog)
		{
			o.ws("\n").wdelay_line_info ();

			if (opts->target == opt_t::DOT)
			{
				bPrologBrace = true;
				o.ws("digraph re2c {\n");
			}
			else
			{
				// a block is opened whenever declarations follow
				const bool needs_brace =
					   (!opts->fFlag && o.get_used_yyaccept ())
					|| (!opts->fFlag && opts->bEmitYYCh)
					|| (opts->bFlag && !opts->cFlag && BitMap::first)
					|| (opts->cFlag && !bWroteCondCheck && opts->gFlag)
					|| (opts->fFlag && !bWroteGetState && opts->gFlag);
				if (needs_brace)
				{
					bPrologBrace = true;
					o.wind(ind++).ws("{\n");
				}
				else if (ind == 0)
				{
					ind = 1;
				}
			}

			if (!opts->fFlag && opts->target != opt_t::DOT)
			{
				if (opts->bEmitYYCh)
				{
					o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n");
				}
				o.wdelay_yyaccept_init (ind);
			}
			else
			{
				o.ws("\n");
			}
		}

		// without conditions the bitmap table goes right after the prolog
		if (opts->bFlag && !opts->cFlag && BitMap::first)
		{
			BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256);
		}

		if (bProlog)
		{
			if (opts->cFlag && !bWroteCondCheck && opts->gFlag)
			{
				genCondTable(o, ind, output.types);
			}

			o.wdelay_state_goto (ind);

			if (opts->cFlag && opts->target != opt_t::DOT && used_labels.count(start_label))
			{
				o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n");
			}

			o.wuser_start_label ();

			if (opts->cFlag && !bWroteCondCheck)
			{
				genCondGoto(o, ind, output.types);
			}
		}

		if (opts->cFlag && !cond.empty())
		{
			if (opts->condDivider.length())
			{
				o.wstring(replaceParam(opts->condDivider, opts->condDividerParam, cond)).ws("\n");
			}

			if (opts->target == opt_t::DOT)
			{
				o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n");
			}
			else
			{
				o.wstring(opts->condPrefix).wstring(cond).ws(":\n");
			}
		}

		// with conditions each DFA gets its own block holding the bitmap table
		if (opts->cFlag && opts->bFlag && BitMap::first)
		{
			o.wind(ind++).ws("{\n");
			BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256);
		}

		// Generate code
		emit_body (o, ind, used_labels, initial_label);

		if (opts->cFlag && opts->bFlag && BitMap::first)
		{
			o.wind(--ind).ws("}\n");
		}

		// Generate epilog
		if ((!opts->cFlag || isLastCond) && bPrologBrace)
		{
			o.wind(--ind).ws("}\n");
		}
	}

	// Cleanup
	if (BitMap::first)
	{
		delete BitMap::first;
		BitMap::first = NULL;
	}
}
|
||||
|
||||
// Writes the `static void *<yyctable>[N]` array holding the address of the
// label of every condition in `condnames`, in order.
void genCondTable(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
{
	const size_t count = condnames.size ();

	o.wind(ind).ws("static void *").wstring(opts->yyctable).ws("[").wu64(count).ws("] = {\n");
	for (std::vector<std::string>::const_iterator it = condnames.begin (); it != condnames.end (); ++it)
	{
		o.wind(ind + 1).ws("&&").wstring(opts->condPrefix).wstring(*it).ws(",\n");
	}
	o.wind(ind).ws("};\n");
}
|
||||
|
||||
// Emits a binary tree of nested if/else statements that dispatches on the
// condition number within cMin .. cMax (inclusive). A range of one condition
// becomes a goto to that condition's label.
void genCondGotoSub(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax)
{
	if (cMin == cMax)
	{
		o.wind(ind).ws("goto ").wstring(opts->condPrefix).wstring(condnames[cMin]).ws(";\n");
		return;
	}

	// first condition number handled by the 'else' branch
	const uint32_t pivot = cMin + ((cMax - cMin + 1) / 2);

	o.wind(ind).ws("if (").wstring(genGetCondition()).ws(" < ").wu32(pivot).ws(") {\n");
	genCondGotoSub(o, ind + 1, condnames, cMin, pivot - 1);
	o.wind(ind).ws("} else {\n");
	genCondGotoSub(o, ind + 1, condnames, pivot, cMax);
	o.wind(ind).ws("}\n");
}
|
||||
|
||||
/*
|
||||
* note [condition order]
|
||||
*
|
||||
* In theory re2c makes no guarantee about the order of conditions in
|
||||
* the generated lexer. Users should define condition type 'YYCONDTYPE'
|
||||
* and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'.
|
||||
* This way code is independent of internal re2c condition numbering.
|
||||
*
|
||||
* However, it is possible to manually hardcode condition numbers and make
|
||||
* re2c generate condition dispatch without explicit use of condition names
|
||||
* (nested 'if' statements with '-b' or computed 'goto' table with '-g').
|
||||
* This code is syntactically valid (compiles), but unsafe:
|
||||
* - change of re2c options may break compilation
|
||||
* - change of internal re2c condition numbering may break runtime
|
||||
*
|
||||
* re2c has to preserve the existing numbering scheme.
|
||||
*
|
||||
* re2c warns about implicit assumptions about condition order, unless:
|
||||
* - condition type is defined with 'types:re2c' or '-t, --type-header'
|
||||
* - dispatch is independent of condition order: either it uses
|
||||
* explicit condition names or there's only one condition and
|
||||
* dispatch shrinks to unconditional jump
|
||||
*/
|
||||
// Emits the dispatch on the current condition and records, through
// bWroteCondCheck, that it has been written.
//
// Shapes: DOT edges from node 0 to each condition; a computed goto through
// the condition table with opts->gFlag; nested if/else with opts->sFlag;
// otherwise a switch over the condition names. The condition-order warning
// is disabled whenever the dispatch does not depend on condition numbering
// (see note [condition order]).
void genCondGoto(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
{
	const size_t count = condnames.size ();

	if (opts->target == opt_t::DOT)
	{
		o.warn_condition_order = false; // see note [condition order]
		for (size_t k = 0; k < count; ++k)
		{
			const std::string & name = condnames[k];
			o.ws("0 -> ").wstring(name).ws(" [label=\"state=").wstring(name).ws("\"]\n");
		}
	}
	else if (opts->gFlag)
	{
		o.wind(ind).ws("goto *").wstring(opts->yyctable).ws("[").wstring(genGetCondition()).ws("];\n");
	}
	else if (opts->sFlag)
	{
		if (count == 1)
		{
			o.warn_condition_order = false; // see note [condition order]
		}
		genCondGotoSub(o, ind, condnames, 0, static_cast<uint32_t> (count) - 1);
	}
	else
	{
		o.warn_condition_order = false; // see note [condition order]
		o.wind(ind).ws("switch (").wstring(genGetCondition()).ws(") {\n");
		for (size_t k = 0; k < count; ++k)
		{
			const std::string & name = condnames[k];
			o.wind(ind).ws("case ").wstring(opts->condEnumPrefix).wstring(name).ws(": goto ").wstring(opts->condPrefix).wstring(name).ws(";\n");
		}
		o.wind(ind).ws("}\n");
	}

	o.wdelay_warn_condition_order ();
	bWroteCondCheck = true;
}
|
||||
|
||||
} // end namespace re2c
|
216
tools/re2c/src/codegen/go.h
Normal file
216
tools/re2c/src/codegen/go.h
Normal file
|
@ -0,0 +1,216 @@
|
|||
#ifndef _RE2C_CODEGEN_GO_
|
||||
#define _RE2C_CODEGEN_GO_
|
||||
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class BitMap;
|
||||
struct State;
|
||||
struct If;
|
||||
|
||||
struct Span
|
||||
{
|
||||
uint32_t ub;
|
||||
State * to;
|
||||
|
||||
FORBID_COPY (Span);
|
||||
};
|
||||
|
||||
struct Case
|
||||
{
|
||||
std::vector<std::pair<uint32_t, uint32_t> > ranges;
|
||||
const State * to;
|
||||
void emit (OutputFile & o, uint32_t ind);
|
||||
|
||||
inline Case ()
|
||||
: ranges ()
|
||||
, to (NULL)
|
||||
{}
|
||||
|
||||
FORBID_COPY (Case);
|
||||
};
|
||||
|
||||
struct Cases
|
||||
{
|
||||
const State * def;
|
||||
Case * cases;
|
||||
uint32_t cases_size;
|
||||
void add (uint32_t lb, uint32_t ub, State * to);
|
||||
Cases (const Span * s, uint32_t n);
|
||||
~Cases ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (Cases);
|
||||
};
|
||||
|
||||
// A comparison used as a branch condition: a comparison string paired with
// the value to compare against.
struct Cond
{
	std::string compare; // comparison, stored as text
	uint32_t value;      // right-hand operand

	Cond (const std::string & cmp, uint32_t val);
};
|
||||
|
||||
struct Binary
|
||||
{
|
||||
Cond * cond;
|
||||
If * thn;
|
||||
If * els;
|
||||
Binary (const Span * s, uint32_t n, const State * next);
|
||||
~Binary ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (Binary);
|
||||
};
|
||||
|
||||
struct Linear
|
||||
{
|
||||
std::vector<std::pair<const Cond *, const State *> > branches;
|
||||
Linear (const Span * s, uint32_t n, const State * next);
|
||||
~Linear ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
};
|
||||
|
||||
struct If
|
||||
{
|
||||
enum type_t
|
||||
{
|
||||
BINARY,
|
||||
LINEAR
|
||||
} type;
|
||||
union
|
||||
{
|
||||
Binary * binary;
|
||||
Linear * linear;
|
||||
} info;
|
||||
If (type_t t, const Span * sp, uint32_t nsp, const State * next);
|
||||
~If ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
};
|
||||
|
||||
struct SwitchIf
|
||||
{
|
||||
enum
|
||||
{
|
||||
SWITCH,
|
||||
IF
|
||||
} type;
|
||||
union
|
||||
{
|
||||
Cases * cases;
|
||||
If * ifs;
|
||||
} info;
|
||||
SwitchIf (const Span * sp, uint32_t nsp, const State * next);
|
||||
~SwitchIf ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
};
|
||||
|
||||
struct GoBitmap
|
||||
{
|
||||
const BitMap * bitmap;
|
||||
const State * bitmap_state;
|
||||
SwitchIf * hgo;
|
||||
SwitchIf * lgo;
|
||||
GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next);
|
||||
~GoBitmap ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (GoBitmap);
|
||||
};
|
||||
|
||||
struct CpgotoTable
|
||||
{
|
||||
static const uint32_t TABLE_SIZE;
|
||||
const State ** table;
|
||||
CpgotoTable (const Span * span, uint32_t nSpans);
|
||||
~CpgotoTable ();
|
||||
void emit (OutputFile & o, uint32_t ind);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
private:
|
||||
label_t max_label () const;
|
||||
|
||||
FORBID_COPY (CpgotoTable);
|
||||
};
|
||||
|
||||
struct Cpgoto
|
||||
{
|
||||
SwitchIf * hgo;
|
||||
CpgotoTable * table;
|
||||
Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next);
|
||||
~Cpgoto ();
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
FORBID_COPY (Cpgoto);
|
||||
};
|
||||
|
||||
struct Dot
|
||||
{
|
||||
const State * from;
|
||||
Cases * cases;
|
||||
Dot (const Span * sp, uint32_t nsp, const State * from);
|
||||
~Dot ();
|
||||
void emit (OutputFile & o);
|
||||
|
||||
FORBID_COPY (Dot);
|
||||
};
|
||||
|
||||
struct Go
|
||||
{
|
||||
uint32_t nSpans; // number of spans
|
||||
Span * span;
|
||||
enum
|
||||
{
|
||||
EMPTY,
|
||||
SWITCH_IF,
|
||||
BITMAP,
|
||||
CPGOTO,
|
||||
DOT
|
||||
} type;
|
||||
union
|
||||
{
|
||||
SwitchIf * switchif;
|
||||
GoBitmap * bitmap;
|
||||
Cpgoto * cpgoto;
|
||||
Dot * dot;
|
||||
} info;
|
||||
|
||||
Go ();
|
||||
~Go ();
|
||||
void init (const State * from);
|
||||
void emit (OutputFile & o, uint32_t ind, bool & readCh);
|
||||
void used_labels (std::set<label_t> & used);
|
||||
|
||||
Go (const Go & g)
|
||||
: nSpans (g.nSpans)
|
||||
, span (g.span)
|
||||
, type (g.type)
|
||||
, info (g.info)
|
||||
{}
|
||||
Go & operator = (const Go & g)
|
||||
{
|
||||
nSpans = g.nSpans;
|
||||
span = g.span;
|
||||
type = g.type;
|
||||
info = g.info;
|
||||
return * this;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_GO_
|
284
tools/re2c/src/codegen/go_construct.cc
Normal file
284
tools/re2c/src/codegen/go_construct.cc
Normal file
|
@ -0,0 +1,284 @@
|
|||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/util/allocate.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x);
|
||||
|
||||
// Groups the spans by target state. The target of the last span becomes
// the default case (NULL when there are no spans).
Cases::Cases (const Span * span, uint32_t span_size)
    : def (span_size == 0 ? NULL : span[span_size - 1].to)
    , cases (new Case[span_size])
    , cases_size (0)
{
    // Each span covers [lower, ub); the next span starts where this one ends.
    uint32_t lower = 0;
    for (uint32_t k = 0; k != span_size; ++k)
    {
        const Span & cur = span[k];
        add (lower, cur.ub, cur.to);
        lower = cur.ub;
    }
}
|
||||
|
||||
void Cases::add (uint32_t lb, uint32_t ub, State * to)
|
||||
{
|
||||
for (uint32_t i = 0; i < cases_size; ++i)
|
||||
{
|
||||
if (cases[i].to == to)
|
||||
{
|
||||
cases[i].ranges.push_back (std::make_pair (lb, ub));
|
||||
return;
|
||||
}
|
||||
}
|
||||
cases[cases_size].ranges.push_back (std::make_pair (lb, ub));
|
||||
cases[cases_size].to = to;
|
||||
++cases_size;
|
||||
}
|
||||
|
||||
// Stores a comparison operator (as text) and the value to compare against.
Cond::Cond (const std::string & cmp, uint32_t val)
    : compare (cmp)
    , value (val)
{
}
|
||||
|
||||
// Splits the spans in two halves and tests "<= last value of the lower half".
// A half with more than 4 spans is split again, otherwise it becomes a
// linear chain.
Binary::Binary (const Span * s, uint32_t n, const State * next)
    : cond (NULL)
    , thn (NULL)
    , els (NULL)
{
    const uint32_t lo_count = n / 2;
    const uint32_t hi_count = n - lo_count;
    const If::type_t lo_kind = lo_count > 4 ? If::BINARY : If::LINEAR;
    const If::type_t hi_kind = hi_count > 4 ? If::BINARY : If::LINEAR;

    // 'ub' is an exclusive bound, hence the "- 1" for an inclusive "<=" test.
    cond = new Cond ("<=", s[lo_count - 1].ub - 1);
    thn = new If (lo_kind, s, lo_count, next);
    els = new If (hi_kind, s + lo_count, hi_count, next);
}
|
||||
|
||||
// Turns the spans into a chain of conditional jumps.
// When the final target equals 'next', the last jump is left out.
Linear::Linear (const Span * s, uint32_t n, const State * next)
    : branches ()
{
    while (true)
    {
        const State * const base = s[0].to;

        // A one-value span sitting between two spans that both go to 'base'
        // is handled with a single equality test.
        while (n >= 3 && s[2].to == base && s[1].ub - s[0].ub == 1)
        {
            if (n == 3 && s[1].to == next)
            {
                // Last triple and the single value goes to 'next':
                // everything else goes to 'base'.
                branches.push_back (std::make_pair (new Cond ("!=", s[0].ub), base));
                return;
            }
            branches.push_back (std::make_pair (new Cond ("==", s[0].ub), s[1].to));
            s += 2;
            n -= 2;
        }

        if (n == 1)
        {
            // Final span: jump unconditionally unless its target is 'next'.
            if (next == NULL || s[0].to != next)
            {
                branches.push_back (std::make_pair (static_cast<const Cond *> (NULL), s[0].to));
            }
            return;
        }

        if (n == 2 && base == next)
        {
            // Lower span goes to 'next': only the upper span needs a test.
            branches.push_back (std::make_pair (new Cond (">=", s[0].ub), s[1].to));
            return;
        }

        // General case: peel off the lowest span with an inclusive "<=" test.
        branches.push_back (std::make_pair (new Cond ("<=", s[0].ub - 1), base));
        ++s;
        --n;
    }
}
|
||||
|
||||
// Allocates the Binary or Linear implementation selected by 't'.
If::If (type_t t, const Span * sp, uint32_t nsp, const State * next)
    : type (t)
    , info ()
{
    if (type == BINARY)
    {
        info.binary = new Binary (sp, nsp, next);
    }
    else if (type == LINEAR)
    {
        info.linear = new Linear (sp, nsp, next);
    }
}
|
||||
|
||||
// Picks between a 'switch' and an 'if' tree for the given spans.
SwitchIf::SwitchIf (const Span * sp, uint32_t nsp, const State * next)
    : type (IF)
    , info ()
{
    // Without the s flag, anything beyond two spans becomes a 'switch'.
    const bool by_default = !opts->sFlag && nsp > 2;
    // More than 8 spans packed into a narrow value range also become a
    // 'switch'. The "nsp > 8" guard keeps sp[nsp - 2] in bounds.
    const bool dense = nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2));

    if (by_default || dense)
    {
        type = SWITCH;
        info.cases = new Cases (sp, nsp);
        return;
    }

    // 'if' tree: binary split above 5 spans, linear chain otherwise.
    info.ifs = new If (nsp > 5 ? If::BINARY : If::LINEAR, sp, nsp, next);
}
|
||||
|
||||
// Builds the fallback transitions that accompany the bitmap test.
GoBitmap::GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next)
    : bitmap (bm)
    , bitmap_state (bm_state)
    , hgo (NULL)
    , lgo (NULL)
{
    // Scratch copy of the spans with those leading to 'bm_state' removed.
    Span * const scratch = allocate<Span> (nSpans);
    const uint32_t scratch_size = unmap (scratch, span, nSpans, bm_state);

    if (scratch_size != 0)
    {
        lgo = new SwitchIf (scratch, scratch_size, next);
    }

    // if there are any low spans, then next state for high spans
    // must be NULL to trigger explicit goto generation in linear 'if'
    if (hSpans != 0)
    {
        hgo = new SwitchIf (hspan, hSpans, lgo ? NULL : next);
    }

    operator delete (scratch);
}
|
||||
|
||||
const uint32_t CpgotoTable::TABLE_SIZE = 0x100;
|
||||
|
||||
// Fills the jump table: entry 'c' receives the target of the span that
// contains value 'c'. Values at or above TABLE_SIZE are ignored.
//
// The array is value-initialised (note the trailing "()"), so an entry that
// no span covers is NULL rather than an indeterminate pointer.
// NOTE(review): other CpgotoTable members dereference every entry, so the
// spans are presumably expected to cover [0, TABLE_SIZE) -- confirm.
CpgotoTable::CpgotoTable (const Span * span, uint32_t nSpans)
    : table (new const State * [TABLE_SIZE] ())
{
    uint32_t c = 0;
    for (uint32_t i = 0; i < nSpans; ++i)
    {
        // 'ub' is exclusive; 'c' carries over from one span to the next.
        for (; c < span[i].ub && c < TABLE_SIZE; ++c)
        {
            table[c] = span[i].to;
        }
    }
}
|
||||
|
||||
// Builds the jump table from all spans, plus a SwitchIf for the high spans
// when there are any.
Cpgoto::Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next)
    : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next))
    , table (new CpgotoTable (span, nSpans))
{
}
|
||||
|
||||
// Remembers the source state and groups the spans by target state.
Dot::Dot (const Span * sp, uint32_t nsp, const State * s)
    : from (s)
    , cases (new Cases (sp, nsp))
{
}
|
||||
|
||||
// Starts empty: no spans and no implementation chosen yet.
Go::Go ()
    : nSpans (0)
    , span (NULL)
    , type (EMPTY)
    , info ()
{
}
|
||||
|
||||
void Go::init (const State * from)
|
||||
{
|
||||
if (nSpans == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// initialize high (wide) spans
|
||||
uint32_t hSpans = 0;
|
||||
const Span * hspan = NULL;
|
||||
for (uint32_t i = 0; i < nSpans; ++i)
|
||||
{
|
||||
if (span[i].ub > 0x100)
|
||||
{
|
||||
hspan = &span[i];
|
||||
hSpans = nSpans - i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// initialize bitmaps
|
||||
uint32_t nBitmaps = 0;
|
||||
const BitMap * bitmap = NULL;
|
||||
const State * bitmap_state = NULL;
|
||||
for (uint32_t i = 0; i < nSpans; ++i)
|
||||
{
|
||||
if (span[i].to->isBase)
|
||||
{
|
||||
const BitMap *b = BitMap::find (span[i].to);
|
||||
if (b && matches(b->go->span, b->go->nSpans, b->on, span, nSpans, span[i].to))
|
||||
{
|
||||
if (bitmap == NULL)
|
||||
{
|
||||
bitmap = b;
|
||||
bitmap_state = span[i].to;
|
||||
}
|
||||
nBitmaps++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t dSpans = nSpans - hSpans - nBitmaps;
|
||||
if (opts->target == opt_t::DOT)
|
||||
{
|
||||
type = DOT;
|
||||
info.dot = new Dot (span, nSpans, from);
|
||||
}
|
||||
else if (opts->gFlag && (dSpans >= opts->cGotoThreshold))
|
||||
{
|
||||
type = CPGOTO;
|
||||
info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from->next);
|
||||
}
|
||||
else if (opts->bFlag && (nBitmaps > 0))
|
||||
{
|
||||
type = BITMAP;
|
||||
info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bitmap, bitmap_state, from->next);
|
||||
bUsedYYBitmap = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
type = SWITCH_IF;
|
||||
info.switchif = new SwitchIf (span, nSpans, from->next);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find all spans, that map to the given state. For each of them,
|
||||
* find upper adjacent span, that maps to another state (if such
|
||||
* span exists, otherwise try lower one).
|
||||
* If input contains single span that maps to the given state,
|
||||
* then output contains 0 spans.
|
||||
*/
|
||||
uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x)
{
    uint32_t count = 0;
    const Span * const end = old_span + old_nspans;
    for (const Span * src = old_span; src != end; ++src)
    {
        if (src->to == x)
        {
            // Spans leading to 'x' are dropped.
            continue;
        }
        if (count != 0 && new_span[count - 1].to == src->to)
        {
            // Same target as the previously kept span: extend it.
            new_span[count - 1].ub = src->ub;
            continue;
        }
        new_span[count].to = src->to;
        new_span[count].ub = src->ub;
        ++count;
    }

    // The last kept span always reaches the upper bound of the input.
    if (count != 0)
    {
        new_span[count - 1].ub = old_span[old_nspans - 1].ub;
    }
    return count;
}
|
||||
|
||||
} // namespace re2c
|
99
tools/re2c/src/codegen/go_destruct.cc
Normal file
99
tools/re2c/src/codegen/go_destruct.cc
Normal file
|
@ -0,0 +1,99 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
Cases::~Cases ()
|
||||
{
|
||||
delete [] cases;
|
||||
}
|
||||
|
||||
Binary::~Binary ()
|
||||
{
|
||||
delete cond;
|
||||
delete thn;
|
||||
delete els;
|
||||
}
|
||||
|
||||
Linear::~Linear ()
|
||||
{
|
||||
for (uint32_t i = 0; i < branches.size (); ++i)
|
||||
{
|
||||
delete branches[i].first;
|
||||
}
|
||||
}
|
||||
|
||||
If::~If ()
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case BINARY:
|
||||
delete info.binary;
|
||||
break;
|
||||
case LINEAR:
|
||||
delete info.linear;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
SwitchIf::~SwitchIf ()
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case SWITCH:
|
||||
delete info.cases;
|
||||
break;
|
||||
case IF:
|
||||
delete info.ifs;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GoBitmap::~GoBitmap ()
|
||||
{
|
||||
delete hgo;
|
||||
delete lgo;
|
||||
}
|
||||
|
||||
CpgotoTable::~CpgotoTable ()
|
||||
{
|
||||
delete [] table;
|
||||
}
|
||||
|
||||
Cpgoto::~Cpgoto ()
|
||||
{
|
||||
delete hgo;
|
||||
delete table;
|
||||
}
|
||||
|
||||
Dot::~Dot ()
|
||||
{
|
||||
delete cases;
|
||||
}
|
||||
|
||||
Go::~Go ()
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case EMPTY:
|
||||
break;
|
||||
case SWITCH_IF:
|
||||
delete info.switchif;
|
||||
break;
|
||||
case BITMAP:
|
||||
delete info.bitmap;
|
||||
break;
|
||||
case CPGOTO:
|
||||
delete info.cpgoto;
|
||||
break;
|
||||
case DOT:
|
||||
delete info.dot;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
271
tools/re2c/src/codegen/go_emit.cc
Normal file
271
tools/re2c/src/codegen/go_emit.cc
Normal file
|
@ -0,0 +1,271 @@
|
|||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/codegen/print.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value);
|
||||
static void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to);
|
||||
static std::string output_yych (bool & readCh);
|
||||
static std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo);
|
||||
|
||||
std::string output_yych (bool & readCh)
|
||||
{
|
||||
if (readCh)
|
||||
{
|
||||
readCh = false;
|
||||
return "(" + opts->input_api.expr_peek_save () + ")";
|
||||
}
|
||||
else
|
||||
{
|
||||
return opts->yych;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes "if (<yych> <compare> <value>) " at indentation 'ind'.
// The statement guarded by the 'if' is written by the caller.
void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value)
{
    const std::string lhs = output_yych (readCh);
    o.wind(ind).ws("if (").wstring(lhs);
    o.ws(" ").wstring(compare).ws(" ").wc_hex (value);
    o.ws(") ");
}
|
||||
|
||||
// Writes "goto <prefix><label>;". If the character has not been read yet,
// a peek statement is written first and 'readCh' is cleared.
void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to)
{
    if (readCh)
    {
        readCh = false;
        o.wstring(opts->input_api.stmt_peek (ind));
    }
    o.wind(ind).ws("goto ");
    o.wstring(opts->labelPrefix).wlabel(to).ws(";\n");
}
|
||||
|
||||
std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo)
|
||||
{
|
||||
std::string yych = output_yych (readCh);
|
||||
if (hgo != NULL)
|
||||
{
|
||||
o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n");
|
||||
hgo->emit (o, ind + 1, readCh);
|
||||
o.wind(ind).ws("} else ");
|
||||
yych = opts->yych;
|
||||
}
|
||||
else
|
||||
{
|
||||
o.wind(ind);
|
||||
}
|
||||
return yych;
|
||||
}
|
||||
|
||||
// Writes one "case <value>:" line per value in every range of this case.
// The last line is left without a newline so the caller can append to it.
void Case::emit (OutputFile & o, uint32_t ind)
{
    const size_t nranges = ranges.size ();
    for (size_t r = 0; r < nranges; ++r)
    {
        const uint32_t past = ranges[r].second; // exclusive bound
        const bool final_range = (r + 1 == nranges);
        for (uint32_t ch = ranges[r].first; ch < past; ++ch)
        {
            o.wind(ind).ws("case ").wc_hex (ch).ws(":");
            if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC)
            {
                // Add the decoded character as a comment when printable.
                const uint32_t decoded = opts->encoding.decodeUnsafe (ch);
                if (is_print (decoded))
                {
                    o.ws(" /* ").wc(static_cast<char> (decoded)).ws(" */");
                }
            }
            const bool final_value = final_range && ch + 1 == past;
            if (!final_value)
            {
                o.ws("\n");
            }
        }
    }
}
|
||||
|
||||
// Writes a 'switch' over the current character. Cases that lead to the
// default target are folded into the "default:" label.
void Cases::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const std::string subject = output_yych (readCh);
    o.wind(ind).ws("switch (").wstring(subject).ws(") {\n");

    for (uint32_t k = 0; k < cases_size; ++k)
    {
        if (cases[k].to == def)
        {
            continue;
        }
        cases[k].emit (o, ind);
        // The goto follows the last "case ...:" on the same line.
        output_goto (o, 1, readCh, cases[k].to->label);
    }

    o.wind(ind).ws("default:");
    output_goto (o, 1, readCh, def->label);
    o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Writes "if (cond) { <thn> } else { <els> }".
void Binary::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const uint32_t inner = ind + 1;

    output_if (o, ind, readCh, cond->compare, cond->value);
    o.ws("{\n");
    thn->emit (o, inner, readCh);
    o.wind(ind).ws("} else {\n");
    els->emit (o, inner, readCh);
    o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Writes every branch in order: "if (...) goto L;" for conditional ones,
// a plain "goto L;" for unconditional ones.
void Linear::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    for (size_t k = 0; k < branches.size (); ++k)
    {
        const Cond * const test = branches[k].first;
        const State * const target = branches[k].second;
        if (test == NULL)
        {
            output_goto (o, ind, readCh, target->label);
            continue;
        }
        output_if (o, ind, readCh, test->compare, test->value);
        // The goto sits on the same line as the 'if', so no indentation.
        output_goto (o, 0, readCh, target->label);
    }
}
|
||||
|
||||
// Forwards to the implementation 'type' marks as live.
void If::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    if (type == BINARY)
    {
        info.binary->emit (o, ind, readCh);
    }
    else if (type == LINEAR)
    {
        info.linear->emit (o, ind, readCh);
    }
}
|
||||
|
||||
// Forwards to the implementation 'type' marks as live.
void SwitchIf::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    if (type == SWITCH)
    {
        info.cases->emit (o, ind, readCh);
    }
    else if (type == IF)
    {
        info.ifs->emit (o, ind, readCh);
    }
}
|
||||
|
||||
// Writes the bitmap test "if (yybm[i+yych] & m) { goto L; }", preceded by
// the high-span prologue and followed by the low-span fallback when present.
void GoBitmap::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const std::string yych = output_hgo (o, ind, readCh, hgo);

    o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i);
    o.ws("+").wstring(yych).ws("] & ");
    // The mask is written in hex or decimal depending on the option.
    if (opts->yybmHexTable)
    {
        o.wu32_hex(bitmap->m);
    }
    else
    {
        o.wu32(bitmap->m);
    }
    o.ws(") {\n");

    output_goto (o, ind + 1, readCh, bitmap_state->label);
    o.wind(ind).ws("}\n");

    if (lgo != NULL)
    {
        lgo->emit (o, ind, readCh);
    }
}
|
||||
|
||||
// Returns the largest label among all table targets; every one of the
// TABLE_SIZE entries is dereferenced.
label_t CpgotoTable::max_label () const
{
    label_t best = label_t::first ();
    for (uint32_t k = 0; k != TABLE_SIZE; ++k)
    {
        const label_t & candidate = table[k]->label;
        if (best < candidate)
        {
            best = candidate;
        }
    }
    return best;
}
|
||||
|
||||
// Writes the jump table as "static void *<yytarget>[256] = { &&L.., ... };"
// with eight entries per line, padded so the columns line up.
void CpgotoTable::emit (OutputFile & o, uint32_t ind)
{
    const uint32_t inner = ind + 1;

    o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n");
    o.wind(inner);

    const uint32_t widest = max_label ().width ();
    for (uint32_t k = 0; k < TABLE_SIZE; ++k)
    {
        o.ws("&&").wstring(opts->labelPrefix).wlabel(table[k]->label);

        if (k == TABLE_SIZE - 1)
        {
            // Last entry: no separator.
            o.ws("\n");
        }
        else if (k % 8 == 7)
        {
            // End of a row of eight.
            o.ws(",\n").wind(inner);
        }
        else
        {
            // Pad narrower labels so the next entry starts in the same column.
            const uint32_t gap = widest - table[k]->label.width () + 1;
            o.ws(",").wstring(std::string (gap, ' '));
        }
    }

    o.wind(ind).ws("};\n");
}
|
||||
|
||||
// Writes a block with the jump table and "goto *<yytarget>[<yych>];",
// preceded by the high-span prologue.
void Cpgoto::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    const std::string yych = output_hgo (o, ind, readCh, hgo);
    const uint32_t inner = ind + 1;

    o.ws("{\n");
    table->emit (o, inner);
    o.wind(inner).ws("goto *").wstring(opts->yytarget).ws("[").wstring(yych).ws("];\n");
    o.wind(ind).ws("}\n");
}
|
||||
|
||||
// Writes one "from -> to" edge per case. With a single case the edge gets
// no label; otherwise each edge is labelled with the ranges of its case.
void Dot::emit (OutputFile & o)
{
    const uint32_t n = cases->cases_size;
    if (n == 1)
    {
        o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n");
        return;
    }

    for (uint32_t i = 0; i < n; ++i)
    {
        const Case & edge = cases->cases[i];
        o.wlabel(from->label).ws(" -> ").wlabel(edge.to->label).ws(" [label=\"");
        for (uint32_t j = 0; j < edge.ranges.size (); ++j)
        {
            o.wrange(edge.ranges[j].first, edge.ranges[j].second);
        }
        o.ws("\"]\n");
    }
}
|
||||
|
||||
// Forwards to the implementation 'type' marks as live; EMPTY writes nothing.
void Go::emit (OutputFile & o, uint32_t ind, bool & readCh)
{
    if (type == SWITCH_IF)
    {
        info.switchif->emit (o, ind, readCh);
    }
    else if (type == BITMAP)
    {
        info.bitmap->emit (o, ind, readCh);
    }
    else if (type == CPGOTO)
    {
        info.cpgoto->emit (o, ind, readCh);
    }
    else if (type == DOT)
    {
        // Graph output takes no indentation or read-state arguments.
        info.dot->emit (o);
    }
}
|
||||
|
||||
} // namespace re2c
|
111
tools/re2c/src/codegen/go_used_labels.cc
Normal file
111
tools/re2c/src/codegen/go_used_labels.cc
Normal file
|
@ -0,0 +1,111 @@
|
|||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
void Cases::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
for (uint32_t i = 0; i < cases_size; ++i)
|
||||
{
|
||||
used.insert (cases[i].to->label);
|
||||
}
|
||||
}
|
||||
|
||||
void Binary::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
thn->used_labels (used);
|
||||
els->used_labels (used);
|
||||
}
|
||||
|
||||
void Linear::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
for (uint32_t i = 0; i < branches.size (); ++i)
|
||||
{
|
||||
used.insert (branches[i].second->label);
|
||||
}
|
||||
}
|
||||
|
||||
void If::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case BINARY:
|
||||
info.binary->used_labels (used);
|
||||
break;
|
||||
case LINEAR:
|
||||
info.linear->used_labels (used);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SwitchIf::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case SWITCH:
|
||||
info.cases->used_labels (used);
|
||||
break;
|
||||
case IF:
|
||||
info.ifs->used_labels (used);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void GoBitmap::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
if (hgo != NULL)
|
||||
{
|
||||
hgo->used_labels (used);
|
||||
}
|
||||
used.insert (bitmap_state->label);
|
||||
if (lgo != NULL)
|
||||
{
|
||||
lgo->used_labels (used);
|
||||
}
|
||||
}
|
||||
|
||||
void CpgotoTable::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
for (uint32_t i = 0; i < TABLE_SIZE; ++i)
|
||||
{
|
||||
used.insert (table[i]->label);
|
||||
}
|
||||
}
|
||||
|
||||
void Cpgoto::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
if (hgo != NULL)
|
||||
{
|
||||
hgo->used_labels (used);
|
||||
}
|
||||
table->used_labels (used);
|
||||
}
|
||||
|
||||
void Go::used_labels (std::set<label_t> & used)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case EMPTY:
|
||||
case DOT:
|
||||
break;
|
||||
case SWITCH_IF:
|
||||
info.switchif->used_labels (used);
|
||||
break;
|
||||
case BITMAP:
|
||||
info.bitmap->used_labels (used);
|
||||
break;
|
||||
case CPGOTO:
|
||||
info.cpgoto->used_labels (used);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
24
tools/re2c/src/codegen/indent.h
Normal file
24
tools/re2c/src/codegen/indent.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#ifndef _RE2C_CODEGEN_INDENT_
|
||||
#define _RE2C_CODEGEN_INDENT_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
inline std::string indent (uint32_t ind)
|
||||
{
|
||||
std::string str;
|
||||
|
||||
while (opts->target != opt_t::DOT && ind-- > 0)
|
||||
{
|
||||
str += opts->indString;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_INDENT_
|
175
tools/re2c/src/codegen/input_api.cc
Normal file
175
tools/re2c/src/codegen/input_api.cc
Normal file
|
@ -0,0 +1,175 @@
|
|||
#include <sstream>
|
||||
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/codegen/indent.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
InputAPI::InputAPI ()
|
||||
: type_ (DEFAULT)
|
||||
{}
|
||||
|
||||
InputAPI::type_t InputAPI::type () const
|
||||
{
|
||||
return type_;
|
||||
}
|
||||
|
||||
void InputAPI::set (type_t t)
|
||||
{
|
||||
type_ = t;
|
||||
}
|
||||
|
||||
std::string InputAPI::expr_peek () const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = "*" + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yypeek + " ()";
|
||||
break;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string InputAPI::expr_peek_save () const
|
||||
{
|
||||
return opts->yych + " = " + opts.yychConversion () + expr_peek ();
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_peek (uint32_t ind) const
|
||||
{
|
||||
return indent (ind) + expr_peek_save () + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = "++" + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yyskip + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_backup (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = opts->yymarker + " = " + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yybackup + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_backupctx (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = opts->yyctxmarker + " = " + opts->yycursor;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yybackupctx + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_restore (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = opts->yycursor + " = " + opts->yymarker;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = opts->yyrestore + " ()";
|
||||
break;
|
||||
}
|
||||
return indent (ind) + s + ";\n";
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_restorectx (uint32_t ind) const
|
||||
{
|
||||
std::string s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s = indent (ind) + opts->yycursor + " = " + opts->yyctxmarker + ";\n";
|
||||
break;
|
||||
case CUSTOM:
|
||||
s = indent (ind) + opts->yyrestorectx + " ();\n";
|
||||
break;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip_peek (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*++" + opts->yycursor + ";\n"
|
||||
: stmt_skip (ind) + stmt_peek (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip_backup (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yymarker + " = ++" + opts->yycursor + ";\n"
|
||||
: stmt_skip (ind) + stmt_backup (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_backup_peek (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = " + opts->yycursor + ");\n"
|
||||
: stmt_backup (ind) + stmt_peek (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::stmt_skip_backup_peek (uint32_t ind) const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = ++" + opts->yycursor + ");\n"
|
||||
: stmt_skip (ind) + stmt_backup (ind) + stmt_peek (ind);
|
||||
}
|
||||
|
||||
std::string InputAPI::expr_lessthan_one () const
|
||||
{
|
||||
return type_ == DEFAULT
|
||||
? opts->yylimit + " <= " + opts->yycursor
|
||||
: expr_lessthan (1);
|
||||
}
|
||||
|
||||
std::string InputAPI::expr_lessthan (size_t n) const
|
||||
{
|
||||
std::ostringstream s;
|
||||
switch (type_)
|
||||
{
|
||||
case DEFAULT:
|
||||
s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n;
|
||||
break;
|
||||
case CUSTOM:
|
||||
s << opts->yylessthan << " (" << n << ")";
|
||||
break;
|
||||
}
|
||||
return s.str ();
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
43
tools/re2c/src/codegen/input_api.h
Normal file
43
tools/re2c/src/codegen/input_api.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
#ifndef _RE2C_CODEGEN_INPUT_API_
|
||||
#define _RE2C_CODEGEN_INPUT_API_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Generates the text of the input-handling expressions and statements
// (peek, skip, backup, restore, ...) in one of two flavours.
class InputAPI
{
public:
    enum type_t
    {
        DEFAULT,
        CUSTOM
    };

private:
    type_t type_;

public:
    InputAPI ();
    type_t type () const;
    void set (type_t t);

    // expr_*: expression text without a terminator.
    // stmt_*: one or more complete statement lines indented by 'ind'.
    std::string expr_peek () const;
    std::string expr_peek_save () const;
    std::string stmt_peek (uint32_t ind) const;
    std::string stmt_skip (uint32_t ind) const;
    std::string stmt_backup (uint32_t ind) const;
    std::string stmt_backupctx (uint32_t ind) const;
    std::string stmt_restore (uint32_t ind) const;
    std::string stmt_restorectx (uint32_t ind) const;
    std::string stmt_skip_peek (uint32_t ind) const;
    std::string stmt_skip_backup (uint32_t ind) const;
    std::string stmt_backup_peek (uint32_t ind) const;
    std::string stmt_skip_backup_peek (uint32_t ind) const;
    std::string expr_lessthan_one () const;
    std::string expr_lessthan (size_t n) const;
};
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_INPUT_API_
|
42
tools/re2c/src/codegen/label.cc
Normal file
42
tools/re2c/src/codegen/label.cc
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include <ostream>
|
||||
|
||||
#include "src/codegen/label.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
const uint32_t label_t::FIRST = 0;
|
||||
|
||||
label_t::label_t ()
|
||||
: value (FIRST)
|
||||
{}
|
||||
|
||||
void label_t::inc ()
|
||||
{
|
||||
++value;
|
||||
}
|
||||
|
||||
label_t label_t::first ()
|
||||
{
|
||||
return label_t ();
|
||||
}
|
||||
|
||||
bool label_t::operator < (const label_t & l) const
|
||||
{
|
||||
return value < l.value;
|
||||
}
|
||||
|
||||
uint32_t label_t::width () const
|
||||
{
|
||||
uint32_t v = value;
|
||||
uint32_t n = 0;
|
||||
while (v /= 10) ++n;
|
||||
return n;
|
||||
}
|
||||
|
||||
std::ostream & operator << (std::ostream & o, label_t l)
|
||||
{
|
||||
o << l.value;
|
||||
return o;
|
||||
}
|
||||
|
||||
} // namespace re2c
|
39
tools/re2c/src/codegen/label.h
Normal file
39
tools/re2c/src/codegen/label.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
#ifndef _RE2C_CODEGEN_LABEL_
|
||||
#define _RE2C_CODEGEN_LABEL_
|
||||
|
||||
#include <iosfwd> // ostream
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
template <typename num_t> class counter_t;
|
||||
|
||||
// label public API:
|
||||
// - get first label
|
||||
// - compare labels
|
||||
// - get label width
|
||||
// - output label to std::ostream
|
||||
//
|
||||
// label private API (for label counter):
|
||||
// - get initial label
|
||||
// - get next label
|
||||
class label_t
|
||||
{
|
||||
static const uint32_t FIRST;
|
||||
uint32_t value;
|
||||
label_t ();
|
||||
void inc ();
|
||||
|
||||
public:
|
||||
static label_t first ();
|
||||
bool operator < (const label_t & l) const;
|
||||
uint32_t width () const;
|
||||
friend std::ostream & operator << (std::ostream & o, label_t l);
|
||||
|
||||
friend class counter_t<label_t>;
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_LABEL_
|
465
tools/re2c/src/codegen/output.cc
Normal file
465
tools/re2c/src/codegen/output.cc
Normal file
|
@ -0,0 +1,465 @@
|
|||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <iomanip>
|
||||
|
||||
#include "src/codegen/indent.h"
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/codegen/print.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/conf/warn.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/rule_rank.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Creates an empty fragment of kind 't' with indentation 'i'.
OutputFragment::OutputFragment (type_t t, uint32_t i)
    : type (t)
    , stream ()
    , indent (i)
{
}
|
||||
|
||||
uint32_t OutputFragment::count_lines ()
|
||||
{
|
||||
uint32_t lines = 0;
|
||||
const std::string content = stream.str ();
|
||||
const char * p = content.c_str ();
|
||||
for (uint32_t i = 0; i < content.size (); ++i)
|
||||
{
|
||||
if (p[i] == '\n')
|
||||
{
|
||||
++lines;
|
||||
}
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
// A new block starts with a single empty CODE fragment.
OutputBlock::OutputBlock ()
    : fragments ()
    , used_yyaccept (false)
    , force_start_label (false)
    , user_start_label ()
    , line (0)
{
    OutputFragment * const first = new OutputFragment (OutputFragment::CODE, 0);
    fragments.push_back (first);
}
|
||||
|
||||
// Deletes every fragment held by the block.
OutputBlock::~OutputBlock ()
{
    for (size_t k = 0; k != fragments.size (); ++k)
    {
        delete fragments[k];
    }
}
|
||||
|
||||
// Remembers the file name (the file itself is opened later by open())
// and starts the first output block.
OutputFile::OutputFile (const char * fn)
    : file_name (fn)
    , file (NULL)
    , blocks ()
    , label_counter ()
    , warn_condition_order (!opts->tFlag) // see note [condition order]
{
    new_block ();
}
|
||||
|
||||
bool OutputFile::open ()
|
||||
{
|
||||
if (file_name == NULL)
|
||||
{
|
||||
file_name = "<stdout>";
|
||||
file = stdout;
|
||||
}
|
||||
else
|
||||
{
|
||||
file = fopen (file_name, "wb");
|
||||
}
|
||||
return file != NULL;
|
||||
}
|
||||
|
||||
// Closes the file (stdout is left open) and frees all owned blocks.
OutputFile::~OutputFile ()
{
    const bool must_close = file != NULL && file != stdout;
    if (must_close)
    {
        fclose (file);
    }

    typedef std::vector<OutputBlock *>::iterator block_iter_t;
    for (block_iter_t it = blocks.begin (); it != blocks.end (); ++it)
    {
        delete *it;
    }
}
|
||||
|
||||
std::ostream & OutputFile::stream ()
|
||||
{
|
||||
return blocks.back ()->fragments.back ()->stream;
|
||||
}
|
||||
|
||||
// Appends exactly `n` bytes starting at `s`, unformatted.
OutputFile & OutputFile::wraw (const char * s, size_t n)
{
    std::ostream & out = stream ();
    out.write (s, static_cast<std::streamsize> (n));
    return *this;
}
|
||||
|
||||
// Appends `n` as a "0x..." hexadecimal literal (formatting done by prtHex).
OutputFile & OutputFile::wu32_hex (uint32_t n)
{
    std::ostream & out = stream ();
    prtHex (out, n);
    return *this;
}
|
||||
|
||||
// Appends `n` either as a quoted character or as a hexadecimal literal;
// the choice is made by prtChOrHex.
OutputFile & OutputFile::wc_hex (uint32_t n)
{
    std::ostream & out = stream ();
    prtChOrHex (out, n);
    return *this;
}
|
||||
|
||||
// Appends the range with lower bound `l` and upper bound `u` in bracket
// notation (formatting done by printSpan, which treats `u` as exclusive).
OutputFile & OutputFile::wrange (uint32_t l, uint32_t u)
{
    std::ostream & out = stream ();
    printSpan (out, l, u);
    return *this;
}
|
||||
|
||||
// Appends `n` in decimal, padded to a minimum field width of `w`.
OutputFile & OutputFile::wu32_width (uint32_t n, int w)
{
    std::ostream & out = stream ();
    out << std::setw (w) << n;
    return *this;
}
|
||||
|
||||
// Appends a '#line' directive for line `l` of file `fn` right away
// (output_line_info suppresses it when the -i option is set).
OutputFile & OutputFile::wline_info (uint32_t l, const char * fn)
{
    std::ostream & out = stream ();
    output_line_info (out, l, fn);
    return *this;
}
|
||||
|
||||
// Appends the "Generated by re2c ..." banner comment.
OutputFile & OutputFile::wversion_time ()
{
    std::ostream & out = stream ();
    output_version_time (out);
    return *this;
}
|
||||
|
||||
// If the current block has a user-defined start label, appends it followed
// by ":\n"; otherwise writes nothing.
OutputFile & OutputFile::wuser_start_label ()
{
    const std::string & start_label = blocks.back ()->user_start_label;
    if (start_label.empty ())
    {
        return *this;
    }
    wstring (start_label);
    ws (":\n");
    return *this;
}
|
||||
|
||||
// Appends a single character.
OutputFile & OutputFile::wc (char c)
{
    std::ostream & out = stream ();
    out << c;
    return *this;
}
|
||||
|
||||
// Appends a 32-bit unsigned number using the stream's default formatting.
OutputFile & OutputFile::wu32 (uint32_t n)
{
    std::ostream & out = stream ();
    out << n;
    return *this;
}
|
||||
|
||||
// Appends a 64-bit unsigned number using the stream's default formatting.
OutputFile & OutputFile::wu64 (uint64_t n)
{
    std::ostream & out = stream ();
    out << n;
    return *this;
}
|
||||
|
||||
// Appends a std::string.
OutputFile & OutputFile::wstring (const std::string & s)
{
    std::ostream & out = stream ();
    out << s;
    return *this;
}
|
||||
|
||||
// Appends a NUL-terminated C string.
OutputFile & OutputFile::ws (const char * s)
{
    std::ostream & out = stream ();
    out << s;
    return *this;
}
|
||||
|
||||
// Appends a label using label_t's stream insertion operator.
OutputFile & OutputFile::wlabel (label_t l)
{
    std::ostream & out = stream ();
    out << l;
    return *this;
}
|
||||
|
||||
// Appends a rule rank using rule_rank_t's stream insertion operator.
OutputFile & OutputFile::wrank (rule_rank_t r)
{
    std::ostream & out = stream ();
    out << r;
    return *this;
}
|
||||
|
||||
// Appends the indentation produced by indent(ind).
OutputFile & OutputFile::wind (uint32_t ind)
{
    std::ostream & out = stream ();
    out << indent (ind);
    return *this;
}
|
||||
|
||||
void OutputFile::insert_code ()
|
||||
{
|
||||
blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::CODE, 0));
|
||||
}
|
||||
|
||||
// Reserves a LINE_INFO placeholder (filled in by emit(), once the output
// line number is known) and opens a new CODE fragment after it.
OutputFile & OutputFile::wdelay_line_info ()
{
    OutputFragment * placeholder = new OutputFragment (OutputFragment::LINE_INFO, 0);
    blocks.back ()->fragments.push_back (placeholder);
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserves a STATE_GOTO placeholder at indentation `ind`, but only when the
// -f option is on and no such placeholder has been written yet (tracked by
// the global bWroteGetState, which is set here).
OutputFile & OutputFile::wdelay_state_goto (uint32_t ind)
{
    if (!opts->fFlag || bWroteGetState)
    {
        return *this;
    }

    OutputFragment * placeholder = new OutputFragment (OutputFragment::STATE_GOTO, ind);
    blocks.back ()->fragments.push_back (placeholder);
    insert_code ();
    bWroteGetState = true;
    return *this;
}
|
||||
|
||||
// Reserves a TYPES placeholder (the condition enum is generated by emit())
// and opens a new CODE fragment. Requesting the types also switches off the
// condition-order warning.
OutputFile & OutputFile::wdelay_types ()
{
    warn_condition_order = false; // see note [condition order]

    OutputFragment * placeholder = new OutputFragment (OutputFragment::TYPES, 0);
    blocks.back ()->fragments.push_back (placeholder);
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserves a WARN_CONDITION_ORDER marker; emit() decides at the very end
// whether the warning is actually issued for this block.
OutputFile & OutputFile::wdelay_warn_condition_order ()
{
    OutputFragment * marker = new OutputFragment (OutputFragment::WARN_CONDITION_ORDER, 0);
    blocks.back ()->fragments.push_back (marker);
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserves a YYACCEPT_INIT placeholder at indentation `ind`; emit() writes
// the initialisation only if the block ended up using yyaccept.
OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind)
{
    OutputFragment * placeholder = new OutputFragment (OutputFragment::YYACCEPT_INIT, ind);
    blocks.back ()->fragments.push_back (placeholder);
    insert_code ();
    return *this;
}
|
||||
|
||||
// Reserves a YYMAXFILL placeholder; emit() writes the '#define YYMAXFILL'
// line with the final value.
OutputFile & OutputFile::wdelay_yymaxfill ()
{
    OutputFragment * placeholder = new OutputFragment (OutputFragment::YYMAXFILL, 0);
    blocks.back ()->fragments.push_back (placeholder);
    insert_code ();
    return *this;
}
|
||||
|
||||
void OutputFile::set_used_yyaccept ()
|
||||
{
|
||||
blocks.back ()->used_yyaccept = true;
|
||||
}
|
||||
|
||||
bool OutputFile::get_used_yyaccept () const
|
||||
{
|
||||
return blocks.back ()->used_yyaccept;
|
||||
}
|
||||
|
||||
void OutputFile::set_force_start_label (bool force)
|
||||
{
|
||||
blocks.back ()->force_start_label = force;
|
||||
}
|
||||
|
||||
void OutputFile::set_user_start_label (const std::string & label)
|
||||
{
|
||||
blocks.back ()->user_start_label = label;
|
||||
}
|
||||
|
||||
bool OutputFile::get_force_start_label () const
|
||||
{
|
||||
return blocks.back ()->force_start_label;
|
||||
}
|
||||
|
||||
void OutputFile::set_block_line (uint32_t l)
|
||||
{
|
||||
blocks.back ()->line = l;
|
||||
}
|
||||
|
||||
uint32_t OutputFile::get_block_line () const
|
||||
{
|
||||
return blocks.back ()->line;
|
||||
}
|
||||
|
||||
void OutputFile::new_block ()
|
||||
{
|
||||
blocks.push_back (new OutputBlock ());
|
||||
insert_code ();
|
||||
}
|
||||
|
||||
void OutputFile::emit
|
||||
( const std::vector<std::string> & types
|
||||
, size_t max_fill
|
||||
)
|
||||
{
|
||||
if (file != NULL)
|
||||
{
|
||||
unsigned int line_count = 1;
|
||||
for (unsigned int j = 0; j < blocks.size (); ++j)
|
||||
{
|
||||
OutputBlock & b = * blocks[j];
|
||||
for (unsigned int i = 0; i < b.fragments.size (); ++i)
|
||||
{
|
||||
OutputFragment & f = * b.fragments[i];
|
||||
switch (f.type)
|
||||
{
|
||||
case OutputFragment::CODE:
|
||||
break;
|
||||
case OutputFragment::LINE_INFO:
|
||||
output_line_info (f.stream, line_count + 1, file_name);
|
||||
break;
|
||||
case OutputFragment::STATE_GOTO:
|
||||
output_state_goto (f.stream, f.indent, 0);
|
||||
break;
|
||||
case OutputFragment::TYPES:
|
||||
output_types (f.stream, f.indent, types);
|
||||
break;
|
||||
case OutputFragment::WARN_CONDITION_ORDER:
|
||||
if (warn_condition_order) // see note [condition order]
|
||||
{
|
||||
warn.condition_order (b.line);
|
||||
}
|
||||
break;
|
||||
case OutputFragment::YYACCEPT_INIT:
|
||||
output_yyaccept_init (f.stream, f.indent, b.used_yyaccept);
|
||||
break;
|
||||
case OutputFragment::YYMAXFILL:
|
||||
output_yymaxfill (f.stream, max_fill);
|
||||
break;
|
||||
}
|
||||
std::string content = f.stream.str ();
|
||||
fwrite (content.c_str (), 1, content.size (), file);
|
||||
line_count += f.count_lines ();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The header text is always generated, even if it is never written to a
// file. A NULL name is replaced by "<stdout>.h" because passing a NULL file
// name to 'operator <<' crashes on some platforms.
// TODO: generate header only if necessary
HeaderFile::HeaderFile (const char * fn)
    : stream ()
    , file_name (fn != NULL ? fn : "<stdout>.h")
    , file (NULL)
{
}
|
||||
|
||||
bool HeaderFile::open ()
|
||||
{
|
||||
file = fopen (file_name, "wb");
|
||||
return file != NULL;
|
||||
}
|
||||
|
||||
void HeaderFile::emit (const std::vector<std::string> & types)
|
||||
{
|
||||
output_version_time (stream);
|
||||
output_line_info (stream, 3, file_name);
|
||||
stream << "\n";
|
||||
output_types (stream, 0, types);
|
||||
}
|
||||
|
||||
// If the header file was opened, flushes the buffered text into it and
// closes it; otherwise the generated text is discarded.
HeaderFile::~HeaderFile ()
{
    if (file == NULL)
    {
        return;
    }

    const std::string text = stream.str ();
    fwrite (text.c_str (), 1, text.size (), file);
    fclose (file);
}
|
||||
|
||||
// Sets up the source and header outputs; no conditions or skeletons are
// known yet and max_fill starts at 1.
Output::Output (const char * source_name, const char * header_name)
    : source (source_name)
    , header (header_name)
    , types ()
    , skeletons ()
    , max_fill (1)
{
}
|
||||
|
||||
// Emits source and header on destruction, unless warn.error() reports that
// an error has occurred.
Output::~Output ()
{
    if (warn.error ())
    {
        return;
    }

    source.emit (types, max_fill);
    header.emit (types);
}
|
||||
|
||||
void output_state_goto (std::ostream & o, uint32_t ind, uint32_t start_label)
|
||||
{
|
||||
o << indent(ind) << "switch (" << output_get_state() << ") {\n";
|
||||
if (opts->bUseStateAbort)
|
||||
{
|
||||
o << indent(ind) << "default: abort();\n";
|
||||
o << indent(ind) << "case -1: goto " << opts->labelPrefix << start_label << ";\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
o << indent(ind) << "default: goto " << opts->labelPrefix << start_label << ";\n";
|
||||
}
|
||||
for (uint32_t i = 0; i < last_fill_index; ++i)
|
||||
{
|
||||
o << indent(ind) << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n";
|
||||
}
|
||||
o << indent(ind) << "}\n";
|
||||
if (opts->bUseStateNext)
|
||||
{
|
||||
o << opts->yynext << ":\n";
|
||||
}
|
||||
}
|
||||
|
||||
void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept)
|
||||
{
|
||||
if (used_yyaccept)
|
||||
{
|
||||
o << indent (ind) << "unsigned int " << opts->yyaccept << " = 0;\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the line '#define YYMAXFILL <max_fill>'.
void output_yymaxfill (std::ostream & o, size_t max_fill)
{
    o << "#define YYMAXFILL ";
    o << max_fill;
    o << "\n";
}
|
||||
|
||||
void output_line_info (std::ostream & o, uint32_t line_number, const char * file_name)
|
||||
{
|
||||
if (!opts->iFlag)
|
||||
{
|
||||
o << "#line " << line_number << " \"" << file_name << "\"\n";
|
||||
}
|
||||
}
|
||||
|
||||
void output_types (std::ostream & o, uint32_t ind, const std::vector<std::string> & types)
|
||||
{
|
||||
o << indent (ind++) << "enum " << opts->yycondtype << " {\n";
|
||||
for (unsigned int i = 0; i < types.size (); ++i)
|
||||
{
|
||||
o << indent (ind) << opts->condEnumPrefix << types[i] << ",\n";
|
||||
}
|
||||
o << indent (--ind) << "};\n";
|
||||
}
|
||||
|
||||
void output_version_time (std::ostream & o)
|
||||
{
|
||||
o << "/* Generated by re2c";
|
||||
if (opts->version)
|
||||
{
|
||||
o << " " << PACKAGE_VERSION;
|
||||
}
|
||||
if (!opts->bNoGenerationDate)
|
||||
{
|
||||
o << " on ";
|
||||
time_t now = time (NULL);
|
||||
o.write (ctime (&now), 24);
|
||||
}
|
||||
o << " */" << "\n";
|
||||
}
|
||||
|
||||
std::string output_get_state ()
|
||||
{
|
||||
return opts->state_get_naked
|
||||
? opts->state_get
|
||||
: opts->state_get + "()";
|
||||
}
|
||||
|
||||
} // namespace re2c
|
158
tools/re2c/src/codegen/output.h
Normal file
158
tools/re2c/src/codegen/output.h
Normal file
|
@ -0,0 +1,158 @@
|
|||
#ifndef _RE2C_CODEGEN_OUTPUT_
|
||||
#define _RE2C_CODEGEN_OUTPUT_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <fstream>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/util/counter.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class rule_rank_t;
|
||||
|
||||
// One piece of the generated output. A CODE fragment holds text that was
// written immediately; the other kinds are placeholders that
// OutputFile::emit handles when the whole file is finally written.
struct OutputFragment
{
    // Kind of the fragment (see the switch in OutputFile::emit).
    enum type_t
    { CODE
//  , CONFIG
    , LINE_INFO
    , STATE_GOTO
    , TYPES
    , WARN_CONDITION_ORDER
    , YYACCEPT_INIT
    , YYMAXFILL
    };

    type_t type;               // what kind of fragment this is
    std::ostringstream stream; // buffered text of the fragment
    uint32_t indent;           // indentation level passed to the delayed generators

    OutputFragment (type_t t, uint32_t i);
    // Number of '\n' characters currently buffered in `stream`.
    uint32_t count_lines ();
};
|
||||
|
||||
// A sequence of output fragments together with per-block settings.
// The block owns its fragments and deletes them in its destructor.
struct OutputBlock
{
    std::vector<OutputFragment *> fragments; // owned; written in this order
    bool used_yyaccept;                      // controls whether the yyaccept initialisation is emitted
    bool force_start_label;                  // set/read via OutputFile::{set,get}_force_start_label
    std::string user_start_label;            // written as "<label>:\n" by wuser_start_label when non-empty
    uint32_t line;                           // line reported by the condition-order warning

    OutputBlock ();
    ~OutputBlock ();
};
|
||||
|
||||
struct OutputFile
|
||||
{
|
||||
public:
|
||||
const char * file_name;
|
||||
|
||||
private:
|
||||
FILE * file;
|
||||
std::vector<OutputBlock *> blocks;
|
||||
|
||||
public:
|
||||
counter_t<label_t> label_counter;
|
||||
bool warn_condition_order;
|
||||
|
||||
private:
|
||||
std::ostream & stream ();
|
||||
void insert_code ();
|
||||
|
||||
public:
|
||||
OutputFile (const char * fn);
|
||||
~OutputFile ();
|
||||
|
||||
bool open ();
|
||||
|
||||
void new_block ();
|
||||
|
||||
// immediate output
|
||||
OutputFile & wraw (const char * s, size_t n);
|
||||
OutputFile & wc (char c);
|
||||
OutputFile & wc_hex (uint32_t n);
|
||||
OutputFile & wu32 (uint32_t n);
|
||||
OutputFile & wu32_hex (uint32_t n);
|
||||
OutputFile & wu32_width (uint32_t n, int w);
|
||||
OutputFile & wu64 (uint64_t n);
|
||||
OutputFile & wstring (const std::string & s);
|
||||
OutputFile & ws (const char * s);
|
||||
OutputFile & wlabel (label_t l);
|
||||
OutputFile & wrank (rule_rank_t l);
|
||||
OutputFile & wrange (uint32_t u, uint32_t l);
|
||||
OutputFile & wline_info (uint32_t l, const char * fn);
|
||||
OutputFile & wversion_time ();
|
||||
OutputFile & wuser_start_label ();
|
||||
OutputFile & wind (uint32_t ind);
|
||||
|
||||
// delayed output
|
||||
OutputFile & wdelay_line_info ();
|
||||
OutputFile & wdelay_state_goto (uint32_t ind);
|
||||
OutputFile & wdelay_types ();
|
||||
OutputFile & wdelay_warn_condition_order ();
|
||||
OutputFile & wdelay_yyaccept_init (uint32_t ind);
|
||||
OutputFile & wdelay_yymaxfill ();
|
||||
|
||||
void set_used_yyaccept ();
|
||||
bool get_used_yyaccept () const;
|
||||
void set_force_start_label (bool force);
|
||||
void set_user_start_label (const std::string & label);
|
||||
bool get_force_start_label () const;
|
||||
void set_block_line (uint32_t l);
|
||||
uint32_t get_block_line () const;
|
||||
|
||||
void emit (const std::vector<std::string> & types, size_t max_fill);
|
||||
|
||||
FORBID_COPY (OutputFile);
|
||||
};
|
||||
|
||||
// The optional type header. Its text is generated into an in-memory buffer
// by emit() and written to disk by the destructor, but only if open() was
// called and succeeded.
struct HeaderFile
{
    HeaderFile (const char * fn);
    ~HeaderFile ();
    // opens file_name for binary writing; returns false on failure
    bool open ();
    // generates banner, #line directive and the condition enum into the buffer
    void emit (const std::vector<std::string> & types);

private:
    std::ostringstream stream; // buffered header text
    const char * file_name;    // never NULL: the constructor substitutes "<stdout>.h"
    FILE * file;               // NULL unless open() succeeded

    FORBID_COPY (HeaderFile);
};
|
||||
|
||||
// Bundles the generated source file and type header with the data needed to
// finish them. The destructor emits both files unless warn.error() is set.
struct Output
{
    OutputFile source;
    HeaderFile header;
    std::vector<std::string> types;  // condition names for the generated enum
    std::set<std::string> skeletons;
    size_t max_fill;                 // value written for YYMAXFILL (starts at 1)

    Output (const char * source_name, const char * header_name);
    ~Output ();
};
|
||||
|
||||
// Generators used for both immediate and delayed output; each writes to the
// given stream.

// '#line <number> "<file>"' (nothing if -i is set)
void output_line_info (std::ostream &, uint32_t, const char *);
// 'switch' over the stored state; arguments: indentation, start label
void output_state_goto (std::ostream &, uint32_t, uint32_t);
// condition enum; arguments: indentation, condition names
void output_types (std::ostream &, uint32_t, const std::vector<std::string> &);
// '/* Generated by re2c ... */' banner
void output_version_time (std::ostream &);
// yyaccept initialisation; arguments: indentation, whether yyaccept is used
void output_yyaccept_init (std::ostream &, uint32_t, bool);
// '#define YYMAXFILL <n>'
void output_yymaxfill (std::ostream &, size_t);

// helpers
// expression that reads the stored state (state_get, with "()" unless naked)
std::string output_get_state ();
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_OUTPUT_
|
156
tools/re2c/src/codegen/print.cc
Normal file
156
tools/re2c/src/codegen/print.cc
Normal file
|
@ -0,0 +1,156 @@
|
|||
#include <iostream>
|
||||
|
||||
#include "src/codegen/print.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// True for the printable ASCII range 0x20 (space) .. 0x7E ('~').
bool is_print (uint32_t c)
{
    return 0x20 <= c && c <= 0x7E;
}
|
||||
|
||||
// True for the six ASCII whitespace characters: tab, form feed, vertical
// tab, line feed, carriage return and space.
bool is_space (uint32_t c)
{
    return c == '\t'
        || c == '\f'
        || c == '\v'
        || c == '\n'
        || c == '\r'
        || c == ' ';
}
|
||||
|
||||
// Upper-case hexadecimal digit for the lowest four bits of `c`.
char hexCh(uint32_t c)
{
    static const char * const digits = "0123456789ABCDEF";
    const uint32_t nibble = c & 0x0F;
    return digits[nibble];
}
|
||||
|
||||
void prtChOrHex(std::ostream& o, uint32_t c)
|
||||
{
|
||||
if (opts->encoding.type () != Enc::EBCDIC
|
||||
&& (is_print (c) || is_space (c)))
|
||||
{
|
||||
o << '\'';
|
||||
prtCh(o, c);
|
||||
o << '\'';
|
||||
}
|
||||
else
|
||||
{
|
||||
prtHex(o, c);
|
||||
}
|
||||
}
|
||||
|
||||
void prtHex(std::ostream& o, uint32_t c)
|
||||
{
|
||||
o << "0x";
|
||||
const uint32_t cunit_size = opts->encoding.szCodeUnit ();
|
||||
if (cunit_size >= 4)
|
||||
{
|
||||
o << hexCh (c >> 28u)
|
||||
<< hexCh (c >> 24u)
|
||||
<< hexCh (c >> 20u)
|
||||
<< hexCh (c >> 16u);
|
||||
}
|
||||
if (cunit_size >= 2)
|
||||
{
|
||||
o << hexCh (c >> 12u)
|
||||
<< hexCh (c >> 8u);
|
||||
}
|
||||
o << hexCh (c >> 4u)
|
||||
<< hexCh (c);
|
||||
}
|
||||
|
||||
void prtCh(std::ostream& o, uint32_t c)
|
||||
{
|
||||
const bool dot = opts->target == opt_t::DOT;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\'':
|
||||
o << (dot ? "'" : "\\'");
|
||||
break;
|
||||
|
||||
case '"':
|
||||
o << (dot ? "\\\"" : "\"");
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
o << (dot ? "\\\\n" : "\\n");
|
||||
break;
|
||||
|
||||
case '\t':
|
||||
o << (dot ? "\\\\t" : "\\t");
|
||||
break;
|
||||
|
||||
case '\v':
|
||||
o << (dot ? "\\\\v" : "\\v");
|
||||
break;
|
||||
|
||||
case '\b':
|
||||
o << (dot ? "\\\\b" : "\\b");
|
||||
break;
|
||||
|
||||
case '\r':
|
||||
o << (dot ? "\\\\r" : "\\r");
|
||||
break;
|
||||
|
||||
case '\f':
|
||||
o << (dot ? "\\\\f" : "\\f");
|
||||
break;
|
||||
|
||||
case '\a':
|
||||
o << (dot ? "\\\\a" :"\\a");
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
o << "\\\\"; // both .dot and C/C++ code expect "\\"
|
||||
break;
|
||||
|
||||
default:
|
||||
o << static_cast<char> (c);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void prtChOrHexForSpan(std::ostream& o, uint32_t c)
|
||||
{
|
||||
if (opts->encoding.type () != Enc::EBCDIC
|
||||
&& is_print (c)
|
||||
&& (c != ']'))
|
||||
{
|
||||
prtCh(o, c);
|
||||
}
|
||||
else
|
||||
{
|
||||
prtHex(o, c);
|
||||
}
|
||||
}
|
||||
|
||||
void printSpan(std::ostream& o, uint32_t lb, uint32_t ub)
|
||||
{
|
||||
o << "[";
|
||||
if ((ub - lb) == 1)
|
||||
{
|
||||
prtChOrHexForSpan(o, lb);
|
||||
}
|
||||
else
|
||||
{
|
||||
prtChOrHexForSpan(o, lb);
|
||||
o << "-";
|
||||
prtChOrHexForSpan(o, ub - 1);
|
||||
}
|
||||
o << "]";
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
20
tools/re2c/src/codegen/print.h
Normal file
20
tools/re2c/src/codegen/print.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef _RE2C_CODEGEN_PRINT_
|
||||
#define _RE2C_CODEGEN_PRINT_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <iosfwd>
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// true for printable ASCII (0x20 .. 0x7E)
bool is_print (uint32_t c);
// true for tab, form feed, vertical tab, line feed, carriage return, space
bool is_space (uint32_t c);
// upper-case hex digit for the low four bits of c
char hexCh(uint32_t c);
// character with target-specific escaping
void prtCh(std::ostream&, uint32_t);
// "0x..." literal, width depends on the code unit size of the encoding
void prtHex(std::ostream&, uint32_t);
// quoted character if printable (and not EBCDIC), hex literal otherwise
void prtChOrHex(std::ostream&, uint32_t);
// range in bracket notation; arguments: lower bound, exclusive upper bound
void printSpan(std::ostream&, uint32_t, uint32_t);
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_CODEGEN_PRINT_
|
258
tools/re2c/src/conf/msg.cc
Normal file
258
tools/re2c/src/conf/msg.cc
Normal file
|
@ -0,0 +1,258 @@
|
|||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1500
|
||||
#include "config.msc.h"
|
||||
#else
|
||||
#include "config.h"
|
||||
#endif
|
||||
#include "src/conf/msg.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Prints "re2c: error: " followed by the printf-style formatted message and
// a newline to stderr.
void error (const char * fmt, ...)
{
    fputs ("re2c: error: ", stderr);

    va_list ap;
    va_start (ap, fmt);
    vfprintf (stderr, fmt, ap);
    va_end (ap);

    fputc ('\n', stderr);
}
|
||||
|
||||
void error_encoding ()
|
||||
{
|
||||
error ("only one of switches -e, -w, -x, -u and -8 must be set");
|
||||
}
|
||||
|
||||
void error_arg (const char * option)
|
||||
{
|
||||
error ("expected argument to option %s", option);
|
||||
}
|
||||
|
||||
// Prints the common prefix of a diagnostic to stderr:
//     "re2c: error: line <line>: "    if `error` is true
//     "re2c: warning: line <line>: "  otherwise
//
// The severity word must be chosen on every call. It was previously kept in
// a function-local 'static', which is initialised only once, so every call
// after the first repeated the severity of the first call.
void warning_start (uint32_t line, bool error)
{
    const char * msg = error ? "error" : "warning";
    fprintf (stderr, "re2c: %s: line %u: ", msg, line);
}
|
||||
|
||||
// Finishes a diagnostic on stderr: if a warning type is given, appends
// " [-W<type>]" (or " [-Werror-<type>]" when `error` is true), then a newline.
void warning_end (const char * type, bool error)
{
    if (type != NULL)
    {
        fprintf (stderr, " [-W%s%s]", error ? "error-" : "", type);
    }
    fputc ('\n', stderr);
}
|
||||
|
||||
void warning (const char * type, uint32_t line, bool error, const char * fmt, ...)
|
||||
{
|
||||
warning_start (line, error);
|
||||
|
||||
va_list args;
|
||||
va_start (args, fmt);
|
||||
vfprintf (stderr, fmt, args);
|
||||
va_end (args);
|
||||
|
||||
warning_end (type, error);
|
||||
}
|
||||
|
||||
// Prints the command-line help to stderr.
//
// Every option entry is followed by one empty line. The entries for -w, -x
// and -8 used to lack the newline that ends their last text line, so the
// separating empty line was missing after them; this is fixed here, together
// with three typos in the text ("along" -> "alone", "greater that" ->
// "greater than", "in not" -> "is not").
//
// The text contains no '%', so it is safe to pass as a format string.
void usage ()
{
    fprintf (stderr,
    "usage: re2c [-bcdDefFghirsuvVwx18] [-o of] [-t th] file\n"
    "\n"
    "-? -h --help Display this info.\n"
    "\n"
    "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n"
    " coax better code out of the compiler. Most useful for\n"
    " specifications with more than a few keywords (e.g. for\n"
    " most programming languages).\n"
    "\n"
    "-c --conditions Require start conditions.\n"
    "\n"
    "-d --debug-output Creates a parser that dumps information during\n"
    " about the current position and in which state the\n"
    " parser is.\n"
    "\n"
    "-D --emit-dot Emit a Graphviz dot view of the DFA graph\n"
    "\n"
    "-e --ecb Generate a parser that supports EBCDIC. The generated code\n"
    " can deal with any character up to 0xFF. In this mode re2c\n"
    " assumes that input character size is 1 byte. This switch is\n"
    " incompatible with -w, -u, -x and -8\n"
    "\n"
    "-f --storable-state Generate a scanner that supports storable states.\n"
    "\n"
    "-F --flex-syntax Partial support for flex syntax.\n"
    "\n"
    "-g --computed-gotos Implies -b. Generate computed goto code (only useable\n"
    " with gcc).\n"
    "\n"
    "-i --no-debug-info Do not generate '#line' info (useful for versioning).\n"
    "\n"
    "-o of --output=of Specify the output file (of) instead of stdout\n"
    "\n"
    "-r --reusable Allow reuse of scanner definitions.\n"
    "\n"
    "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n"
    " need this assist to generate better code.\n"
    "\n"
    "-t th --type-header=th Generate a type header file (th) with type definitions.\n"
    "\n"
    "-u --unicode Generate a parser that supports UTF-32. The generated code\n"
    " can deal with any valid Unicode character up to 0x10FFFF.\n"
    " In this mode re2c assumes that input character size is 4 bytes.\n"
    " This switch is incompatible with -e, -w, -x and -8. It implies -s.\n"
    "\n"
    "-v --version Show version information.\n"
    "\n"
    "-V --vernum Show version as one number.\n"
    "\n"
    "-w --wide-chars Generate a parser that supports UCS-2. The generated code can\n"
    " deal with any valid Unicode character up to 0xFFFF. In this mode\n"
    " re2c assumes that input character size is 2 bytes. This switch is\n"
    " incompatible with -e, -x, -u and -8. It implies -s.\n"
    "\n"
    "-x --utf-16 Generate a parser that supports UTF-16. The generated code can\n"
    " deal with any valid Unicode character up to 0x10FFFF. In this mode\n"
    " re2c assumes that input character size is 2 bytes. This switch is\n"
    " incompatible with -e, -w, -u and -8. It implies -s.\n"
    "\n"
    "-8 --utf-8 Generate a parser that supports UTF-8. The generated code can\n"
    " deal with any valid Unicode character up to 0x10FFFF. In this mode\n"
    " re2c assumes that input character size is 1 byte. This switch is\n"
    " incompatible with -e, -w, -x and -u.\n"
    "\n"
    "--no-generation-date Suppress date output in the generated file.\n"
    "\n"
    "--no-version Suppress version output in the generated file.\n"
    "\n"
    "--case-insensitive All strings are case insensitive, so all \"-expressions\n"
    " are treated in the same way '-expressions are.\n"
    "\n"
    "--case-inverted Invert the meaning of single and double quoted strings.\n"
    " With this switch single quotes are case sensitive and\n"
    " double quotes are case insensitive.\n"
    "\n"
    "--encoding-policy ep Specify what re2c should do when given bad code unit.\n"
    " ep can be one of the following: fail, substitute, ignore.\n"
    "\n"
    "--input i Specify re2c input API.\n"
    " i can be one of the following: default, custom.\n"
    "\n"
    "--skeleton Instead of embedding re2c-generated code into C/C++ source,\n"
    " generate a self-contained program for the same DFA.\n"
    " Most useful for correctness and performance testing.\n"
    "\n"
    "--empty-class policy What to do if user inputs empty character class. policy can be\n"
    " one of the following: 'match-empty' (match empty input, default),\n"
    " 'match-none' (fail to match on any input), 'error' (compilation\n"
    " error). Note that there are various ways to construct empty class,\n"
    " e.g: [], [^\\x00-\\xFF], [\\x00-\\xFF]\\[\\x00-\\xFF].\n"
    "\n"
    "--dfa-minimization <table | moore>\n"
    " Internal algorithm used by re2c to minimize DFA (defaults to\n"
    " 'moore'). Both table filling and Moore's algorithms should\n"
    " produce identical DFA (up to states relabelling). Table filling\n"
    " algorithm is much simpler and slower; it serves as a reference\n"
    " implementation.\n"
    "\n"
    "-1 --single-pass Deprecated and does nothing (single pass is by default now).\n"
    "\n"
    "-W Turn on all warnings.\n"
    "\n"
    "-Werror Turn warnings into errors. Note that this option alone doesn't\n"
    " turn on any warnings, it only affects those warnings that have\n"
    " been turned on so far or will be turned on later.\n"
    "\n"
    "-W<warning> Turn on individual warning.\n"
    "\n"
    "-Wno-<warning> Turn off individual warning.\n"
    "\n"
    "-Werror-<warning> Turn on individual warning and treat it as error (this implies\n"
    " '-W<warning>').\n"
    "\n"
    "-Wno-error-<warning> Don't treat this particular warning as error. This doesn't turn\n"
    " off the warning itself.\n"
    "\n"
    "Warnings:\n"
    "\n"
    "-Wcondition-order Warn if the generated program makes implicit assumptions about\n"
    " condition numbering. One should use either '-t, --type-header'\n"
    " option or '/*!types:re2c*/' directive to generate mapping of\n"
    " condition names to numbers and use autogenerated condition names.\n"
    "\n"
    "-Wempty-character-class Warn if regular expression contains empty character class. From\n"
    " the rational point of view trying to match empty character class\n"
    " makes no sense: it should always fail. However, for backwards\n"
    " compatibility reasons re2c allows empty character class and treats\n"
    " it as empty string. Use '--empty-class' option to change default\n"
    " behaviour.\n"
    "\n"
    "-Wmatch-empty-string Warn if regular expression in a rule is nullable (matches empty\n"
    " string). If DFA runs in a loop and empty match is unintentional\n"
    " (input position is not advanced manually), lexer may get stuck\n"
    " in eternal loop.\n"
    "\n"
    "-Wswapped-range Warn if range lower bound is greater than upper bound. Default\n"
    " re2c behaviour is to silently swap range bounds.\n"
    "\n"
    "-Wundefined-control-flow\n"
    " Warn if some input strings cause undefined control flow in lexer\n"
    " (the faulty patterns are reported). This is the most dangerous\n"
    " and common mistake. It can be easily fixed by adding default rule\n"
    " '*' (this rule has the lowest priority, matches any code unit\n"
    " and consumes exactly one code unit).\n"
    "\n"
    "-Wuseless-escape Warn if a symbol is escaped when it shouldn't be. By default re2c\n"
    " silently ignores escape, but this may as well indicate a typo\n"
    " or an error in escape sequence.\n"
    "\n"
    );
}
|
||||
|
||||
// Prints the package version as a single six-character number followed by a
// newline on stdout. The dotted PACKAGE_VERSION string is rewritten in place;
// presumably the result is MMmmpp (two digits each for major, minor and
// patch) - e.g. "0.16" becomes "001600".
// NOTE(review): the fixed indices assume at least a "major.minor" string;
// erase() throws std::out_of_range if its position is past the end.
void vernum ()
{
    std::string vernum (PACKAGE_VERSION);
    // single-character first component: prepend a zero to make it two wide
    if (vernum[1] == '.')
    {
        vernum.insert(0, "0");
    }
    // drop the character at index 2 (the separator after the first component)
    vernum.erase(2, 1);
    // single-character second component: pad it with a leading zero
    if (vernum[3] == '.')
    {
        vernum.insert(2, "0");
    }
    // drop the character at index 4 (the second separator, if there is one)
    vernum.erase(4, 1);
    // third component missing or only one digit wide: insert a zero before it
    if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9')
    {
        vernum.insert(4, "0");
    }
    // cut or zero-pad to exactly six characters
    vernum.resize(6, '0');

    printf ("%s\n", vernum.c_str ());
}
|
||||
|
||||
void version ()
|
||||
{
|
||||
printf ("re2c %s\n", PACKAGE_VERSION);
|
||||
}
|
||||
|
||||
// Builds the message fragment "in condition '<cond>' " (note the trailing
// space); returns an empty string when no condition name is given.
std::string incond (const std::string & cond)
{
    if (cond.empty ())
    {
        return std::string ();
    }
    return "in condition '" + cond + "' ";
}
|
||||
|
||||
} // namespace re2c
|
24
tools/re2c/src/conf/msg.h
Normal file
24
tools/re2c/src/conf/msg.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#ifndef _RE2C_CONF_MSG_
|
||||
#define _RE2C_CONF_MSG_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "src/util/attribute.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// "re2c: error: <formatted message>\n" on stderr
void error (const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 1, 2)));
// error about conflicting encoding switches
void error_encoding ();
// error about an option that lacks its argument
void error_arg (const char * option);
// "re2c: <error|warning>: line <line>: " on stderr
void warning_start (uint32_t line, bool error);
// optional " [-W<type>]" / " [-Werror-<type>]" suffix and a newline on stderr
void warning_end (const char * type, bool error);
// complete diagnostic: warning_start + formatted message + warning_end
void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 4, 5)));
// command-line help on stderr
void usage ();
// version as one six-character number on stdout
void vernum ();
// "re2c <version>" on stdout
void version ();
// "in condition '<cond>' ", or an empty string if cond is empty
std::string incond (const std::string & cond);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CONF_MSG_
|
331
tools/re2c/src/conf/opt.cc
Normal file
331
tools/re2c/src/conf/opt.cc
Normal file
|
@ -0,0 +1,331 @@
|
|||
#include "src/conf/msg.h"
|
||||
#include "src/conf/opt.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// The global options object; other code reads settings through it as
// 'opts->name'.
Opt opts;
|
||||
|
||||
// Default constructor: the member initializer list is generated from the
// RE2C_OPTS list (defined outside this file), giving every option the
// default value recorded there. OPT1 produces the initializer introduced by
// ':' and OPT the ones introduced by ',' - so RE2C_OPTS presumably uses OPT1
// for its first entry only.
opt_t::opt_t ()
#define OPT1(type, name, value) : name (value)
#define OPT(type, name, value) , name (value)
    RE2C_OPTS
#undef OPT1
#undef OPT
{}
|
||||
|
||||
// Copy constructor: the member initializer list is generated from
// RE2C_OPTS, copying each listed option from `opt` (the default value in
// the list is ignored here).
opt_t::opt_t (const opt_t & opt)
#define OPT1(type, name, value) : name (opt.name)
#define OPT(type, name, value) , name (opt.name)
    RE2C_OPTS
#undef OPT1
#undef OPT
{}
|
||||
|
||||
// Copy assignment: expands RE2C_OPTS into one 'name = opt.name;' statement
// per listed option. OPT1 is aliased to OPT because no entry needs special
// treatment here.
opt_t & opt_t::operator = (const opt_t & opt)
{
#define OPT1 OPT
#define OPT(type, name, value) name = opt.name;
    RE2C_OPTS
#undef OPT1
#undef OPT
    return *this;
}
|
||||
|
||||
void opt_t::fix ()
|
||||
{
|
||||
// some options either make no sense or must have fixed value
|
||||
// with current target: reset them to default
|
||||
switch (target)
|
||||
{
|
||||
case DOT:
|
||||
// default code generation options
|
||||
sFlag = Opt::baseopt.sFlag;
|
||||
bFlag = Opt::baseopt.bFlag;
|
||||
gFlag = Opt::baseopt.gFlag;
|
||||
cGotoThreshold = Opt::baseopt.cGotoThreshold;
|
||||
// default environment-insensitive formatting
|
||||
yybmHexTable = Opt::baseopt.yybmHexTable;
|
||||
// fallthrough
|
||||
case SKELETON:
|
||||
// default line information
|
||||
iFlag = Opt::baseopt.iFlag;
|
||||
// default environment-sensitive formatting
|
||||
topIndent = Opt::baseopt.topIndent;
|
||||
indString = Opt::baseopt.indString;
|
||||
condDivider = Opt::baseopt.condDivider;
|
||||
condDividerParam = Opt::baseopt.condDividerParam;
|
||||
// default environment bindings
|
||||
tFlag = Opt::baseopt.tFlag;
|
||||
header_file = Opt::baseopt.header_file;
|
||||
yycondtype = Opt::baseopt.yycondtype;
|
||||
cond_get = Opt::baseopt.cond_get;
|
||||
cond_get_naked = Opt::baseopt.cond_get_naked;
|
||||
cond_set = Opt::baseopt.cond_set;
|
||||
cond_set_arg = Opt::baseopt.cond_set_arg;
|
||||
cond_set_naked = Opt::baseopt.cond_set_naked;
|
||||
yyctable = Opt::baseopt.yyctable;
|
||||
condPrefix = Opt::baseopt.condPrefix;
|
||||
condEnumPrefix = Opt::baseopt.condEnumPrefix;
|
||||
condGoto = Opt::baseopt.condGoto;
|
||||
condGotoParam = Opt::baseopt.condGotoParam;
|
||||
fFlag = Opt::baseopt.fFlag;
|
||||
state_get = Opt::baseopt.state_get;
|
||||
state_get_naked = Opt::baseopt.state_get_naked;
|
||||
state_set = Opt::baseopt.state_set;
|
||||
state_set_arg = Opt::baseopt.state_set_arg;
|
||||
state_set_naked = Opt::baseopt.state_set_naked;
|
||||
yyfilllabel = Opt::baseopt.yyfilllabel;
|
||||
yynext = Opt::baseopt.yynext;
|
||||
yyaccept = Opt::baseopt.yyaccept;
|
||||
bUseStateAbort = Opt::baseopt.bUseStateAbort;
|
||||
bUseStateNext = Opt::baseopt.bUseStateNext;
|
||||
yybm = Opt::baseopt.yybm;
|
||||
yytarget = Opt::baseopt.yytarget;
|
||||
input_api = Opt::baseopt.input_api;
|
||||
yycursor = Opt::baseopt.yycursor;
|
||||
yymarker = Opt::baseopt.yymarker;
|
||||
yyctxmarker = Opt::baseopt.yyctxmarker;
|
||||
yylimit = Opt::baseopt.yylimit;
|
||||
yypeek = Opt::baseopt.yypeek;
|
||||
yyskip = Opt::baseopt.yyskip;
|
||||
yybackup = Opt::baseopt.yybackup;
|
||||
yybackupctx = Opt::baseopt.yybackupctx;
|
||||
yyrestore = Opt::baseopt.yyrestore;
|
||||
yyrestorectx = Opt::baseopt.yyrestorectx;
|
||||
yylessthan = Opt::baseopt.yylessthan;
|
||||
dFlag = Opt::baseopt.dFlag;
|
||||
yydebug = Opt::baseopt.yydebug;
|
||||
yyctype = Opt::baseopt.yyctype;
|
||||
yych = Opt::baseopt.yych;
|
||||
bEmitYYCh = Opt::baseopt.bEmitYYCh;
|
||||
yychConversion = Opt::baseopt.yychConversion;
|
||||
fill = Opt::baseopt.fill;
|
||||
fill_use = Opt::baseopt.fill_use;
|
||||
fill_check = Opt::baseopt.fill_check;
|
||||
fill_arg = Opt::baseopt.fill_arg;
|
||||
fill_arg_use = Opt::baseopt.fill_arg_use;
|
||||
fill_naked = Opt::baseopt.fill_naked;
|
||||
labelPrefix = Opt::baseopt.labelPrefix;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (bCaseInsensitive)
|
||||
{
|
||||
bCaseInverted = Opt::baseopt.bCaseInverted;
|
||||
}
|
||||
|
||||
// respect hierarchy
|
||||
if (!cFlag)
|
||||
{
|
||||
tFlag = Opt::baseopt.tFlag;
|
||||
header_file = Opt::baseopt.header_file;
|
||||
yycondtype = Opt::baseopt.yycondtype;
|
||||
cond_get = Opt::baseopt.cond_get;
|
||||
cond_get_naked = Opt::baseopt.cond_get_naked;
|
||||
cond_set = Opt::baseopt.cond_set;
|
||||
cond_set_arg = Opt::baseopt.cond_set_arg;
|
||||
cond_set_naked = Opt::baseopt.cond_set_naked;
|
||||
yyctable = Opt::baseopt.yyctable;
|
||||
condPrefix = Opt::baseopt.condPrefix;
|
||||
condEnumPrefix = Opt::baseopt.condEnumPrefix;
|
||||
condDivider = Opt::baseopt.condDivider;
|
||||
condDividerParam = Opt::baseopt.condDividerParam;
|
||||
condGoto = Opt::baseopt.condGoto;
|
||||
condGotoParam = Opt::baseopt.condGotoParam;
|
||||
}
|
||||
if (!fFlag)
|
||||
{
|
||||
state_get = Opt::baseopt.state_get;
|
||||
state_get_naked = Opt::baseopt.state_get_naked;
|
||||
state_set = Opt::baseopt.state_set;
|
||||
state_set_arg = Opt::baseopt.state_set_arg;
|
||||
state_set_naked = Opt::baseopt.state_set_naked;
|
||||
yyfilllabel = Opt::baseopt.yyfilllabel;
|
||||
yynext = Opt::baseopt.yynext;
|
||||
yyaccept = Opt::baseopt.yyaccept;
|
||||
bUseStateAbort = Opt::baseopt.bUseStateAbort;
|
||||
bUseStateNext = Opt::baseopt.bUseStateNext;
|
||||
}
|
||||
if (!bFlag)
|
||||
{
|
||||
yybmHexTable = Opt::baseopt.yybmHexTable;
|
||||
yybm = Opt::baseopt.yybm;
|
||||
}
|
||||
if (!gFlag)
|
||||
{
|
||||
cGotoThreshold = Opt::baseopt.cGotoThreshold;
|
||||
yytarget = Opt::baseopt.yytarget;
|
||||
}
|
||||
if (input_api.type () != InputAPI::DEFAULT)
|
||||
{
|
||||
yycursor = Opt::baseopt.yycursor;
|
||||
yymarker = Opt::baseopt.yymarker;
|
||||
yyctxmarker = Opt::baseopt.yyctxmarker;
|
||||
yylimit = Opt::baseopt.yylimit;
|
||||
}
|
||||
if (input_api.type () != InputAPI::CUSTOM)
|
||||
{
|
||||
yypeek = Opt::baseopt.yypeek;
|
||||
yyskip = Opt::baseopt.yyskip;
|
||||
yybackup = Opt::baseopt.yybackup;
|
||||
yybackupctx = Opt::baseopt.yybackupctx;
|
||||
yyrestore = Opt::baseopt.yyrestore;
|
||||
yyrestorectx = Opt::baseopt.yyrestorectx;
|
||||
yylessthan = Opt::baseopt.yylessthan;
|
||||
}
|
||||
if (!dFlag)
|
||||
{
|
||||
yydebug = Opt::baseopt.yydebug;
|
||||
}
|
||||
if (!fill_use)
|
||||
{
|
||||
fill = Opt::baseopt.fill;
|
||||
fill_check = Opt::baseopt.fill_check;
|
||||
fill_arg = Opt::baseopt.fill_arg;
|
||||
fill_arg_use = Opt::baseopt.fill_arg_use;
|
||||
fill_naked = Opt::baseopt.fill_naked;
|
||||
}
|
||||
|
||||
// force individual options
|
||||
switch (target)
|
||||
{
|
||||
case DOT:
|
||||
iFlag = true;
|
||||
break;
|
||||
case SKELETON:
|
||||
iFlag = true;
|
||||
input_api.set (InputAPI::CUSTOM);
|
||||
indString = " ";
|
||||
topIndent = 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch (encoding.type ())
|
||||
{
|
||||
case Enc::UCS2:
|
||||
case Enc::UTF16:
|
||||
case Enc::UTF32:
|
||||
sFlag = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (bFlag)
|
||||
{
|
||||
sFlag = true;
|
||||
}
|
||||
if (gFlag)
|
||||
{
|
||||
bFlag = true;
|
||||
sFlag = true;
|
||||
}
|
||||
if (header_file != NULL)
|
||||
{
|
||||
tFlag = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Binds this effective option set to the user option set 'opt'. 'real'
// starts out default-constructed; it is refreshed from 'opt' by sync ().
realopt_t::realopt_t (useropt_t & opt) : real (), user (opt)
{
}
|
||||
|
||||
// Read-only access to the effective options. Synchronizes with the user
// options first, so the caller always sees an up-to-date, fixed option set.
const opt_t * realopt_t::operator -> ()
{
    sync ();
    // address of the 'real' member (the text had been mangled into the
    // single character U+211C by HTML entity decoding of "&real;")
    return &real;
}
|
||||
|
||||
void realopt_t::sync ()
|
||||
{
|
||||
if (user.diverge)
|
||||
{
|
||||
real = user.opt;
|
||||
real.fix ();
|
||||
user.diverge = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Starts with default options and 'diverge' set, so that the first
// realopt_t::sync () copies and fixes them.
useropt_t::useropt_t () : opt (), diverge (true)
{
}
|
||||
|
||||
// Mutable access to the user options. Every access marks the options as
// diverged (whether or not the caller actually modifies anything), which
// makes the next realopt_t::sync () refresh the effective options.
opt_t * useropt_t::operator -> ()
{
    opt_t * const mutable_opts = &opt;
    diverge = true;
    return mutable_opts;
}
|
||||
|
||||
const opt_t Opt::baseopt;
|
||||
|
||||
bool Opt::source (const char * s)
|
||||
{
|
||||
if (source_file)
|
||||
{
|
||||
error ("multiple source files: %s, %s", source_file, s);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
source_file = s;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool Opt::output (const char * s)
|
||||
{
|
||||
if (output_file)
|
||||
{
|
||||
error ("multiple output files: %s, %s", output_file, s);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
output_file = s;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void Opt::reset_encoding (const Enc & enc)
|
||||
{
|
||||
useropt->encoding = enc;
|
||||
}
|
||||
|
||||
void Opt::reset_mapCodeName ()
|
||||
{
|
||||
// historically arranged set of names
|
||||
// no actual reason why these particular options should be reset
|
||||
useropt->cond_get = Opt::baseopt.cond_get;
|
||||
useropt->cond_set = Opt::baseopt.cond_set;
|
||||
useropt->fill = Opt::baseopt.fill;
|
||||
useropt->state_get = Opt::baseopt.state_get;
|
||||
useropt->state_set = Opt::baseopt.state_set;
|
||||
useropt->yybackup = Opt::baseopt.yybackup;
|
||||
useropt->yybackupctx = Opt::baseopt.yybackupctx;
|
||||
useropt->yycondtype = Opt::baseopt.yycondtype;
|
||||
useropt->yyctxmarker = Opt::baseopt.yyctxmarker;
|
||||
useropt->yyctype = Opt::baseopt.yyctype;
|
||||
useropt->yycursor = Opt::baseopt.yycursor;
|
||||
useropt->yydebug = Opt::baseopt.yydebug;
|
||||
useropt->yylessthan = Opt::baseopt.yylessthan;
|
||||
useropt->yylimit = Opt::baseopt.yylimit;
|
||||
useropt->yymarker = Opt::baseopt.yymarker;
|
||||
useropt->yypeek = Opt::baseopt.yypeek;
|
||||
useropt->yyrestore = Opt::baseopt.yyrestore;
|
||||
useropt->yyrestorectx = Opt::baseopt.yyrestorectx;
|
||||
useropt->yyskip = Opt::baseopt.yyskip;
|
||||
useropt->yyfilllabel = Opt::baseopt.yyfilllabel;
|
||||
useropt->yynext = Opt::baseopt.yynext;
|
||||
useropt->yyaccept = Opt::baseopt.yyaccept;
|
||||
useropt->yybm = Opt::baseopt.yybm;
|
||||
useropt->yych = Opt::baseopt.yych;
|
||||
useropt->yyctable = Opt::baseopt.yyctable;
|
||||
useropt->yytarget = Opt::baseopt.yytarget;
|
||||
}
|
||||
|
||||
} // namespace re2c
|
218
tools/re2c/src/conf/opt.h
Normal file
218
tools/re2c/src/conf/opt.h
Normal file
|
@ -0,0 +1,218 @@
|
|||
#ifndef _RE2C_CONF_OPT_
|
||||
#define _RE2C_CONF_OPT_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h>
|
||||
#include <string>
|
||||
|
||||
#include "src/codegen/input_api.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/regexp/empty_class_policy.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// X-macro table of all options: one OPT (type, name, default) entry per
// option. The very first entry uses OPT1 instead of OPT so that users of
// the table can treat it specially (e.g. to open a member-initializer
// list). Users define OPT1/OPT, expand RE2C_OPTS, then undefine both.
#define RE2C_OPTS \
    /* target */ \
    OPT1 (opt_t::target_t,      target,             CODE) \
    /* fingerprint */ \
    OPT (bool,                  bNoGenerationDate,  false) \
    OPT (bool,                  version,            true) \
    /* regular expressions */ \
    OPT (Enc,                   encoding,           Enc ()) \
    OPT (bool,                  bCaseInsensitive,   false) \
    OPT (bool,                  bCaseInverted,      false) \
    OPT (empty_class_policy_t,  empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \
    /* conditions */ \
    OPT (bool,                  cFlag,              false) \
    OPT (bool,                  tFlag,              false) \
    OPT (const char *,          header_file,        NULL) \
    OPT (std::string,           yycondtype,         "YYCONDTYPE") \
    OPT (std::string,           cond_get,           "YYGETCONDITION") \
    OPT (bool,                  cond_get_naked,     false) \
    OPT (std::string,           cond_set,           "YYSETCONDITION") \
    OPT (std::string,           cond_set_arg,       "@@") \
    OPT (bool,                  cond_set_naked,     false) \
    OPT (std::string,           yyctable,           "yyctable") \
    OPT (std::string,           condPrefix,         "yyc_") \
    OPT (std::string,           condEnumPrefix,     "yyc") \
    OPT (std::string,           condDivider,        "/* *********************************** */") \
    OPT (std::string,           condDividerParam,   "@@") \
    OPT (std::string,           condGoto,           "goto @@;") \
    OPT (std::string,           condGotoParam,      "@@") \
    /* states */ \
    OPT (bool,                  fFlag,              false) \
    OPT (std::string,           state_get,          "YYGETSTATE") \
    OPT (bool,                  state_get_naked,    false) \
    OPT (std::string,           state_set,          "YYSETSTATE") \
    OPT (std::string,           state_set_arg,      "@@") \
    OPT (bool,                  state_set_naked,    false) \
    OPT (std::string,           yyfilllabel,        "yyFillLabel") \
    OPT (std::string,           yynext,             "yyNext") \
    OPT (std::string,           yyaccept,           "yyaccept") \
    OPT (bool,                  bUseStateAbort,     false) \
    OPT (bool,                  bUseStateNext,      false) \
    /* reuse */ \
    OPT (bool,                  rFlag,              false) \
    /* partial flex syntax support */ \
    OPT (bool,                  FFlag,              false) \
    /* code generation */ \
    OPT (bool,                  sFlag,              false) \
    OPT (bool,                  bFlag,              false) \
    OPT (std::string,           yybm,               "yybm") \
    OPT (bool,                  yybmHexTable,       false) \
    OPT (bool,                  gFlag,              false) \
    OPT (std::string,           yytarget,           "yytarget") \
    OPT (uint32_t,              cGotoThreshold,     9) \
    /* formatting */ \
    OPT (uint32_t,              topIndent,          0) \
    OPT (std::string,           indString,          "\t") \
    /* input API */ \
    OPT (InputAPI,              input_api,          InputAPI ()) \
    OPT (std::string,           yycursor,           "YYCURSOR") \
    OPT (std::string,           yymarker,           "YYMARKER") \
    OPT (std::string,           yyctxmarker,        "YYCTXMARKER") \
    OPT (std::string,           yylimit,            "YYLIMIT") \
    OPT (std::string,           yypeek,             "YYPEEK") \
    OPT (std::string,           yyskip,             "YYSKIP") \
    OPT (std::string,           yybackup,           "YYBACKUP") \
    OPT (std::string,           yybackupctx,        "YYBACKUPCTX") \
    OPT (std::string,           yyrestore,          "YYRESTORE") \
    OPT (std::string,           yyrestorectx,       "YYRESTORECTX") \
    OPT (std::string,           yylessthan,         "YYLESSTHAN") \
    /* #line directives */ \
    OPT (bool,                  iFlag,              false) \
    /* debug */ \
    OPT (bool,                  dFlag,              false) \
    OPT (std::string,           yydebug,            "YYDEBUG") \
    /* yych */ \
    OPT (std::string,           yyctype,            "YYCTYPE") \
    OPT (std::string,           yych,               "yych") \
    OPT (bool,                  bEmitYYCh,          true) \
    OPT (bool,                  yychConversion,     false) \
    /* YYFILL */ \
    OPT (std::string,           fill,               "YYFILL") \
    OPT (bool,                  fill_use,           true) \
    OPT (bool,                  fill_check,         true) \
    OPT (std::string,           fill_arg,           "@@") \
    OPT (bool,                  fill_arg_use,       true) \
    OPT (bool,                  fill_naked,         false) \
    /* labels */ \
    OPT (std::string,           labelPrefix,        "yy") \
    /* internals */ \
    OPT (dfa_minimization_t,    dfa_minimization,   DFA_MINIMIZATION_MOORE)
|
||||
|
||||
struct opt_t
|
||||
{
|
||||
enum target_t
|
||||
{
|
||||
CODE,
|
||||
DOT,
|
||||
SKELETON
|
||||
};
|
||||
|
||||
#define OPT1 OPT
|
||||
#define OPT(type, name, value) type name;
|
||||
RE2C_OPTS
|
||||
#undef OPT1
|
||||
#undef OPT
|
||||
|
||||
opt_t ();
|
||||
opt_t (const opt_t & opt);
|
||||
opt_t & operator = (const opt_t & opt);
|
||||
void fix ();
|
||||
};
|
||||
|
||||
class useropt_t;
|
||||
class realopt_t
|
||||
{
|
||||
opt_t real;
|
||||
useropt_t & user;
|
||||
public:
|
||||
realopt_t (useropt_t & opt);
|
||||
const opt_t * operator -> ();
|
||||
void sync ();
|
||||
};
|
||||
|
||||
class useropt_t
|
||||
{
|
||||
opt_t opt;
|
||||
bool diverge;
|
||||
public:
|
||||
useropt_t ();
|
||||
opt_t * operator -> ();
|
||||
friend void realopt_t::sync ();
|
||||
};
|
||||
|
||||
struct Opt
|
||||
{
|
||||
static const opt_t baseopt;
|
||||
|
||||
const char * source_file;
|
||||
const char * output_file;
|
||||
|
||||
private:
|
||||
useropt_t useropt;
|
||||
realopt_t realopt;
|
||||
|
||||
public:
|
||||
Opt ()
|
||||
: source_file (NULL)
|
||||
, output_file (NULL)
|
||||
, useropt ()
|
||||
, realopt (useropt)
|
||||
{}
|
||||
|
||||
// read-only access, forces options syncronization
|
||||
const opt_t * operator -> ()
|
||||
{
|
||||
return realopt.operator -> ();
|
||||
}
|
||||
|
||||
bool source (const char * s);
|
||||
bool output (const char * s);
|
||||
|
||||
// Inplace configurations are applied immediately when parsed.
|
||||
// This is very bad: first, re2c behaviour is changed in the middle
|
||||
// of the block; second, config is resynced too often (every
|
||||
// attempt to read config that has been updated results in
|
||||
// automatic resync). It is much better to set all options at once.
|
||||
bool set_encoding (Enc::type_t t) { return useropt->encoding.set (t); }
|
||||
void unset_encoding (Enc::type_t t) { useropt->encoding.unset (t); }
|
||||
void set_encoding_policy (Enc::policy_t p) { useropt->encoding.setPolicy (p); }
|
||||
void set_input_api (InputAPI::type_t t) { useropt->input_api.set (t); }
|
||||
#define OPT1 OPT
|
||||
#define OPT(type, name, value) void set_##name (type arg) { useropt->name = arg; }
|
||||
RE2C_OPTS
|
||||
#undef OPT1
|
||||
#undef OPT
|
||||
|
||||
// helpers
|
||||
std::string yychConversion ()
|
||||
{
|
||||
return realopt->yychConversion
|
||||
? "(" + realopt->yyctype + ")"
|
||||
: "";
|
||||
}
|
||||
|
||||
// bad temporary hacks, should be fixed by proper scoping of config (parts).
|
||||
void reset_encoding (const Enc & enc);
|
||||
void reset_mapCodeName ();
|
||||
|
||||
FORBID_COPY (Opt);
|
||||
};
|
||||
|
||||
// Result of command-line parsing:
//   OK        - options parsed, a source file was given
//   EXIT_OK   - an informational option (help, version) was handled
//   EXIT_FAIL - an error was reported
enum parse_opts_t { OK, EXIT_OK, EXIT_FAIL };
|
||||
|
||||
parse_opts_t parse_opts (char ** argv, Opt & opts);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CONF_OPT_
|
2846
tools/re2c/src/conf/parse_opts.cc
Normal file
2846
tools/re2c/src/conf/parse_opts.cc
Normal file
File diff suppressed because it is too large
Load diff
261
tools/re2c/src/conf/parse_opts.re
Normal file
261
tools/re2c/src/conf/parse_opts.re
Normal file
|
@ -0,0 +1,261 @@
|
|||
#include "src/codegen/input_api.h"
|
||||
#include "src/conf/msg.h"
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/conf/warn.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/regexp/empty_class_policy.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Advances 'argv' to the following argument and stores that argument in
// 'arg'. Returns false when the terminating NULL entry has been reached.
static inline bool next (char * & arg, char ** & argv)
{
    ++argv;
    arg = *argv;
    return NULL != arg;
}
|
||||
|
||||
// Parses the NULL-terminated argument vector 'argv' and applies the options
// to 'opts' (and to the global 'warn' for -W options). The entry 'argv'
// points at on entry is skipped: every argument is fetched with next (),
// which advances first.
//
// Returns OK if parsing succeeded and a source file was given, EXIT_OK after
// an informational option (help, version, vernum), EXIT_FAIL after an error
// has been reported.
//
// The function is a set of re2c lexers connected with gotos: 'opt' scans the
// start of an argument, 'opt_short' / 'opt_long' / 'opt_warn' scan the rest
// of it, and the remaining labels scan arguments of particular options.
parse_opts_t parse_opts (char ** argv, Opt & opts)
{
#define YYCTYPE unsigned char
    char * YYCURSOR;
    char * YYMARKER;
    Warn::option_t option;

/*!re2c
    re2c:yyfill:enable = 0;
    re2c:yych:conversion = 1;

    end = "\x00";
    filename = [^\x00-] [^\x00]*;
*/

opt:
    if (!next (YYCURSOR, argv))
    {
        goto end;
    }
/*!re2c
    *
    {
        error ("bad option: %s", *argv);
        return EXIT_FAIL;
    }

    "--" end
    {
        // all remaining arguments are non-options
        // so they must be input files
        // re2c expects exactly one input file
        for (char * f; next (f, argv);)
        {
            if (!opts.source (f))
            {
                return EXIT_FAIL;
            }
        }
        goto end;
    }

    "-" end { if (!opts.source ("<stdin>")) return EXIT_FAIL; goto opt; }
    filename end { if (!opts.source (*argv)) return EXIT_FAIL; goto opt; }

    "-" { goto opt_short; }
    "--" { goto opt_long; }

    "-W" end { warn.set_all (); goto opt; }
    "-Werror" end { warn.set_all_error (); goto opt; }
    "-W" { option = Warn::W; goto opt_warn; }
    "-Wno-" { option = Warn::WNO; goto opt_warn; }
    "-Werror-" { option = Warn::WERROR; goto opt_warn; }
    "-Wno-error-" { option = Warn::WNOERROR; goto opt_warn; }
*/

opt_warn:
/*!re2c
    *
    {
        error ("bad warning: %s", *argv);
        return EXIT_FAIL;
    }
    "condition-order" end { warn.set (Warn::CONDITION_ORDER, option); goto opt; }
    "empty-character-class" end { warn.set (Warn::EMPTY_CHARACTER_CLASS, option); goto opt; }
    "match-empty-string" end { warn.set (Warn::MATCH_EMPTY_STRING, option); goto opt; }
    "swapped-range" end { warn.set (Warn::SWAPPED_RANGE, option); goto opt; }
    "undefined-control-flow" end { warn.set (Warn::UNDEFINED_CONTROL_FLOW, option); goto opt; }
    "unreachable-rules" end { warn.set (Warn::UNREACHABLE_RULES, option); goto opt; }
    "useless-escape" end { warn.set (Warn::USELESS_ESCAPE, option); goto opt; }
*/

opt_short:
/*!re2c
    *
    {
        error ("bad short option: %s", *argv);
        return EXIT_FAIL;
    }
    end { goto opt; }
    [?h] { usage (); return EXIT_OK; }
    "v" { version (); return EXIT_OK; }
    "V" { vernum (); return EXIT_OK; }
    "b" { opts.set_bFlag (true); goto opt_short; }
    "c" { opts.set_cFlag (true); goto opt_short; }
    "d" { opts.set_dFlag (true); goto opt_short; }
    "D" { opts.set_target (opt_t::DOT); goto opt_short; }
    "f" { opts.set_fFlag (true); goto opt_short; }
    "F" { opts.set_FFlag (true); goto opt_short; }
    "g" { opts.set_gFlag (true); goto opt_short; }
    "i" { opts.set_iFlag (true); goto opt_short; }
    "r" { opts.set_rFlag (true); goto opt_short; }
    "s" { opts.set_sFlag (true); goto opt_short; }
    "S" { opts.set_target (opt_t::SKELETON); goto opt_short; }
    "e" { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
    "u" { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
    "w" { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
    "x" { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
    "8" { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt_short; }
    "o" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; }
    "o" { *argv = YYCURSOR; goto opt_output; }
    "t" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; }
    "t" { *argv = YYCURSOR; goto opt_header; }
    "1" { goto opt_short; } // deprecated
*/

opt_long:
/*!re2c
    *
    {
        error ("bad long option: %s", *argv);
        return EXIT_FAIL;
    }
    "help" end { usage (); return EXIT_OK; }
    "version" end { version (); return EXIT_OK; }
    "vernum" end { vernum (); return EXIT_OK; }
    "bit-vectors" end { opts.set_bFlag (true); goto opt; }
    "start-conditions" end { opts.set_cFlag (true); goto opt; }
    "debug-output" end { opts.set_dFlag (true); goto opt; }
    "emit-dot" end { opts.set_target (opt_t::DOT); goto opt; }
    "storable-state" end { opts.set_fFlag (true); goto opt; }
    "flex-syntax" end { opts.set_FFlag (true); goto opt; }
    "computed-gotos" end { opts.set_gFlag (true); goto opt; }
    "no-debug-info" end { opts.set_iFlag (true); goto opt; }
    "reusable" end { opts.set_rFlag (true); goto opt; }
    "nested-ifs" end { opts.set_sFlag (true); goto opt; }
    "no-generation-date" end { opts.set_bNoGenerationDate (true); goto opt; }
    "no-version" end { opts.set_version (false); goto opt; }
    "case-insensitive" end { opts.set_bCaseInsensitive (true); goto opt; }
    "case-inverted" end { opts.set_bCaseInverted (true); goto opt; }
    "skeleton" end { opts.set_target (opt_t::SKELETON); goto opt; }
    "ecb" end { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt; }
    "unicode" end { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt; }
    "wide-chars" end { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt; }
    "utf-16" end { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt; }
    "utf-8" end { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt; }
    "output" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; }
    "type-header" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; }
    "encoding-policy" end { goto opt_encoding_policy; }
    "input" end { goto opt_input; }
    "empty-class" end { goto opt_empty_class; }
    "dfa-minimization" end { goto opt_dfa_minimization; }
    "single-pass" end { goto opt; } // deprecated
*/

opt_output:
/*!re2c
    *
    {
        error ("bad argument to option -o, --output: %s", *argv);
        return EXIT_FAIL;
    }
    filename end { if (!opts.output (*argv)) return EXIT_FAIL; goto opt; }
*/

opt_header:
/*!re2c
    *
    {
        error ("bad argument to option -t, --type-header: %s", *argv);
        return EXIT_FAIL;
    }
    filename end { opts.set_header_file (*argv); goto opt; }
*/

opt_encoding_policy:
    if (!next (YYCURSOR, argv))
    {
        error_arg ("--encoding-policy");
        return EXIT_FAIL;
    }
/*!re2c
    *
    {
        error ("bad argument to option --encoding-policy (expected: ignore | substitute | fail): %s", *argv);
        return EXIT_FAIL;
    }
    "ignore" end { opts.set_encoding_policy (Enc::POLICY_IGNORE); goto opt; }
    "substitute" end { opts.set_encoding_policy (Enc::POLICY_SUBSTITUTE); goto opt; }
    "fail" end { opts.set_encoding_policy (Enc::POLICY_FAIL); goto opt; }
*/

opt_input:
    if (!next (YYCURSOR, argv))
    {
        error_arg ("--input");
        return EXIT_FAIL;
    }
/*!re2c
    *
    {
        error ("bad argument to option --input (expected: default | custom): %s", *argv);
        return EXIT_FAIL;
    }
    "default" end { opts.set_input_api (InputAPI::DEFAULT); goto opt; }
    "custom" end { opts.set_input_api (InputAPI::CUSTOM); goto opt; }
*/

opt_empty_class:
    if (!next (YYCURSOR, argv))
    {
        error_arg ("--empty-class");
        return EXIT_FAIL;
    }
/*!re2c
    *
    {
        error ("bad argument to option --empty-class (expected: match-empty | match-none | error): %s", *argv);
        return EXIT_FAIL;
    }
    "match-empty" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_EMPTY); goto opt; }
    "match-none" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_NONE); goto opt; }
    "error" end { opts.set_empty_class_policy (EMPTY_CLASS_ERROR); goto opt; }
*/

opt_dfa_minimization:
    if (!next (YYCURSOR, argv))
    {
        // report the option under the name the user actually typed
        // (the long option is "--dfa-minimization")
        error_arg ("--dfa-minimization");
        return EXIT_FAIL;
    }
/*!re2c
    *
    {
        error ("bad argument to option --dfa-minimization (expected: table | moore): %s", *argv);
        return EXIT_FAIL;
    }
    "table" end { opts.set_dfa_minimization (DFA_MINIMIZATION_TABLE); goto opt; }
    "moore" end { opts.set_dfa_minimization (DFA_MINIMIZATION_MOORE); goto opt; }
*/

end:
    // exactly one source file is required
    if (!opts.source_file)
    {
        error ("no source file");
        return EXIT_FAIL;
    }

    return OK;

#undef YYCTYPE
}
|
||||
|
||||
} // namespace re2c
|
200
tools/re2c/src/conf/warn.cc
Normal file
200
tools/re2c/src/conf/warn.cc
Normal file
|
@ -0,0 +1,200 @@
|
|||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "src/conf/msg.h"
|
||||
#include "src/conf/warn.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
Warn warn;
|
||||
|
||||
const uint32_t Warn::SILENT = 0;
|
||||
const uint32_t Warn::WARNING = 1u << 0;
|
||||
const uint32_t Warn::ERROR = 1u << 1;
|
||||
|
||||
const char * Warn::names [TYPES] =
|
||||
{
|
||||
#define W(x, y) y
|
||||
RE2C_WARNING_TYPES
|
||||
#undef W
|
||||
};
|
||||
|
||||
// All warnings start silent and no error has been accumulated.
Warn::Warn () : mask (), error_accuml (false)
{
    for (uint32_t type = 0; type != TYPES; ++type) {
        mask[type] = SILENT;
    }
}
|
||||
|
||||
bool Warn::error () const
|
||||
{
|
||||
return error_accuml;
|
||||
}
|
||||
|
||||
// Applies one per-warning command-line option to warning type 't':
//   W        - turn the warning on
//   WNO      - turn the warning off
//   WERROR   - turn the warning on and make it an error
//   WNOERROR - keep the warning as is, but don't make it an error
void Warn::set (type_t t, option_t o)
{
    uint32_t & bits = mask[t];

    if (o == W) {
        bits |= WARNING;
    } else if (o == WNO) {
        bits &= ~WARNING;
    } else if (o == WERROR) {
        // unlike -Werror, -Werror-<warning> implies -W<warning>
        bits |= (WARNING | ERROR);
    } else if (o == WNOERROR) {
        bits &= ~ERROR;
    }
}
|
||||
|
||||
void Warn::set_all ()
|
||||
{
|
||||
for (uint32_t i = 0; i < TYPES; ++i)
|
||||
{
|
||||
mask[i] |= WARNING;
|
||||
}
|
||||
}
|
||||
|
||||
// -Werror doesn't set any warnings: it only guarantees that if a warning
|
||||
// has been set by now or will be set later then it will result into error.
|
||||
void Warn::set_all_error ()
|
||||
{
|
||||
for (uint32_t i = 0; i < TYPES; ++i)
|
||||
{
|
||||
mask[i] |= ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// Prints message 's' as a warning of type 't' for line 'line', provided
// that this warning type is enabled. The message is never reported as an
// error and does not touch the accumulated error state.
void Warn::fail (type_t t, uint32_t line, const char * s)
{
    if (!(mask[t] & WARNING)) {
        return;
    }

    // -Werror has no effect
    warning (names[t], line, false, "%s", s);
}
|
||||
|
||||
void Warn::condition_order (uint32_t line)
|
||||
{
|
||||
if (mask[CONDITION_ORDER] & WARNING)
|
||||
{
|
||||
const bool e = mask[CONDITION_ORDER] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[CONDITION_ORDER], line, e,
|
||||
"looks like you use hardcoded numbers instead of autogenerated condition names: "
|
||||
"better add '/*!types:re2c*/' directive or '-t, --type-header' option "
|
||||
"and don't rely on fixed condition order.");
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::empty_class (uint32_t line)
|
||||
{
|
||||
if (mask[EMPTY_CHARACTER_CLASS] & WARNING)
|
||||
{
|
||||
const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class");
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::match_empty_string (uint32_t line)
|
||||
{
|
||||
if (mask[MATCH_EMPTY_STRING] & WARNING)
|
||||
{
|
||||
const bool e = mask[MATCH_EMPTY_STRING] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[MATCH_EMPTY_STRING], line, e, "rule matches empty string");
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::swapped_range (uint32_t line, uint32_t l, uint32_t u)
|
||||
{
|
||||
if (mask[SWAPPED_RANGE] & WARNING)
|
||||
{
|
||||
const bool e = mask[SWAPPED_RANGE] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u);
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::undefined_control_flow (uint32_t line, const std::string & cond, std::vector<way_t> & ways, bool overflow)
|
||||
{
|
||||
if (mask[UNDEFINED_CONTROL_FLOW] & WARNING)
|
||||
{
|
||||
const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR;
|
||||
error_accuml |= e;
|
||||
|
||||
// report shorter patterns first
|
||||
std::sort (ways.begin (), ways.end (), cmp_ways);
|
||||
|
||||
warning_start (line, e);
|
||||
fprintf (stderr, "control flow %sis undefined for strings that match ", incond (cond).c_str ());
|
||||
const size_t count = ways.size ();
|
||||
if (count == 1)
|
||||
{
|
||||
fprint_way (stderr, ways[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
fprintf (stderr, "\n\t");
|
||||
fprint_way (stderr, ways[i]);
|
||||
}
|
||||
fprintf (stderr, "\n");
|
||||
}
|
||||
if (overflow)
|
||||
{
|
||||
fprintf (stderr, " ... and a few more");
|
||||
}
|
||||
fprintf (stderr, ", use default rule '*'");
|
||||
warning_end (names[UNDEFINED_CONTROL_FLOW], e);
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules)
|
||||
{
|
||||
if (mask[UNREACHABLE_RULES] & WARNING)
|
||||
{
|
||||
const bool e = mask[UNREACHABLE_RULES] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning_start (rule.line, e);
|
||||
fprintf (stderr, "unreachable rule %s", incond (cond).c_str ());
|
||||
const size_t shadows = rule.shadow.size ();
|
||||
if (shadows > 0)
|
||||
{
|
||||
const char * pl = shadows > 1
|
||||
? "s"
|
||||
: "";
|
||||
std::set<rule_rank_t>::const_iterator i = rule.shadow.begin ();
|
||||
fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, rules.find (*i)->second.line);
|
||||
for (++i; i != rule.shadow.end (); ++i)
|
||||
{
|
||||
fprintf (stderr, ", %u", rules.find (*i)->second.line);
|
||||
}
|
||||
fprintf (stderr, ")");
|
||||
}
|
||||
warning_end (names[UNREACHABLE_RULES], e);
|
||||
}
|
||||
}
|
||||
|
||||
void Warn::useless_escape (uint32_t line, uint32_t col, char c)
|
||||
{
|
||||
if (mask[USELESS_ESCAPE] & WARNING)
|
||||
{
|
||||
const bool e = mask[USELESS_ESCAPE] & ERROR;
|
||||
error_accuml |= e;
|
||||
warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
67
tools/re2c/src/conf/warn.h
Normal file
67
tools/re2c/src/conf/warn.h
Normal file
|
@ -0,0 +1,67 @@
|
|||
#ifndef _RE2C_CONF_WARN_
|
||||
#define _RE2C_CONF_WARN_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/skeleton/way.h"
|
||||
#include "src/parse/rules.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// X-macro table of warning types: W (enumerator, command-line name).
// Every entry, including the last one, ends with a comma, so the table can
// be followed by further list elements where it is expanded.
#define RE2C_WARNING_TYPES \
    W (CONDITION_ORDER,        "condition-order"), \
    W (EMPTY_CHARACTER_CLASS,  "empty-character-class"), \
    W (MATCH_EMPTY_STRING,     "match-empty-string"), \
    W (SWAPPED_RANGE,          "swapped-range"), \
    W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \
    W (UNREACHABLE_RULES,      "unreachable-rules"), \
    W (USELESS_ESCAPE,         "useless-escape"),
|
||||
|
||||
class Warn
|
||||
{
|
||||
public:
|
||||
enum type_t
|
||||
{
|
||||
#define W(x, y) x
|
||||
RE2C_WARNING_TYPES
|
||||
#undef W
|
||||
TYPES // count
|
||||
};
|
||||
enum option_t
|
||||
{
|
||||
W,
|
||||
WNO,
|
||||
WERROR,
|
||||
WNOERROR
|
||||
};
|
||||
|
||||
private:
|
||||
static const uint32_t SILENT;
|
||||
static const uint32_t WARNING;
|
||||
static const uint32_t ERROR;
|
||||
static const char * names [TYPES];
|
||||
uint32_t mask[TYPES];
|
||||
bool error_accuml;
|
||||
|
||||
public:
|
||||
Warn ();
|
||||
bool error () const;
|
||||
void set (type_t t, option_t o);
|
||||
void set_all ();
|
||||
void set_all_error ();
|
||||
void fail (type_t t, uint32_t line, const char * s);
|
||||
|
||||
void condition_order (uint32_t line);
|
||||
void empty_class (uint32_t line);
|
||||
void match_empty_string (uint32_t line);
|
||||
void swapped_range (uint32_t line, uint32_t l, uint32_t u);
|
||||
void undefined_control_flow (uint32_t line, const std::string & cond, std::vector<way_t> & ways, bool overflow);
|
||||
void unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules);
|
||||
void useless_escape (uint32_t line, uint32_t col, char c);
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_CONF_WARN_
|
24
tools/re2c/src/globals.h
Normal file
24
tools/re2c/src/globals.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
#ifndef _RE2C_GLOBALS_
|
||||
#define _RE2C_GLOBALS_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/conf/warn.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
extern bool bUsedYYBitmap;
|
||||
extern bool bWroteGetState;
|
||||
extern bool bWroteCondCheck;
|
||||
extern uint32_t last_fill_index;
|
||||
extern std::string yySetupRule;
|
||||
|
||||
extern Opt opts;
|
||||
extern Warn warn;
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_GLOBALS_
|
109
tools/re2c/src/ir/adfa/action.h
Normal file
109
tools/re2c/src/ir/adfa/action.h
Normal file
|
@ -0,0 +1,109 @@
|
|||
#ifndef _RE2C_IR_ADFA_ACTION_
|
||||
#define _RE2C_IR_ADFA_ACTION_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include "src/util/uniq_vector.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct OutputFile;
|
||||
class RuleOp;
|
||||
struct State;
|
||||
|
||||
struct Initial
|
||||
{
|
||||
label_t label;
|
||||
bool setMarker;
|
||||
|
||||
inline Initial (label_t l, bool b)
|
||||
: label (l)
|
||||
, setMarker (b)
|
||||
{}
|
||||
};
|
||||
|
||||
typedef uniq_vector_t<const State *> accept_t;
|
||||
|
||||
class Action
|
||||
{
|
||||
public:
|
||||
enum type_t
|
||||
{
|
||||
MATCH,
|
||||
INITIAL,
|
||||
SAVE,
|
||||
MOVE,
|
||||
ACCEPT,
|
||||
RULE
|
||||
} type;
|
||||
union
|
||||
{
|
||||
Initial * initial;
|
||||
uint32_t save;
|
||||
const accept_t * accepts;
|
||||
const RuleOp * rule;
|
||||
} info;
|
||||
|
||||
public:
|
||||
inline Action ()
|
||||
: type (MATCH)
|
||||
, info ()
|
||||
{}
|
||||
~Action ()
|
||||
{
|
||||
clear ();
|
||||
}
|
||||
void set_initial (label_t label, bool used_marker)
|
||||
{
|
||||
clear ();
|
||||
type = INITIAL;
|
||||
info.initial = new Initial (label, used_marker);
|
||||
}
|
||||
void set_save (uint32_t save)
|
||||
{
|
||||
clear ();
|
||||
type = SAVE;
|
||||
info.save = save;
|
||||
}
|
||||
void set_move ()
|
||||
{
|
||||
clear ();
|
||||
type = MOVE;
|
||||
}
|
||||
void set_accept (const accept_t * accepts)
|
||||
{
|
||||
clear ();
|
||||
type = ACCEPT;
|
||||
info.accepts = accepts;
|
||||
}
|
||||
void set_rule (const RuleOp * const rule)
|
||||
{
|
||||
clear ();
|
||||
type = RULE;
|
||||
info.rule = rule;
|
||||
}
|
||||
|
||||
private:
|
||||
void clear ()
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case INITIAL:
|
||||
delete info.initial;
|
||||
break;
|
||||
case MATCH:
|
||||
case SAVE:
|
||||
case MOVE:
|
||||
case ACCEPT:
|
||||
case RULE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_ADFA_ACTION_
|
135
tools/re2c/src/ir/adfa/adfa.cc
Normal file
135
tools/re2c/src/ir/adfa/adfa.cc
Normal file
|
@ -0,0 +1,135 @@
|
|||
#include <assert.h>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/util/allocate.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
/*
 * Build the action DFA from a plain dfa_t.
 *
 * One State is created per dfa_t state and the states are chained through
 * 'next' in index order. For every state, runs of consecutive characters
 * that share a target are collapsed into a single Span whose upper bound
 * is taken from 'charset'. Transitions to dfa_t::NIL become NULL targets.
 */
DFA::DFA
    ( const dfa_t &dfa
    , const std::vector<size_t> &fill
    , Skeleton *skel
    , const charset_t &charset
    , const std::string &n
    , const std::string &c
    , uint32_t l
    )
    : accepts ()
    , skeleton (skel)
    , name (n)
    , cond (c)
    , line (l)
    , lbChar(0)
    , ubChar(charset.back())
    , nStates(0)
    , head(NULL)

    // statistics
    , max_fill (0)
    , need_backup (false)
    , need_backupctx (false)
    , need_accept (false)
{
    const size_t nstates = dfa.states.size();
    const size_t nchars = dfa.nchars;

    // index -> State lookup; all states must exist before arcs are translated
    std::vector<State*> by_index(nstates);
    for (size_t k = 0; k < nstates; ++k)
    {
        by_index[k] = new State;
    }

    State *prev = NULL;
    for (size_t k = 0; k < nstates; ++k)
    {
        const dfa_state_t *src = dfa.states[k];
        State *dst = by_index[k];

        // append to the linked list
        ++nStates;
        if (prev != NULL)
        {
            prev->next = dst;
        }
        else
        {
            head = dst;
        }
        prev = dst;

        dst->isPreCtxt = src->ctx;
        dst->rule = src->rule;
        dst->fill = fill[k];
        dst->go.span = allocate<Span>(nchars);

        // collapse equal-target runs of arcs into spans
        uint32_t nspans = 0;
        uint32_t ch = 0;
        while (ch < nchars)
        {
            const size_t target = src->arcs[ch];
            do
            {
                ++ch;
            }
            while (ch < nchars && src->arcs[ch] == target);

            Span &span = dst->go.span[nspans];
            span.to = target == dfa_t::NIL ? NULL : by_index[target];
            span.ub = charset[ch];
            ++nspans;
        }
        dst->go.nSpans = nspans;
    }

    // terminate the list (or leave it empty)
    if (prev != NULL)
    {
        prev->next = NULL;
    }
    else
    {
        head = NULL;
    }
}
|
||||
|
||||
// Destroys every state on the 'next' chain, then the skeleton.
DFA::~DFA()
{
    while (head != NULL)
    {
        State *const dead = head;
        head = dead->next;
        delete dead;
    }

    delete skeleton;
}
|
||||
|
||||
void DFA::reorder()
|
||||
{
|
||||
std::vector<State*> ord;
|
||||
ord.reserve(nStates);
|
||||
|
||||
std::queue<State*> todo;
|
||||
todo.push(head);
|
||||
|
||||
std::set<State*> done;
|
||||
done.insert(head);
|
||||
|
||||
for(;!todo.empty();)
|
||||
{
|
||||
State *s = todo.front();
|
||||
todo.pop();
|
||||
ord.push_back(s);
|
||||
for(uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
State *q = s->go.span[i].to;
|
||||
if(q && done.insert(q).second)
|
||||
{
|
||||
todo.push(q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(nStates == ord.size());
|
||||
|
||||
ord.push_back(NULL);
|
||||
for(uint32_t i = 0; i < nStates; ++i)
|
||||
{
|
||||
ord[i]->next = ord[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void DFA::addState(State *s, State *next)
|
||||
{
|
||||
++nStates;
|
||||
s->next = next->next;
|
||||
next->next = s;
|
||||
}
|
||||
|
||||
} // namespace re2c
|
||||
|
101
tools/re2c/src/ir/adfa/adfa.h
Normal file
101
tools/re2c/src/ir/adfa/adfa.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
#ifndef _RE2C_IR_ADFA_ADFA_
|
||||
#define _RE2C_IR_ADFA_ADFA_
|
||||
|
||||
#include <stddef.h>
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/codegen/label.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct Skeleton;
|
||||
struct Output;
|
||||
struct OutputFile;
|
||||
struct dfa_t;
|
||||
|
||||
struct State
|
||||
{
|
||||
label_t label;
|
||||
RuleOp * rule;
|
||||
State * next;
|
||||
size_t fill;
|
||||
|
||||
bool isPreCtxt;
|
||||
bool isBase;
|
||||
Go go;
|
||||
Action action;
|
||||
|
||||
State ()
|
||||
: label (label_t::first ())
|
||||
, rule (NULL)
|
||||
, next (0)
|
||||
, fill (0)
|
||||
, isPreCtxt (false)
|
||||
, isBase (false)
|
||||
, go ()
|
||||
, action ()
|
||||
{}
|
||||
~State ()
|
||||
{
|
||||
operator delete (go.span);
|
||||
}
|
||||
|
||||
FORBID_COPY (State);
|
||||
};
|
||||
|
||||
class DFA
|
||||
{
|
||||
accept_t accepts;
|
||||
Skeleton * skeleton;
|
||||
|
||||
public:
|
||||
const std::string name;
|
||||
const std::string cond;
|
||||
const uint32_t line;
|
||||
|
||||
uint32_t lbChar;
|
||||
uint32_t ubChar;
|
||||
uint32_t nStates;
|
||||
State * head;
|
||||
|
||||
// statistics
|
||||
size_t max_fill;
|
||||
bool need_backup;
|
||||
bool need_backupctx;
|
||||
bool need_accept;
|
||||
|
||||
public:
|
||||
DFA ( const dfa_t &dfa
|
||||
, const std::vector<size_t> &fill
|
||||
, Skeleton *skel
|
||||
, const charset_t &charset
|
||||
, const std::string &n
|
||||
, const std::string &c
|
||||
, uint32_t l
|
||||
);
|
||||
~DFA ();
|
||||
void reorder();
|
||||
void prepare();
|
||||
void calc_stats();
|
||||
void emit (Output &, uint32_t &, bool, bool &);
|
||||
|
||||
private:
|
||||
void addState(State*, State *);
|
||||
void split (State *);
|
||||
void findBaseState ();
|
||||
void count_used_labels (std::set<label_t> & used, label_t prolog, label_t start, bool force_start) const;
|
||||
void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
|
||||
|
||||
FORBID_COPY (DFA);
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_ADFA_ADFA_
|
268
tools/re2c/src/ir/adfa/prepare.cc
Normal file
268
tools/re2c/src/ir/adfa/prepare.cc
Normal file
|
@ -0,0 +1,268 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
|
||||
#include "src/codegen/bitmap.h"
|
||||
#include "src/codegen/go.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/adfa/action.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
#include "src/ir/rule_rank.h"
|
||||
#include "src/util/allocate.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
/*
 * Split state 's' in two.
 *
 * A new MOVE state is inserted right after 's' and takes over its rule,
 * fill value and transitions. 's' itself keeps no rule and gets a single
 * span, covering everything up to 'ubChar', that leads to the new state.
 */
void DFA::split(State *s)
{
    State *const tail = new State;
    addState(tail, s);
    tail->action.set_move ();
    tail->rule = s->rule;
    tail->fill = s->fill;
    tail->go = s->go; // the old span array now belongs to 'tail'

    s->rule = NULL;

    Span *const jump = allocate<Span> (1);
    jump->ub = ubChar;
    jump->to = tail;
    s->go.span = jump;
    s->go.nSpans = 1;
}
|
||||
|
||||
static uint32_t merge(Span *x0, State *fg, State *bg)
|
||||
{
|
||||
Span *x = x0, *f = fg->go.span, *b = bg->go.span;
|
||||
uint32_t nf = fg->go.nSpans, nb = bg->go.nSpans;
|
||||
State *prev = NULL, *to;
|
||||
// NB: we assume both spans are for same range
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (f->ub == b->ub)
|
||||
{
|
||||
to = f->to == b->to ? bg : f->to;
|
||||
|
||||
if (to == prev)
|
||||
{
|
||||
--x;
|
||||
}
|
||||
else
|
||||
{
|
||||
x->to = prev = to;
|
||||
}
|
||||
|
||||
x->ub = f->ub;
|
||||
++x;
|
||||
++f;
|
||||
--nf;
|
||||
++b;
|
||||
--nb;
|
||||
|
||||
if (nf == 0 && nb == 0)
|
||||
{
|
||||
return static_cast<uint32_t> (x - x0);
|
||||
}
|
||||
}
|
||||
|
||||
while (f->ub < b->ub)
|
||||
{
|
||||
to = f->to == b->to ? bg : f->to;
|
||||
|
||||
if (to == prev)
|
||||
{
|
||||
--x;
|
||||
}
|
||||
else
|
||||
{
|
||||
x->to = prev = to;
|
||||
}
|
||||
|
||||
x->ub = f->ub;
|
||||
++x;
|
||||
++f;
|
||||
--nf;
|
||||
}
|
||||
|
||||
while (b->ub < f->ub)
|
||||
{
|
||||
to = b->to == f->to ? bg : f->to;
|
||||
|
||||
if (to == prev)
|
||||
{
|
||||
--x;
|
||||
}
|
||||
else
|
||||
{
|
||||
x->to = prev = to;
|
||||
}
|
||||
|
||||
x->ub = b->ub;
|
||||
++x;
|
||||
++b;
|
||||
--nb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DFA::findBaseState()
|
||||
{
|
||||
Span *span = allocate<Span> (ubChar - lbChar);
|
||||
|
||||
for (State *s = head; s; s = s->next)
|
||||
{
|
||||
if (s->fill == 0)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
State *to = s->go.span[i].to;
|
||||
|
||||
if (to->isBase)
|
||||
{
|
||||
to = to->go.span[0].to;
|
||||
uint32_t nSpans = merge(span, s, to);
|
||||
|
||||
if (nSpans < s->go.nSpans)
|
||||
{
|
||||
operator delete (s->go.span);
|
||||
s->go.nSpans = nSpans;
|
||||
s->go.span = allocate<Span> (nSpans);
|
||||
memcpy(s->go.span, span, nSpans*sizeof(Span));
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
operator delete (span);
|
||||
}
|
||||
|
||||
void DFA::prepare ()
|
||||
{
|
||||
bUsedYYBitmap = false;
|
||||
|
||||
// create rule states
|
||||
std::map<rule_rank_t, State *> rules;
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (s->rule)
|
||||
{
|
||||
if (rules.find (s->rule->rank) == rules.end ())
|
||||
{
|
||||
State *n = new State;
|
||||
n->action.set_rule (s->rule);
|
||||
rules[s->rule->rank] = n;
|
||||
addState(n, s);
|
||||
}
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (!s->go.span[i].to)
|
||||
{
|
||||
s->go.span[i].to = rules[s->rule->rank];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create default state (if needed)
|
||||
State * default_state = NULL;
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (!s->go.span[i].to)
|
||||
{
|
||||
if (!default_state)
|
||||
{
|
||||
default_state = new State;
|
||||
addState(default_state, s);
|
||||
}
|
||||
s->go.span[i].to = default_state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find backup states and create accept state (if needed)
|
||||
if (default_state)
|
||||
{
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (s->rule)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE)
|
||||
{
|
||||
const uint32_t accept = static_cast<uint32_t> (accepts.find_or_add (rules[s->rule->rank]));
|
||||
s->action.set_save (accept);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
default_state->action.set_accept (&accepts);
|
||||
}
|
||||
|
||||
// split ``base'' states into two parts
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
s->isBase = false;
|
||||
|
||||
if (s->fill != 0)
|
||||
{
|
||||
for (uint32_t i = 0; i < s->go.nSpans; ++i)
|
||||
{
|
||||
if (s->go.span[i].to == s)
|
||||
{
|
||||
s->isBase = true;
|
||||
split(s);
|
||||
|
||||
if (opts->bFlag)
|
||||
{
|
||||
BitMap::find(&s->next->go, s);
|
||||
}
|
||||
|
||||
s = s->next;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// find ``base'' state, if possible
|
||||
findBaseState();
|
||||
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
s->go.init (s);
|
||||
}
|
||||
}
|
||||
|
||||
void DFA::calc_stats ()
|
||||
{
|
||||
// calculate 'YYMAXFILL'
|
||||
max_fill = 0;
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (max_fill < s->fill)
|
||||
{
|
||||
max_fill = s->fill;
|
||||
}
|
||||
}
|
||||
|
||||
// determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used
|
||||
need_backup = accepts.size () > 0;
|
||||
|
||||
// determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used
|
||||
for (State * s = head; s; s = s->next)
|
||||
{
|
||||
if (s->isPreCtxt)
|
||||
{
|
||||
need_backupctx = true;
|
||||
}
|
||||
}
|
||||
|
||||
// determine if 'yyaccept' variable is used
|
||||
need_accept = accepts.size () > 1;
|
||||
}
|
||||
|
||||
} // namespace re2c
|
104
tools/re2c/src/ir/compile.cc
Normal file
104
tools/re2c/src/ir/compile.cc
Normal file
|
@ -0,0 +1,104 @@
|
|||
#include <algorithm>
|
||||
#include <ostream>
|
||||
#include <set>
|
||||
|
||||
#include "src/codegen/output.h"
|
||||
#include "src/ir/compile.h"
|
||||
#include "src/ir/adfa/adfa.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/nfa/nfa.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/skeleton/skeleton.h"
|
||||
#include "src/parse/spec.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Build a DFA name of the form "line<N>", or "line<N>_<cond>" when
// a condition name is given.
static std::string make_name(const std::string &cond, uint32_t line)
{
    std::ostringstream out;
    out << "line" << line;
    if (!cond.empty ())
    {
        out << "_" << cond;
    }
    return out.str();
}
|
||||
|
||||
/*
 * Compile one specification (for condition 'cond') into a DFA with
 * actions and fold its statistics into 'output'.
 */
smart_ptr<DFA> compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits)
{
    const uint32_t line = output.source.get_block_line();
    const std::string name = make_name(cond, line);

    // The original set of code units (charset) might be very large.
    // A common trick is to split the charset into disjoint character
    // ranges and choose a representative of each range (we choose the
    // lower bound). The set of all representatives is the new
    // (compacted) charset. Zero and the upper bound are always
    // included, even if they do not explicitly appear in any range.
    std::set<uint32_t> bounds;
    spec.re->split(bounds);
    bounds.insert(0);
    bounds.insert(cunits);

    charset_t cs;
    std::set<uint32_t>::const_iterator bound = bounds.begin();
    while (bound != bounds.end())
    {
        cs.push_back(*bound);
        ++bound;
    }

    nfa_t nfa(spec.re);

    dfa_t dfa(nfa, cs, spec.rules);

    // skeleton must be constructed after DFA construction
    // but prior to any other DFA transformations
    Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line);

    minimization(dfa);

    // find YYFILL states and calculate argument to YYFILL
    std::vector<size_t> fill;
    fillpoints(dfa, fill);

    // ADFA stands for 'DFA with actions'
    DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line);

    /*
     * note [reordering DFA states]
     *
     * re2c-generated code depends on the order of states in DFA: simply
     * flipping two states may change the output significantly.
     * The order of states is affected by many factors, e.g.:
     *   - flipping left and right subtrees of alternative when
     *     constructing AST (also applies to iteration and counted
     *     repetition)
     *   - changing the order in which graph nodes are visited (applies
     *     to any intermediate representation: bytecode, NFA, DFA, etc.)
     *
     * To make the resulting code independent of such changes, we reorder
     * DFA states here. The ordering scheme is very simple:
     *
     * Starting with DFA root, walk DFA nodes in breadth-first order.
     * Child nodes are ordered according to the (alphabetically) first
     * symbol leading to each node. Each node must be visited exactly
     * once. Default state (NULL) is always the last state.
     */
    adfa->reorder();

    // skeleton is constructed, do further DFA transformations
    adfa->prepare();

    // finally gather overall DFA statistics
    adfa->calc_stats();

    // accumulate global statistics from this particular DFA
    if (output.max_fill < adfa->max_fill)
    {
        output.max_fill = adfa->max_fill;
    }
    if (adfa->need_accept)
    {
        output.source.set_used_yyaccept ();
    }

    return make_smart_ptr(adfa);
}
|
||||
|
||||
} // namespace re2c
|
20
tools/re2c/src/ir/compile.h
Normal file
20
tools/re2c/src/ir/compile.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef _RE2C_IR_COMPILE_
|
||||
#define _RE2C_IR_COMPILE_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <string>
|
||||
|
||||
#include "src/util/smart_ptr.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class DFA;
|
||||
struct Output;
|
||||
struct Spec;
|
||||
|
||||
smart_ptr<DFA> compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_COMPILE_
|
197
tools/re2c/src/ir/dfa/determinization.cc
Normal file
197
tools/re2c/src/ir/dfa/determinization.cc
Normal file
|
@ -0,0 +1,197 @@
|
|||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/ir/nfa/nfa.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
#include "src/ir/rule_rank.h"
|
||||
#include "src/parse/rules.h"
|
||||
#include "src/util/ord_hash_set.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
|
||||
|
||||
/*
|
||||
* note [marking DFA states]
|
||||
*
|
||||
* DFA state is a set of NFA states.
|
||||
* However, a DFA state does not include all NFA states that are in the
|
||||
* epsilon-closure (NFA states that have only epsilon-transitions
|
||||
* and are not context of final states are omitted).
|
||||
* The included states are called 'kernel' states.
|
||||
*
|
||||
* We mark visited NFA states during closure construction.
|
||||
* These marks serve two purposes:
|
||||
* - avoid loops in NFA
|
||||
* - avoid duplication of NFA states in kernel
|
||||
*
|
||||
* Note that after closure construction:
|
||||
* - all non-kernel states must be unmarked (these states are
|
||||
* not stored in kernel and it is impossible to unmark them
|
||||
* afterwards)
|
||||
* - all kernel states must be marked (because we may later
|
||||
* extend this kernel with epsilon-closure of another NFA
|
||||
* state). Kernel states are unmarked later (before finding
|
||||
* or adding DFA state).
|
||||
*/
|
||||
/*
 * Append the kernel states of the epsilon-closure of 'n' to the buffer
 * at 'cP' and return the new end of the buffer.
 *
 * Already marked states are skipped. ALT states are expanded (second
 * branch first) and unmarked again, since they are not stored in the
 * kernel. All other states are stored and stay marked; CTX states are
 * additionally followed. See note [marking DFA states].
 */
static nfa_state_t **closure(nfa_state_t **cP, nfa_state_t *n)
{
    if (n->mark)
    {
        return cP;
    }
    n->mark = true;

    if (n->type == nfa_state_t::ALT)
    {
        cP = closure(cP, n->value.alt.out2);
        cP = closure(cP, n->value.alt.out1);
        n->mark = false;
        return cP;
    }

    // kernel state: record it
    *cP = n;
    ++cP;

    if (n->type == nfa_state_t::CTX)
    {
        cP = closure(cP, n->value.ctx.out);
    }
    return cP;
}
|
||||
|
||||
static size_t find_state
|
||||
( nfa_state_t **kernel
|
||||
, nfa_state_t **end
|
||||
, ord_hash_set_t &kernels
|
||||
)
|
||||
{
|
||||
// zero-sized kernel corresponds to default state
|
||||
if (kernel == end)
|
||||
{
|
||||
return dfa_t::NIL;
|
||||
}
|
||||
|
||||
// see note [marking DFA states]
|
||||
for (nfa_state_t **p = kernel; p != end; ++p)
|
||||
{
|
||||
(*p)->mark = false;
|
||||
}
|
||||
|
||||
// sort kernel states: we need this to get stable hash
|
||||
// and to compare states with simple 'memcmp'
|
||||
std::sort(kernel, end);
|
||||
const size_t size = static_cast<size_t>(end - kernel) * sizeof(nfa_state_t*);
|
||||
return kernels.insert(kernel, size);
|
||||
}
|
||||
|
||||
/*
 * Determinization: build the DFA from the NFA.
 *
 * DFA states are discovered in the order their kernels are added to
 * 'kernels'; processing one state may add new kernels, so the main loop
 * runs until no unprocessed kernel is left. Afterwards each final state
 * gets the rule with the smallest rank, and every other rule that ends in
 * the same state is recorded in 'rules' as shadowed by it.
 */
dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules)
    : states()
    , nchars(charset.size() - 1) // (n + 1) bounds for n ranges
{
    typedef std::set<RuleOp*> ruleset_t;
    typedef std::map<size_t, ruleset_t> state2rules_t;

    state2rules_t s2rules;
    ord_hash_set_t kernels;
    nfa_state_t **const buffer = new nfa_state_t*[nfa.size];

    // per-character lists of NFA targets for the state being processed
    std::vector<std::vector<nfa_state_t*> > arcs(nchars);

    // initial state
    find_state(buffer, closure(buffer, nfa.root), kernels);

    for (size_t i = 0; i < kernels.size(); ++i)
    {
        dfa_state_t *const s = new dfa_state_t;
        states.push_back(s);

        nfa_state_t **kernel;
        const size_t kernel_size = kernels.deref<nfa_state_t*>(i, kernel);
        for (size_t j = 0; j < kernel_size; ++j)
        {
            nfa_state_t *const n = kernel[j];
            if (n->type == nfa_state_t::RAN)
            {
                nfa_state_t *const target = n->value.ran.out;
                size_t c = 0;
                for (Range *r = n->value.ran.ran; r; r = r->next ())
                {
                    // skip to the charset entry that starts this range
                    while (charset[c] != r->lower())
                    {
                        ++c;
                    }
                    // every charset entry inside the range leads to 'target'
                    while (charset[c] != r->upper())
                    {
                        arcs[c].push_back(target);
                        ++c;
                    }
                }
            }
            else if (n->type == nfa_state_t::CTX)
            {
                s->ctx = true;
            }
            else if (n->type == nfa_state_t::FIN)
            {
                s2rules[i].insert(n->value.fin.rule);
            }
        }

        s->arcs = new size_t[nchars];
        for (size_t c = 0; c < nchars; ++c)
        {
            std::vector<nfa_state_t*> &targets = arcs[c];

            nfa_state_t **end = buffer;
            for (std::vector<nfa_state_t*>::const_iterator t = targets.begin(); t != targets.end(); ++t)
            {
                end = closure(end, *t);
            }
            s->arcs[c] = find_state(buffer, end, kernels);

            // ready for the next DFA state
            targets.clear();
        }
    }
    delete[] buffer;

    // only states that contain FIN nodes have an entry in 's2rules'
    for (state2rules_t::const_iterator e = s2rules.begin(); e != s2rules.end(); ++e)
    {
        dfa_state_t *const s = states[e->first];
        const ruleset_t &rs = e->second;

        // for each final state: choose the rule with the smallest rank
        for (ruleset_t::const_iterator j = rs.begin(); j != rs.end(); ++j)
        {
            RuleOp *const candidate = *j;
            if (!s->rule || candidate->rank < s->rule->rank)
            {
                s->rule = candidate;
            }
        }

        // other rules are shadowed by the chosen rule
        for (ruleset_t::const_iterator j = rs.begin(); j != rs.end(); ++j)
        {
            RuleOp *const other = *j;
            if (other != s->rule)
            {
                rules[other->rank].shadow.insert(s->rule->rank);
            }
        }
    }
}
|
||||
|
||||
// Destroys all DFA states owned by this object.
dfa_t::~dfa_t()
{
    for (size_t k = 0; k < states.size(); ++k)
    {
        delete states[k];
    }
}
|
||||
|
||||
} // namespace re2c
|
||||
|
58
tools/re2c/src/ir/dfa/dfa.h
Normal file
58
tools/re2c/src/ir/dfa/dfa.h
Normal file
|
@ -0,0 +1,58 @@
|
|||
#ifndef _RE2C_IR_DFA_DFA_
|
||||
#define _RE2C_IR_DFA_DFA_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/parse/rules.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct nfa_t;
|
||||
class RuleOp;
|
||||
|
||||
struct dfa_state_t
|
||||
{
|
||||
size_t *arcs;
|
||||
RuleOp *rule;
|
||||
bool ctx;
|
||||
|
||||
dfa_state_t()
|
||||
: arcs(NULL)
|
||||
, rule(NULL)
|
||||
, ctx(false)
|
||||
{}
|
||||
~dfa_state_t()
|
||||
{
|
||||
delete[] arcs;
|
||||
}
|
||||
|
||||
FORBID_COPY(dfa_state_t);
|
||||
};
|
||||
|
||||
struct dfa_t
|
||||
{
|
||||
static const size_t NIL;
|
||||
|
||||
std::vector<dfa_state_t*> states;
|
||||
const size_t nchars;
|
||||
|
||||
dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules);
|
||||
~dfa_t();
|
||||
};
|
||||
|
||||
// Available DFA minimization algorithms.
enum dfa_minimization_t
{
    DFA_MINIMIZATION_TABLE, // table filling
    DFA_MINIMIZATION_MOORE  // Moore's algorithm
};
|
||||
|
||||
void minimization(dfa_t &dfa);
|
||||
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_DFA_DFA_
|
154
tools/re2c/src/ir/dfa/fillpoints.cc
Normal file
154
tools/re2c/src/ir/dfa/fillpoints.cc
Normal file
|
@ -0,0 +1,154 @@
|
|||
#include <limits>
|
||||
#include <stack>
|
||||
#include <vector>
|
||||
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static const size_t INFINITI = std::numeric_limits<size_t>::max();
|
||||
static const size_t UNDEFINED = INFINITI - 1;
|
||||
|
||||
// Returns true iff any of the first 'narcs' entries of 'arcs' equals
// 'node', i.e. the node has a transition to itself.
static bool loopback(size_t node, size_t narcs, const size_t *arcs)
{
    const size_t *const last = arcs + narcs;
    while (arcs != last)
    {
        if (*arcs == node)
        {
            return true;
        }
        ++arcs;
    }
    return false;
}
|
||||
|
||||
/*
|
||||
* note [finding strongly connected components of DFA]
|
||||
*
|
||||
* A slight modification of Tarjan's algorithm.
|
||||
*
|
||||
* The algorithm walks the graph in depth-first order. It maintains a stack
|
||||
* of nodes that have already been visited but haven't been assigned to
|
||||
* SCC yet. For each node the algorithm calculates 'lowlink': index of
|
||||
* the highest ancestor node reachable in one step from a descendant of
|
||||
* the node. Lowlink is used to determine when a set of nodes should be
|
||||
* popped off the stack into a new SCC.
|
||||
*
|
||||
* We use lowlink to hold different kinds of information:
|
||||
* - values in range [0 .. stack size] mean that this node is on stack
|
||||
* (link to a node with the smallest index reachable from this one)
|
||||
* - UNDEFINED means that this node has not been visited yet
|
||||
* - INFINITI means that this node has already been popped off stack
|
||||
*
|
||||
* We use stack size (rather than topological sort index) as unique index
|
||||
* of a node on stack. This is safe because indices of nodes on stack are
|
||||
* still unique and less than indices of nodes that have been popped off
|
||||
* stack (INFINITI).
|
||||
*
|
||||
*/
|
||||
static void scc(
|
||||
const dfa_t &dfa,
|
||||
std::stack<size_t> &stack,
|
||||
std::vector<size_t> &lowlink,
|
||||
std::vector<bool> &trivial,
|
||||
size_t i)
|
||||
{
|
||||
const size_t link = stack.size();
|
||||
lowlink[i] = link;
|
||||
stack.push(i);
|
||||
|
||||
const size_t *arcs = dfa.states[i]->arcs;
|
||||
for (size_t c = 0; c < dfa.nchars; ++c)
|
||||
{
|
||||
const size_t j = arcs[c];
|
||||
if (j != dfa_t::NIL)
|
||||
{
|
||||
if (lowlink[j] == UNDEFINED)
|
||||
{
|
||||
scc(dfa, stack, lowlink, trivial, j);
|
||||
}
|
||||
if (lowlink[j] < lowlink[i])
|
||||
{
|
||||
lowlink[i] = lowlink[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (lowlink[i] == link)
|
||||
{
|
||||
// SCC is non-trivial (has loops) iff it either:
|
||||
// - consists of multiple nodes (they all must be interconnected)
|
||||
// - consists of single node which loops back to itself
|
||||
trivial[i] = i == stack.top()
|
||||
&& !loopback(i, dfa.nchars, arcs);
|
||||
|
||||
size_t j;
|
||||
do
|
||||
{
|
||||
j = stack.top();
|
||||
stack.pop();
|
||||
lowlink[j] = INFINITI;
|
||||
}
|
||||
while (j != i);
|
||||
}
|
||||
}
|
||||
|
||||
static void calc_fill(
|
||||
const dfa_t &dfa,
|
||||
const std::vector<bool> &trivial,
|
||||
std::vector<size_t> &fill,
|
||||
size_t i)
|
||||
{
|
||||
if (fill[i] == UNDEFINED)
|
||||
{
|
||||
fill[i] = 0;
|
||||
const size_t *arcs = dfa.states[i]->arcs;
|
||||
for (size_t c = 0; c < dfa.nchars; ++c)
|
||||
{
|
||||
const size_t j = arcs[c];
|
||||
if (j != dfa_t::NIL)
|
||||
{
|
||||
calc_fill(dfa, trivial, fill, j);
|
||||
size_t max = 1;
|
||||
if (trivial[j])
|
||||
{
|
||||
max += fill[j];
|
||||
}
|
||||
if (max > fill[i])
|
||||
{
|
||||
fill[i] = max;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill)
|
||||
{
|
||||
const size_t size = dfa.states.size();
|
||||
|
||||
// find DFA states that belong to non-trivial SCC
|
||||
std::stack<size_t> stack;
|
||||
std::vector<size_t> lowlink(size, UNDEFINED);
|
||||
std::vector<bool> trivial(size, false);
|
||||
scc(dfa, stack, lowlink, trivial, 0);
|
||||
|
||||
// for each DFA state, calculate YYFILL argument:
|
||||
// maximal path length to the next YYFILL state
|
||||
fill.resize(size, UNDEFINED);
|
||||
calc_fill(dfa, trivial, fill, 0);
|
||||
|
||||
// The following states must trigger YYFILL:
|
||||
// - inital state
|
||||
// - all states in non-trivial SCCs
|
||||
// for other states, reset YYFILL argument to zero
|
||||
for (size_t i = 1; i < size; ++i)
|
||||
{
|
||||
if (trivial[i])
|
||||
{
|
||||
fill[i] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
252
tools/re2c/src/ir/dfa/minimization.cc
Normal file
252
tools/re2c/src/ir/dfa/minimization.cc
Normal file
|
@ -0,0 +1,252 @@
|
|||
#include <string.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/ir/dfa/dfa.h"
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class RuleOp;
|
||||
|
||||
/*
|
||||
* note [DFA minimization: table filling algorithm]
|
||||
*
|
||||
* This algorithm is simple and slow; it's a reference implementation.
|
||||
*
|
||||
* The algorithm constructs (strictly lower triangular) boolean matrix
|
||||
* indexed by DFA states. Each matrix cell (S1,S2) indicates if states
|
||||
* S1 and S2 are distinguishable. Initially states are distinguished
|
||||
* according to their rule and context. One step of the algorithm
|
||||
* updates the matrix as follows: each pair of states S1 and S2 is
|
||||
* marked as distinguishable iff exist transitions from S1 and S2 on
|
||||
* the same symbol that go to distinguishable states. The algorithm
|
||||
* loops until the matrix stops changing.
|
||||
*/
|
||||
static void minimization_table(
|
||||
size_t *part,
|
||||
const std::vector<dfa_state_t*> &states,
|
||||
size_t nchars)
|
||||
{
|
||||
const size_t count = states.size();
|
||||
|
||||
bool **tbl = new bool*[count];
|
||||
tbl[0] = new bool[count * (count - 1) / 2];
|
||||
for (size_t i = 0; i < count - 1; ++i)
|
||||
{
|
||||
tbl[i + 1] = tbl[i] + i;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
dfa_state_t *s1 = states[i];
|
||||
for (size_t j = 0; j < i; ++j)
|
||||
{
|
||||
dfa_state_t *s2 = states[j];
|
||||
tbl[i][j] = s1->ctx != s2->ctx
|
||||
|| s1->rule != s2->rule;
|
||||
}
|
||||
}
|
||||
|
||||
for (bool loop = true; loop;)
|
||||
{
|
||||
loop = false;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < i; ++j)
|
||||
{
|
||||
if (!tbl[i][j])
|
||||
{
|
||||
for (size_t k = 0; k < nchars; ++k)
|
||||
{
|
||||
size_t oi = states[i]->arcs[k];
|
||||
size_t oj = states[j]->arcs[k];
|
||||
if (oi < oj)
|
||||
{
|
||||
std::swap(oi, oj);
|
||||
}
|
||||
if (oi != oj &&
|
||||
(oi == dfa_t::NIL ||
|
||||
oj == dfa_t::NIL ||
|
||||
tbl[oi][oj]))
|
||||
{
|
||||
tbl[i][j] = true;
|
||||
loop = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
part[i] = i;
|
||||
for (size_t j = 0; j < i; ++j)
|
||||
{
|
||||
if (!tbl[i][j])
|
||||
{
|
||||
part[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete[] tbl[0];
|
||||
delete[] tbl;
|
||||
}
|
||||
|
||||
/*
|
||||
* note [DFA minimization: Moore algorithm]
|
||||
*
|
||||
* The algorithm maintains partition of DFA states.
|
||||
* Initial partition is coarse: states are distinguished according
|
||||
* to their rule and context. Partition is gradually refined: each
|
||||
* set of states is split into minimal number of subsets such that
|
||||
* for all states in a subset transitions on the same symbol go to
|
||||
* the same set of states.
|
||||
* The algorithm loops until partition stops changing.
|
||||
*/
|
||||
static void minimization_moore(
|
||||
size_t *part,
|
||||
const std::vector<dfa_state_t*> &states,
|
||||
size_t nchars)
|
||||
{
|
||||
const size_t count = states.size();
|
||||
|
||||
size_t *next = new size_t[count];
|
||||
|
||||
std::map<std::pair<RuleOp*, bool>, size_t> init;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
dfa_state_t *s = states[i];
|
||||
std::pair<RuleOp*, bool> key(s->rule, s->ctx);
|
||||
if (init.insert(std::make_pair(key, i)).second)
|
||||
{
|
||||
part[i] = i;
|
||||
next[i] = dfa_t::NIL;
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t j = init[key];
|
||||
part[i] = j;
|
||||
next[i] = next[j];
|
||||
next[j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
size_t *out = new size_t[nchars * count];
|
||||
size_t *diff = new size_t[count];
|
||||
for (bool loop = true; loop;)
|
||||
{
|
||||
loop = false;
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
if (i != part[i] || next[i] == dfa_t::NIL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
for (size_t j = i; j != dfa_t::NIL; j = next[j])
|
||||
{
|
||||
size_t *o = &out[j * nchars];
|
||||
size_t *a = states[j]->arcs;
|
||||
for (size_t c = 0; c < nchars; ++c)
|
||||
{
|
||||
o[c] = a[c] == dfa_t::NIL
|
||||
? dfa_t::NIL
|
||||
: part[a[c]];
|
||||
}
|
||||
}
|
||||
|
||||
size_t diff_count = 0;
|
||||
for (size_t j = i; j != dfa_t::NIL;)
|
||||
{
|
||||
const size_t j_next = next[j];
|
||||
size_t n = 0;
|
||||
for (; n < diff_count; ++n)
|
||||
{
|
||||
size_t k = diff[n];
|
||||
if (memcmp(&out[j * nchars],
|
||||
&out[k * nchars],
|
||||
nchars * sizeof(size_t)) == 0)
|
||||
{
|
||||
part[j] = k;
|
||||
next[j] = next[k];
|
||||
next[k] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (n == diff_count)
|
||||
{
|
||||
diff[diff_count++] = j;
|
||||
part[j] = j;
|
||||
next[j] = dfa_t::NIL;
|
||||
}
|
||||
j = j_next;
|
||||
}
|
||||
loop |= diff_count > 1;
|
||||
}
|
||||
}
|
||||
delete[] out;
|
||||
delete[] diff;
|
||||
delete[] next;
|
||||
}
|
||||
|
||||
/*
 * Minimizes the DFA in place with the algorithm selected by
 * opts->dfa_minimization: states that are not representatives of their
 * set are deleted, the remaining ones are renumbered densely and their
 * arcs are redirected to the new numbers.
 */
void minimization(dfa_t &dfa)
{
    const size_t nstates = dfa.states.size();

    // part[i]: representative of the set that contains state i
    size_t *part = new size_t[nstates];
    switch (opts->dfa_minimization)
    {
        case DFA_MINIMIZATION_TABLE:
            minimization_table(part, dfa.states, dfa.nchars);
            break;
        case DFA_MINIMIZATION_MOORE:
            minimization_moore(part, dfa.states, dfa.nchars);
            break;
    }

    // compact[i]: new index of representative i (other slots are unused)
    size_t *compact = new size_t[nstates];
    size_t nclasses = 0;
    for (size_t i = 0; i < nstates; ++i)
    {
        if (part[i] == i)
        {
            compact[i] = nclasses++;
        }
    }

    size_t kept = 0;
    for (size_t i = 0; i < nstates; ++i)
    {
        dfa_state_t *s = dfa.states[i];
        if (part[i] != i)
        {
            // merged into its representative
            delete s;
            continue;
        }

        for (size_t c = 0; c < dfa.nchars; ++c)
        {
            size_t &arc = s->arcs[c];
            if (arc != dfa_t::NIL)
            {
                arc = compact[part[arc]];
            }
        }
        dfa.states[kept++] = s;
    }
    dfa.states.resize(kept);

    delete[] compact;
    delete[] part;
}
|
||||
|
||||
} // namespace re2c
|
||||
|
50
tools/re2c/src/ir/nfa/calc_size.cc
Normal file
50
tools/re2c/src/ir/nfa/calc_size.cc
Normal file
|
@ -0,0 +1,50 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_alt.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_close.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/ir/regexp/regexp_null.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
uint32_t AltOp::calc_size() const
|
||||
{
|
||||
return exp1->calc_size()
|
||||
+ exp2->calc_size()
|
||||
+ 1;
|
||||
}
|
||||
|
||||
uint32_t CatOp::calc_size() const
|
||||
{
|
||||
return exp1->calc_size()
|
||||
+ exp2->calc_size();
|
||||
}
|
||||
|
||||
uint32_t CloseOp::calc_size() const
|
||||
{
|
||||
return exp->calc_size() + 1;
|
||||
}
|
||||
|
||||
uint32_t MatchOp::calc_size() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint32_t NullOp::calc_size() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t RuleOp::calc_size() const
|
||||
{
|
||||
const uint32_t n = ctx->calc_size();
|
||||
return exp->calc_size()
|
||||
+ (n > 0 ? n + 1 : 0)
|
||||
+ 1;
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
72
tools/re2c/src/ir/nfa/nfa.cc
Normal file
72
tools/re2c/src/ir/nfa/nfa.cc
Normal file
|
@ -0,0 +1,72 @@
|
|||
#include "src/ir/nfa/nfa.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_alt.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_close.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/ir/regexp/regexp_null.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Builds the NFA for a regular expression: the state array is sized
// with re->calc_size() and then populated by re->compile().
// The initializers depend on each other, so they rely on the members
// being initialized in this order.
nfa_t::nfa_t(RegExp *re)
    : max_size(re->calc_size())         // capacity of the state array
    , size(0)                           // no states handed out yet
    , states(new nfa_state_t[max_size])
    , root(re->compile(*this, NULL))    // fills 'states', advances 'size'
{
}
|
||||
|
||||
// Releases the state array allocated by the constructor.
nfa_t::~nfa_t()
{
    delete[] states;
}
|
||||
|
||||
// Compiles an alternative: takes one ALT state whose two outgoing
// transitions lead to the compiled branches; both branches continue
// to 't'. Returns the ALT state.
//
// The branches are compiled in separate statements on purpose: each
// call takes states from 'nfa', and the evaluation order of function
// arguments is unspecified, so compiling them inside the argument list
// of alt() would make the state layout depend on the compiler.
nfa_state_t *AltOp::compile(nfa_t &nfa, nfa_state_t *t)
{
    nfa_state_t *s = &nfa.states[nfa.size++];
    nfa_state_t *s1 = exp1->compile(nfa, t);
    nfa_state_t *s2 = exp2->compile(nfa, t);
    s->alt(s1, s2);
    return s;
}
|
||||
|
||||
// Compiles a concatenation back to front: the second part is compiled
// first (continuing to 't'), and its entry state becomes the
// continuation of the first part. Returns the entry of the first part.
nfa_state_t *CatOp::compile(nfa_t &nfa, nfa_state_t *t)
{
    nfa_state_t *tail = exp2->compile(nfa, t);
    return exp1->compile(nfa, tail);
}
|
||||
|
||||
// Compiles a closure: takes one ALT state that either leaves to 't'
// or enters the repeated expression, which loops back to the ALT
// state. Returns the ALT state.
nfa_state_t *CloseOp::compile(nfa_t &nfa, nfa_state_t *t)
{
    nfa_state_t *loop = &nfa.states[nfa.size++];
    nfa_state_t *body = exp->compile(nfa, loop);
    loop->alt(t, body);
    return loop;
}
|
||||
|
||||
// Compiles a match: takes one RAN state that carries the range list
// and continues to 't'. Returns that state.
nfa_state_t *MatchOp::compile(nfa_t &nfa, nfa_state_t *t)
{
    nfa_state_t *state = &nfa.states[nfa.size];
    ++nfa.size;
    state->ran(t, match);
    return state;
}
|
||||
|
||||
// The empty expression adds no states: its entry is its continuation.
nfa_state_t *NullOp::compile(nfa_t &, nfa_state_t *t)
{
    nfa_state_t *entry = t;
    return entry;
}
|
||||
|
||||
// Compiles a rule. A FIN state for this rule is taken first. If the
// context expression is non-empty, a CTX state is taken next; it leads
// to the compiled context, which in turn ends in the FIN state. The
// rule expression is compiled last and continues to the CTX state (or
// directly to the FIN state when there is no context).
// The continuation argument is ignored. Returns the entry state.
nfa_state_t *RuleOp::compile(nfa_t &nfa, nfa_state_t *)
{
    nfa_state_t *tail = &nfa.states[nfa.size++];
    tail->fin(this);

    const bool has_ctx = ctx->calc_size() > 0;
    if (has_ctx)
    {
        nfa_state_t *marker = &nfa.states[nfa.size++];
        marker->ctx(ctx->compile(nfa, tail));
        tail = marker;
    }

    return exp->compile(nfa, tail);
}
|
||||
|
||||
} // namespace re2c
|
90
tools/re2c/src/ir/nfa/nfa.h
Normal file
90
tools/re2c/src/ir/nfa/nfa.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
#ifndef _RE2C_IR_NFA_NFA_
|
||||
#define _RE2C_IR_NFA_NFA_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class Range;
|
||||
class RegExp;
|
||||
class RuleOp;
|
||||
|
||||
struct nfa_state_t
|
||||
{
|
||||
enum type_t
|
||||
{
|
||||
ALT,
|
||||
RAN,
|
||||
CTX,
|
||||
FIN
|
||||
} type;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
nfa_state_t *out1;
|
||||
nfa_state_t *out2;
|
||||
} alt;
|
||||
struct
|
||||
{
|
||||
nfa_state_t *out;
|
||||
Range *ran;
|
||||
} ran;
|
||||
struct
|
||||
{
|
||||
nfa_state_t *out;
|
||||
} ctx;
|
||||
struct
|
||||
{
|
||||
RuleOp *rule;
|
||||
} fin;
|
||||
} value;
|
||||
bool mark;
|
||||
|
||||
void alt(nfa_state_t *s1, nfa_state_t *s2)
|
||||
{
|
||||
type = ALT;
|
||||
value.alt.out1 = s1;
|
||||
value.alt.out2 = s2;
|
||||
mark = false;
|
||||
}
|
||||
void ran(nfa_state_t *s, Range *r)
|
||||
{
|
||||
type = RAN;
|
||||
value.ran.out = s;
|
||||
value.ran.ran = r;
|
||||
mark = false;
|
||||
}
|
||||
void ctx(nfa_state_t *s)
|
||||
{
|
||||
type = CTX;
|
||||
value.ctx.out = s;
|
||||
mark = false;
|
||||
}
|
||||
void fin(RuleOp *r)
|
||||
{
|
||||
type = FIN;
|
||||
value.fin.rule = r;
|
||||
mark = false;
|
||||
}
|
||||
};
|
||||
|
||||
struct nfa_t
|
||||
{
|
||||
const uint32_t max_size;
|
||||
uint32_t size;
|
||||
nfa_state_t *states;
|
||||
nfa_state_t *root;
|
||||
|
||||
nfa_t(RegExp *re);
|
||||
~nfa_t();
|
||||
|
||||
FORBID_COPY(nfa_t);
|
||||
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_NFA_NFA_
|
49
tools/re2c/src/ir/nfa/split.cc
Normal file
49
tools/re2c/src/ir/nfa/split.cc
Normal file
|
@ -0,0 +1,49 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
#include <set>
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_alt.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_close.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/ir/regexp/regexp_null.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
void AltOp::split (std::set<uint32_t> & cs)
|
||||
{
|
||||
exp1->split (cs);
|
||||
exp2->split (cs);
|
||||
}
|
||||
|
||||
void CatOp::split (std::set<uint32_t> & cs)
|
||||
{
|
||||
exp1->split (cs);
|
||||
exp2->split (cs);
|
||||
}
|
||||
|
||||
void CloseOp::split (std::set<uint32_t> & cs)
|
||||
{
|
||||
exp->split (cs);
|
||||
}
|
||||
|
||||
void MatchOp::split (std::set<uint32_t> & cs)
|
||||
{
|
||||
for (Range *r = match; r; r = r->next ())
|
||||
{
|
||||
cs.insert (r->lower ());
|
||||
cs.insert (r->upper ());
|
||||
}
|
||||
}
|
||||
|
||||
// The empty expression contributes no split points.
void NullOp::split (std::set<uint32_t> &)
{
}
|
||||
|
||||
void RuleOp::split (std::set<uint32_t> & cs)
|
||||
{
|
||||
exp->split (cs);
|
||||
ctx->split (cs);
|
||||
}
|
||||
|
||||
} // namespace re2c
|
51
tools/re2c/src/ir/regexp/display.cc
Normal file
51
tools/re2c/src/ir/regexp/display.cc
Normal file
|
@ -0,0 +1,51 @@
|
|||
#include <iostream>
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_alt.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_close.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/ir/regexp/regexp_null.h"
|
||||
#include "src/ir/regexp/regexp_rule.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
std::ostream & operator << (std::ostream & o, const RegExp & re)
|
||||
{
|
||||
re.display (o);
|
||||
return o;
|
||||
}
|
||||
|
||||
void AltOp::display (std::ostream & o) const
|
||||
{
|
||||
o << exp1 << "|" << exp2;
|
||||
}
|
||||
|
||||
void CatOp::display (std::ostream & o) const
|
||||
{
|
||||
o << exp1 << exp2;
|
||||
}
|
||||
|
||||
void CloseOp::display (std::ostream & o) const
|
||||
{
|
||||
o << exp << "+";
|
||||
}
|
||||
|
||||
void MatchOp::display (std::ostream & o) const
|
||||
{
|
||||
o << match;
|
||||
}
|
||||
|
||||
void NullOp::display (std::ostream & o) const
|
||||
{
|
||||
o << "_";
|
||||
}
|
||||
|
||||
void RuleOp::display (std::ostream & o) const
|
||||
{
|
||||
o << exp << "/" << ctx << ";";
|
||||
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
15
tools/re2c/src/ir/regexp/empty_class_policy.h
Normal file
15
tools/re2c/src/ir/regexp/empty_class_policy.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef _RE2C_IR_REGEXP_EMPTY_CLASS_POLICY_
|
||||
#define _RE2C_IR_REGEXP_EMPTY_CLASS_POLICY_
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// How an empty character class is handled.
enum empty_class_policy_t
{
    // match on empty input
    EMPTY_CLASS_MATCH_EMPTY,
    // fail to match on any input
    EMPTY_CLASS_MATCH_NONE,
    // compilation error
    EMPTY_CLASS_ERROR
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_EMPTY_CLASS_POLICY_
|
31
tools/re2c/src/ir/regexp/encoding/case.h
Normal file
31
tools/re2c/src/ir/regexp/encoding/case.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_CASE_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_CASE_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// TODO: support non-ASCII encodings
|
||||
bool is_alpha (uint32_t c);
|
||||
uint32_t to_lower_unsafe (uint32_t c);
|
||||
uint32_t to_upper_unsafe (uint32_t c);
|
||||
|
||||
// True iff c is a basic Latin letter: 'a'..'z' or 'A'..'Z'.
inline bool is_alpha (uint32_t c)
{
    if (c >= 'a' && c <= 'z')
    {
        return true;
    }
    return c >= 'A' && c <= 'Z';
}
|
||||
|
||||
// Sets bit 0x20 of c. 'Unsafe': the argument is not checked to be a
// letter.
inline uint32_t to_lower_unsafe (uint32_t c)
{
    const uint32_t case_bit = 0x20u;
    return c | case_bit;
}
|
||||
|
||||
// Clears bit 0x20 of c. 'Unsafe': the argument is not checked to be a
// letter.
inline uint32_t to_upper_unsafe (uint32_t c)
{
    const uint32_t case_bit = 0x20u;
    return c & ~case_bit;
}
|
||||
|
||||
}
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_CASE_
|
206
tools/re2c/src/ir/regexp/encoding/enc.cc
Normal file
206
tools/re2c/src/ir/regexp/encoding/enc.cc
Normal file
|
@ -0,0 +1,206 @@
|
|||
#include "src/ir/regexp/encoding/enc.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
const uint32_t Enc::SURR_MIN = 0xD800;
|
||||
const uint32_t Enc::SURR_MAX = 0xDFFF;
|
||||
const uint32_t Enc::UNICODE_ERROR = 0xFFFD;
|
||||
|
||||
const uint32_t Enc::asc2ebc[256] =
|
||||
{ /* Based on ISO 8859/1 and Code Page 37 */
|
||||
0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
|
||||
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
|
||||
0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
|
||||
0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xba, 0xe0, 0xbb, 0xb0, 0x6d,
|
||||
0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
|
||||
0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b,
|
||||
0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xff,
|
||||
0x41, 0xaa, 0x4a, 0xb1, 0x9f, 0xb2, 0x6a, 0xb5, 0xbd, 0xb4, 0x9a, 0x8a, 0x5f, 0xca, 0xaf, 0xbc,
|
||||
0x90, 0x8f, 0xea, 0xfa, 0xbe, 0xa0, 0xb6, 0xb3, 0x9d, 0xda, 0x9b, 0x8b, 0xb7, 0xb8, 0xb9, 0xab,
|
||||
0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9e, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
|
||||
0xac, 0x69, 0xed, 0xee, 0xeb, 0xef, 0xec, 0xbf, 0x80, 0xfd, 0xfe, 0xfb, 0xfc, 0xad, 0x8e, 0x59,
|
||||
0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9c, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
|
||||
0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 0xe1, 0x70, 0xdd, 0xde, 0xdb, 0xdc, 0x8d, 0xae, 0xdf
|
||||
};
|
||||
|
||||
const uint32_t Enc::ebc2asc[256] =
|
||||
{ /* Based on ISO 8859/1 and Code Page 37 */
|
||||
0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x9d, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0x80, 0x81, 0x82, 0x83, 0x84, 0x0a, 0x17, 0x1b, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
|
||||
0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
|
||||
0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
|
||||
0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0xac,
|
||||
0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
|
||||
0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
|
||||
0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xde, 0xb1,
|
||||
0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
|
||||
0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xfe, 0xae,
|
||||
0x5e, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0x5b, 0x5d, 0xaf, 0xa8, 0xb4, 0xd7,
|
||||
0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
|
||||
0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff,
|
||||
0x5c, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
|
||||
};
|
||||
|
||||
/*
|
||||
* Returns code point representation for current
|
||||
* encoding with regard to current policy.
|
||||
*
|
||||
* Since code point is exactly specified by user,
|
||||
* it is assumed that user considers it to be valid.
|
||||
* We must check it.
|
||||
*
|
||||
* Returns false if this code point exceeds maximum
|
||||
* or is forbidden by current policy, otherwise
|
||||
* returns true. Overwrites code point.
|
||||
*/
|
||||
bool Enc::encode(uint32_t & c) const
|
||||
{
|
||||
if (c >= nCodePoints ())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (type_)
|
||||
{
|
||||
case ASCII:
|
||||
return true;
|
||||
case EBCDIC:
|
||||
c = asc2ebc[c];
|
||||
return true;
|
||||
case UCS2:
|
||||
case UTF16:
|
||||
case UTF32:
|
||||
case UTF8:
|
||||
if (c < SURR_MIN || c > SURR_MAX)
|
||||
return true;
|
||||
else
|
||||
{
|
||||
switch (policy_)
|
||||
{
|
||||
case POLICY_FAIL:
|
||||
return false;
|
||||
case POLICY_SUBSTITUTE:
|
||||
c = UNICODE_ERROR;
|
||||
return true;
|
||||
case POLICY_IGNORE:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false; // to silence gcc warning
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns original representation of code point.
|
||||
* Assumes code point is valid (hence 'unsafe').
|
||||
*/
|
||||
uint32_t Enc::decodeUnsafe(uint32_t c) const
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case EBCDIC:
|
||||
c = ebc2asc[c & 0xFF];
|
||||
break;
|
||||
case ASCII:
|
||||
case UCS2:
|
||||
case UTF16:
|
||||
case UTF32:
|
||||
case UTF8:
|
||||
break;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns [l - h] range representation for current
|
||||
* encoding with regard to current policy.
|
||||
*
|
||||
* Since range borders are exactly specified by user,
|
||||
* it is assumed that user considers that all code
|
||||
* points from this range are valid. re2c must check it.
|
||||
*
|
||||
* Returns NULL if range contains code points that
|
||||
* exceed maximum or are forbidden by current policy,
|
||||
* otherwise returns pointer to newly constructed range.
|
||||
*/
|
||||
Range * Enc::encodeRange(uint32_t l, uint32_t h) const
{
    // both borders must be valid code points of the current encoding
    const uint32_t limit = nCodePoints ();
    if (l >= limit || h >= limit)
    {
        return NULL;
    }

    switch (type_)
    {
        case ASCII:
            return Range::ran (l, h + 1);

        case EBCDIC:
        {
            // translate symbol by symbol and join the results
            Range * r = Range::sym (asc2ebc[l]);
            for (uint32_t c = l + 1; c <= h; ++c)
            {
                r = Range::add (r, Range::sym (asc2ebc[c]));
            }
            return r;
        }

        case UCS2:
        case UTF16:
        case UTF32:
        case UTF8:
        {
            Range * r = Range::ran (l, h + 1);
            const bool has_surrogates = l <= SURR_MAX && h >= SURR_MIN;
            if (!has_surrogates)
            {
                return r;
            }
            switch (policy_)
            {
                case POLICY_FAIL:
                    return NULL;
                case POLICY_SUBSTITUTE:
                {
                    // cut surrogates out and add the error code point
                    Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
                    Range * error = Range::sym (UNICODE_ERROR);
                    return Range::add (Range::sub (r, surrs), error);
                }
                case POLICY_IGNORE:
                    return r;
            }
            return r;
        }
    }
    return NULL;
}
|
||||
|
||||
/*
|
||||
* Returns full range representation for current encoding
|
||||
* with regard to current policy.
|
||||
*
|
||||
* Since range is defined declaratively, re2c does
|
||||
* all the necessary corrections 'for free'.
|
||||
*
|
||||
* Always succeeds, returns pointer to newly constructed
|
||||
* range.
|
||||
*/
|
||||
Range * Enc::fullRange() const
{
    Range * all = Range::ran (0, nCodePoints());
    if (policy_ == POLICY_IGNORE)
    {
        return all;
    }
    // any other policy: surrogates are excluded from the full range
    Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1);
    return Range::sub (all, surrs);
}
|
||||
|
||||
} // namespace re2c
|
197
tools/re2c/src/ir/regexp/encoding/enc.h
Normal file
197
tools/re2c/src/ir/regexp/encoding/enc.h
Normal file
|
@ -0,0 +1,197 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_ENC_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_ENC_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
class Range;
|
||||
|
||||
/*
|
||||
* note [encodings]
|
||||
*
|
||||
* Each encoding defines two concepts:
|
||||
*
|
||||
* 1) Code point -- abstract number, which represents single encoding symbol.
|
||||
* E.g., Unicode defines code points in the range [0 - 0x10FFFF] , so each
|
||||
* Unicode encoding must be capable of representing 0x110000 code points.
|
||||
*
|
||||
* 2) Code unit -- the smallest unit of memory, which is used in the encoded
|
||||
* text. One or more code units can be needed to represent a single code
|
||||
* point, depending on the encoding. For each encoding, all code points
|
||||
* either are represented with equal number of code units (fixed-length
|
||||
* encodings), or with variable number of code units (variable-length
|
||||
* encodings).
|
||||
*
|
||||
* +----------+------------------+-----------------------+-----------------+----------------+
|
||||
* | encoding | code point range | code point size | code unit range | code unit size |
|
||||
* +----------+------------------+-----------------------+-----------------+----------------+
|
||||
* | ASCII | 0 - 0xFF | fixed, 1 byte | 0 - 0xFF | 1 byte |
|
||||
* | EBCDIC | 0 - 0xFF | fixed, 1 byte | 0 - 0xFF | 1 byte |
|
||||
* | UCS2 | 0 - 0xFFFF | fixed, 2 bytes | 0 - 0xFFFF | 2 bytes |
|
||||
* | UTF16 | 0 - 0x10FFFF | variable, 2 - 4 bytes | 0 - 0xFFFF | 2 bytes |
|
||||
* | UTF32 | 0 - 0x10FFFF | fixed, 4 bytes | 0 - 0x10FFFF | 4 bytes |
|
||||
* | UTF8 | 0 - 0x10FFFF | variable, 1 - 4 bytes | 0 - 0xFF | 1 byte |
|
||||
* +----------+------------------+-----------------------+-----------------+----------------+
|
||||
*/
|
||||
|
||||
class Enc
|
||||
{
|
||||
public:
|
||||
// Supported encodings.
|
||||
enum type_t
|
||||
{ ASCII
|
||||
, EBCDIC
|
||||
, UCS2
|
||||
, UTF16
|
||||
, UTF32
|
||||
, UTF8
|
||||
};
|
||||
|
||||
// What to do with invalid code points
|
||||
enum policy_t
|
||||
{ POLICY_FAIL
|
||||
, POLICY_SUBSTITUTE
|
||||
, POLICY_IGNORE
|
||||
};
|
||||
|
||||
private:
|
||||
static const uint32_t asc2ebc[256];
|
||||
static const uint32_t ebc2asc[256];
|
||||
static const uint32_t SURR_MIN;
|
||||
static const uint32_t SURR_MAX;
|
||||
static const uint32_t UNICODE_ERROR;
|
||||
|
||||
type_t type_;
|
||||
policy_t policy_;
|
||||
|
||||
public:
|
||||
Enc()
|
||||
: type_ (ASCII)
|
||||
, policy_ (POLICY_IGNORE)
|
||||
{ }
|
||||
|
||||
static const char * name (type_t t);
|
||||
|
||||
bool operator != (const Enc & e) const { return type_ != e.type_; }
|
||||
|
||||
inline uint32_t nCodePoints() const;
|
||||
inline uint32_t nCodeUnits() const;
|
||||
inline uint32_t szCodePoint() const;
|
||||
inline uint32_t szCodeUnit() const;
|
||||
|
||||
inline bool set(type_t t);
|
||||
inline void unset(type_t);
|
||||
inline type_t type () const;
|
||||
|
||||
inline void setPolicy(policy_t t);
|
||||
|
||||
bool encode(uint32_t & c) const;
|
||||
uint32_t decodeUnsafe(uint32_t c) const;
|
||||
Range * encodeRange(uint32_t l, uint32_t h) const;
|
||||
Range * fullRange() const;
|
||||
};
|
||||
|
||||
// Returns the printable name of encoding t, or "<bad encoding>" for a
// value outside of type_t.
inline const char * Enc::name (type_t t)
{
    switch (t)
    {
        case ASCII:  return "ASCII";
        case EBCDIC: return "EBCDIC";
        case UTF8:   return "UTF8";
        case UCS2:   return "UCS2"; // was misspelled as "USC2"
        case UTF16:  return "UTF16";
        case UTF32:  return "UTF32";
        default:     return "<bad encoding>";
    }
}
|
||||
|
||||
inline uint32_t Enc::nCodePoints() const
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case ASCII:
|
||||
case EBCDIC: return 0x100;
|
||||
case UCS2: return 0x10000;
|
||||
case UTF16:
|
||||
case UTF32:
|
||||
case UTF8:
|
||||
default: return 0x110000;
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t Enc::nCodeUnits() const
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case ASCII:
|
||||
case EBCDIC:
|
||||
case UTF8: return 0x100;
|
||||
case UCS2:
|
||||
case UTF16: return 0x10000;
|
||||
case UTF32:
|
||||
default: return 0x110000;
|
||||
}
|
||||
}
|
||||
|
||||
// returns *maximal* code point size for encoding
|
||||
inline uint32_t Enc::szCodePoint() const
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case ASCII:
|
||||
case EBCDIC: return 1;
|
||||
case UCS2: return 2;
|
||||
case UTF16:
|
||||
case UTF32:
|
||||
case UTF8:
|
||||
default: return 4;
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t Enc::szCodeUnit() const
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case ASCII:
|
||||
case EBCDIC:
|
||||
case UTF8: return 1;
|
||||
case UCS2:
|
||||
case UTF16: return 2;
|
||||
case UTF32:
|
||||
default: return 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Selects encoding t. Succeeds if t is already selected or if the
// current encoding is still ASCII; otherwise the encoding is left
// unchanged and false is returned.
inline bool Enc::set(type_t t)
{
    if (type_ != t)
    {
        if (type_ != ASCII)
        {
            return false;
        }
        type_ = t;
    }
    return true;
}
|
||||
|
||||
// Falls back to ASCII if t is the current encoding; otherwise does
// nothing.
inline void Enc::unset(type_t t)
{
    if (type_ != t)
    {
        return;
    }
    type_ = ASCII;
}
|
||||
|
||||
// Current encoding.
inline Enc::type_t Enc::type () const
{
    const type_t current = type_;
    return current;
}
|
||||
|
||||
// Sets the policy applied to invalid code points.
inline void Enc::setPolicy(policy_t t)
{
    this->policy_ = t;
}
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_ENC_
|
38
tools/re2c/src/ir/regexp/encoding/range_suffix.cc
Normal file
38
tools/re2c/src/ir/regexp/encoding/range_suffix.cc
Normal file
|
@ -0,0 +1,38 @@
|
|||
#include "src/ir/regexp/encoding/range_suffix.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
static RegExp * emit (RangeSuffix * p, RegExp * re);
|
||||
|
||||
free_list<RangeSuffix *> RangeSuffix::freeList;
|
||||
|
||||
// Converts a suffix tree to a regexp. An empty tree yields a match on
// a NULL range.
RegExp * to_regexp (RangeSuffix * p)
{
    if (p)
    {
        return emit (p, NULL);
    }
    return new MatchOp (NULL);
}
|
||||
|
||||
/*
|
||||
* Build regexp from suffix tree.
|
||||
*/
|
||||
// 'p' is a list of sibling nodes, 're' is the regexp accumulated on
// the way down to them. Every sibling prepends its own range [l-h] to
// 're' and passes the result to its children; the results for all
// siblings are joined as alternatives. An empty list yields 're'.
RegExp * emit(RangeSuffix * p, RegExp * re)
{
    if (p == NULL)
    {
        return re;
    }

    RegExp * alternatives = NULL;
    while (p != NULL)
    {
        RegExp * path = doCat(new MatchOp(Range::ran (p->l, p->h + 1)), re);
        alternatives = doAlt(alternatives, emit(p->child, path));
        p = p->next;
    }
    return alternatives;
}
|
||||
|
||||
} // namespace re2c
|
39
tools/re2c/src/ir/regexp/encoding/range_suffix.h
Normal file
39
tools/re2c/src/ir/regexp/encoding/range_suffix.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <stddef.h> // NULL
|
||||
|
||||
#include "src/util/forbid_copy.h"
|
||||
#include "src/util/free_list.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
class RegExp;
|
||||
|
||||
struct RangeSuffix
|
||||
{
|
||||
static free_list<RangeSuffix *> freeList;
|
||||
|
||||
uint32_t l;
|
||||
uint32_t h;
|
||||
RangeSuffix * next;
|
||||
RangeSuffix * child;
|
||||
|
||||
RangeSuffix (uint32_t lo, uint32_t hi)
|
||||
: l (lo)
|
||||
, h (hi)
|
||||
, next (NULL)
|
||||
, child (NULL)
|
||||
{
|
||||
freeList.insert(this);
|
||||
}
|
||||
|
||||
FORBID_COPY (RangeSuffix);
|
||||
};
|
||||
|
||||
RegExp * to_regexp (RangeSuffix * p);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_
|
10
tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc
Normal file
10
tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc
Normal file
|
@ -0,0 +1,10 @@
|
|||
#include "src/ir/regexp/encoding/utf16/utf16.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
const uint32_t utf16::MAX_1WORD_RUNE = 0xFFFFu;
|
||||
const uint32_t utf16::MIN_LEAD_SURR = 0xD800u;
|
||||
const uint32_t utf16::MIN_TRAIL_SURR = 0xDC00u;
|
||||
const uint32_t utf16::MAX_TRAIL_SURR = 0xDFFFu;
|
||||
|
||||
} // namespace re2c
|
37
tools/re2c/src/ir/regexp/encoding/utf16/utf16.h
Normal file
37
tools/re2c/src/ir/regexp/encoding/utf16/utf16.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// UTF-16 constants and helpers for splitting a rune into surrogates.
class utf16
{
public:
    typedef uint32_t rune;

    static const uint32_t MAX_1WORD_RUNE;
    static const uint32_t MIN_LEAD_SURR;
    static const uint32_t MIN_TRAIL_SURR;
    static const uint32_t MAX_TRAIL_SURR;

    // leading surrogate of UTF-16 symbol
    static inline uint32_t lead_surr(rune r);

    // trailing surrogate of UTF-16 symbol
    static inline uint32_t trail_surr(rune r);
};
|
||||
|
||||
// Leading surrogate: quotient of the rune's offset from 0x10000 by 0x400,
// rebased onto MIN_LEAD_SURR.
inline uint32_t utf16::lead_surr(rune r)
{
	const uint32_t offset = r - 0x10000u;
	return MIN_LEAD_SURR + offset / 0x400u;
}
|
||||
|
||||
// Trailing surrogate: remainder of the rune's offset from 0x10000 modulo 0x400,
// rebased onto MIN_TRAIL_SURR.
inline uint32_t utf16::trail_surr(rune r)
{
	const uint32_t offset = r - 0x10000u;
	return MIN_TRAIL_SURR + offset % 0x400u;
}
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_
|
146
tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc
Normal file
146
tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc
Normal file
|
@ -0,0 +1,146 @@
|
|||
#include "src/ir/regexp/encoding/utf16/utf16_range.h"
|
||||
#include "src/ir/regexp/encoding/range_suffix.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
/*
|
||||
* Add word range [w1-w2].
|
||||
*/
|
||||
void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h)
|
||||
{
|
||||
RangeSuffix ** p = &root;
|
||||
for (;;)
|
||||
{
|
||||
if (*p == NULL)
|
||||
{
|
||||
*p = new RangeSuffix(l, h);
|
||||
break;
|
||||
}
|
||||
else if ((*p)->l == l && (*p)->h == h)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
p = &(*p)->next;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that we have catenation of word ranges [l1-h1],[l2-h2],
|
||||
* we want to add it to existing range, merging suffixes on the fly.
|
||||
*/
|
||||
void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr)
|
||||
{
|
||||
RangeSuffix ** p = &root;
|
||||
for (;;)
|
||||
{
|
||||
if (*p == NULL)
|
||||
{
|
||||
*p = new RangeSuffix(l_tr, h_tr);
|
||||
p = &(*p)->child;
|
||||
break;
|
||||
}
|
||||
else if ((*p)->l == l_tr && (*p)->h == h_tr)
|
||||
{
|
||||
p = &(*p)->child;
|
||||
break;
|
||||
}
|
||||
else
|
||||
p = &(*p)->next;
|
||||
}
|
||||
for (;;)
|
||||
{
|
||||
if (*p == NULL)
|
||||
{
|
||||
*p = new RangeSuffix(l_ld, h_ld);
|
||||
break;
|
||||
}
|
||||
else if ((*p)->l == l_ld && (*p)->h == h_ld)
|
||||
{
|
||||
break;
|
||||
}
|
||||
else
|
||||
p = &(*p)->next;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Split range into sub-ranges that agree on leading surrogates.
|
||||
*
|
||||
* We have two Unicode runes, L and H, both map to UTF-16
|
||||
* surrogate pairs 'L1 L2' and 'H1 H2'.
|
||||
* We want to represent Unicode range [L - H] as a catenation
|
||||
* of word ranges [L1 - H1],[L2 - H2].
|
||||
*
|
||||
* This is only possible if the following condition holds:
|
||||
* if L1 /= H1, then L2 == 0xdc00 and H2 == 0xdfff.
|
||||
* This condition ensures that:
|
||||
* 1) all possible UTF-16 sequences between L and H are allowed
|
||||
* 2) no word ranges [w1 - w2] appear, such that w1 > w2
|
||||
*
|
||||
* E.g.:
|
||||
* [\U00010001-\U00010400] => [d800-d801],[dc01-dc00].
|
||||
* The last word range, [dc01-dc00], is incorrect: its lower bound
|
||||
* is greater than its upper bound. To fix this, we must split
|
||||
* the original range into two sub-ranges:
|
||||
* [\U00010001-\U000103ff] => [d800-d800],[dc01-dfff]
|
||||
* [\U00010400-\U00010400] => [d801-d801],[dc00-dc00]
|
||||
*
|
||||
* This function finds all such 'points of discontinuity'
|
||||
* and represents original range as alternation of continuous
|
||||
* sub-ranges.
|
||||
*/
|
||||
void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr)
{
	// a split is only needed when the leading surrogates differ
	const bool leads_differ = l_ld != h_ld;

	if (leads_differ && l_tr > utf16::MIN_TRAIL_SURR)
	{
		// lower bound starts mid-block: cut off the rest of the first leading surrogate
		UTF16splitByContinuity(root, l_ld, l_ld, l_tr, utf16::MAX_TRAIL_SURR);
		UTF16splitByContinuity(root, l_ld + 1, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
	}
	else if (leads_differ && h_tr < utf16::MAX_TRAIL_SURR)
	{
		// upper bound ends mid-block: cut off the beginning of the last leading surrogate
		UTF16splitByContinuity(root, l_ld, h_ld - 1, l_tr, utf16::MAX_TRAIL_SURR);
		UTF16splitByContinuity(root, h_ld, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
	}
	else
	{
		// range is continuous: add it as is
		UTF16addContinuous2(root, l_ld, h_ld, l_tr, h_tr);
	}
}
|
||||
|
||||
/*
|
||||
* Split range into sub-ranges, so that all runes in the same
|
||||
* sub-range have equal length of UTF-16 sequence. E.g., full
|
||||
* Unicode range [0-0x10FFFF] gets split into sub-ranges:
|
||||
* [0 - 0xFFFF] (2-byte UTF-16 sequences)
|
||||
* [0x10000 - 0x10FFFF] (4-byte UTF-16 sequences)
|
||||
*/
|
||||
void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h)
|
||||
{
|
||||
if (l <= utf16::MAX_1WORD_RUNE)
|
||||
{
|
||||
if (h <= utf16::MAX_1WORD_RUNE)
|
||||
{
|
||||
UTF16addContinuous1(root, l, h);
|
||||
}
|
||||
else
|
||||
{
|
||||
UTF16addContinuous1(root, l, utf16::MAX_1WORD_RUNE);
|
||||
const uint32_t h_ld = utf16::lead_surr(h);
|
||||
const uint32_t h_tr = utf16::trail_surr(h);
|
||||
UTF16splitByContinuity(root, utf16::MIN_LEAD_SURR, h_ld, utf16::MIN_TRAIL_SURR, h_tr);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint32_t l_ld = utf16::lead_surr(l);
|
||||
const uint32_t l_tr = utf16::trail_surr(l);
|
||||
const uint32_t h_ld = utf16::lead_surr(h);
|
||||
const uint32_t h_tr = utf16::trail_surr(h);
|
||||
UTF16splitByContinuity(root, l_ld, h_ld, l_tr, h_tr);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace re2c
|
19
tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h
Normal file
19
tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/ir/regexp/encoding/utf16/utf16.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
struct RangeSuffix;
|
||||
|
||||
void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h);
|
||||
void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr);
|
||||
void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr);
|
||||
void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_
|
38
tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc
Normal file
38
tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc
Normal file
|
@ -0,0 +1,38 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/ir/regexp/encoding/utf16/utf16_regexp.h"
|
||||
#include "src/ir/regexp/encoding/range_suffix.h"
|
||||
#include "src/ir/regexp/encoding/utf16/utf16_range.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Regexp matching the UTF-16 encoding of a single rune: one word for runes
// up to MAX_1WORD_RUNE, otherwise catenation of leading and trailing surrogates.
RegExp * UTF16Symbol(utf16::rune r)
{
	if (r > utf16::MAX_1WORD_RUNE)
	{
		const uint32_t lead = utf16::lead_surr(r);
		const uint32_t trail = utf16::trail_surr(r);
		return new CatOp(new MatchOp(Range::sym (lead)), new MatchOp(Range::sym (trail)));
	}

	return new MatchOp(Range::sym (r));
}
|
||||
|
||||
/*
|
||||
* Split Unicode character class {[l1, h1), ..., [lN, hN)} into
|
||||
* ranges [l1, h1-1], ..., [lN, hN-1] and return alternation of
|
||||
* them. We store partially built range in suffix tree, which
|
||||
* allows to eliminate common suffixes while building.
|
||||
*/
|
||||
RegExp * UTF16Range(const Range * r)
|
||||
{
|
||||
RangeSuffix * root = NULL;
|
||||
for (; r != NULL; r = r->next ())
|
||||
UTF16splitByRuneLength(root, r->lower (), r->upper () - 1);
|
||||
return to_regexp (root);
|
||||
}
|
||||
|
||||
} // namespace re2c
|
16
tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h
Normal file
16
tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_
|
||||
|
||||
#include "src/ir/regexp/encoding/utf16/utf16.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
class Range;
|
||||
class RegExp;
|
||||
|
||||
RegExp * UTF16Symbol(utf16::rune r);
|
||||
RegExp * UTF16Range(const Range * r);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_
|
84
tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc
Normal file
84
tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc
Normal file
|
@ -0,0 +1,84 @@
|
|||
#include "src/ir/regexp/encoding/utf8/utf8.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
const uint32_t utf8::ERROR = 0xFFFDu;
|
||||
|
||||
const utf8::rune utf8::MAX_1BYTE_RUNE = 0x7Fu;
|
||||
const utf8::rune utf8::MAX_2BYTE_RUNE = 0x7FFu;
|
||||
const utf8::rune utf8::MAX_3BYTE_RUNE = 0xFFFFu;
|
||||
const utf8::rune utf8::MAX_4BYTE_RUNE = 0x10FFFFu;
|
||||
const utf8::rune utf8::MAX_RUNE = utf8::MAX_4BYTE_RUNE;
|
||||
|
||||
const uint32_t utf8::PREFIX_1BYTE = 0u; // 0000 0000
|
||||
const uint32_t utf8::INFIX = 0x80u; // 1000 0000
|
||||
const uint32_t utf8::PREFIX_2BYTE = 0xC0u; // 1100 0000
|
||||
const uint32_t utf8::PREFIX_3BYTE = 0xE0u; // 1110 0000
|
||||
const uint32_t utf8::PREFIX_4BYTE = 0xF0u; // 1111 0000
|
||||
|
||||
const uint32_t utf8::SHIFT = 6u;
|
||||
const uint32_t utf8::MASK = 0x3Fu; // 0011 1111
|
||||
|
||||
uint32_t utf8::rune_to_bytes(uint32_t *str, rune c)
|
||||
{
|
||||
// one byte sequence: 0-0x7F => 0xxxxxxx
|
||||
if (c <= MAX_1BYTE_RUNE)
|
||||
{
|
||||
str[0] = PREFIX_1BYTE | c;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// two byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx
|
||||
if (c <= MAX_2BYTE_RUNE)
|
||||
{
|
||||
str[0] = PREFIX_2BYTE | (c >> 1*SHIFT);
|
||||
str[1] = INFIX | (c & MASK);
|
||||
return 2;
|
||||
}
|
||||
|
||||
// If the Rune is out of range, convert it to the error rune.
|
||||
// Do this test here because the error rune encodes to three bytes.
|
||||
// Doing it earlier would duplicate work, since an out of range
|
||||
// Rune wouldn't have fit in one or two bytes.
|
||||
if (c > MAX_RUNE)
|
||||
c = ERROR;
|
||||
|
||||
// three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx
|
||||
if (c <= MAX_3BYTE_RUNE)
|
||||
{
|
||||
str[0] = PREFIX_3BYTE | (c >> 2*SHIFT);
|
||||
str[1] = INFIX | ((c >> 1*SHIFT) & MASK);
|
||||
str[2] = INFIX | (c & MASK);
|
||||
return 3;
|
||||
}
|
||||
|
||||
// four byte sequence (21-bit value):
|
||||
// 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
str[0] = PREFIX_4BYTE | (c >> 3*SHIFT);
|
||||
str[1] = INFIX | ((c >> 2*SHIFT) & MASK);
|
||||
str[2] = INFIX | ((c >> 1*SHIFT) & MASK);
|
||||
str[3] = INFIX | (c & MASK);
|
||||
return 4;
|
||||
}
|
||||
|
||||
// Number of bytes in the UTF-8 sequence of rune r
// (anything above MAX_3BYTE_RUNE counts as 4 bytes).
uint32_t utf8::rune_length(rune r)
{
	if (r <= MAX_1BYTE_RUNE)
	{
		return 1;
	}
	if (r <= MAX_2BYTE_RUNE)
	{
		return 2;
	}
	if (r <= MAX_3BYTE_RUNE)
	{
		return 3;
	}
	return 4;
}
|
||||
|
||||
// Largest rune whose UTF-8 sequence is i bytes long;
// ERROR for any i outside of 1..4.
utf8::rune utf8::max_rune(uint32_t i)
{
	if (i == 1)
	{
		return MAX_1BYTE_RUNE;
	}
	if (i == 2)
	{
		return MAX_2BYTE_RUNE;
	}
	if (i == 3)
	{
		return MAX_3BYTE_RUNE;
	}
	if (i == 4)
	{
		return MAX_4BYTE_RUNE;
	}
	return ERROR;
}
|
||||
|
||||
} // namespace re2c
|
48
tools/re2c/src/ir/regexp/encoding/utf8/utf8.h
Normal file
48
tools/re2c/src/ir/regexp/encoding/utf8/utf8.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Constants and helpers for encoding Unicode runes as UTF-8 byte sequences.
class utf8
{
public:
	typedef uint32_t rune;

	// maximal number of bytes per rune
	// (an enum rather than a static const member because of [-Wvla])
	enum { MAX_RUNE_LENGTH = 4u };

	// rune used in place of runes that cannot be encoded
	static const uint32_t ERROR;

	// largest rune for each sequence length, and largest rune overall
	static const rune MAX_1BYTE_RUNE;
	static const rune MAX_2BYTE_RUNE;
	static const rune MAX_3BYTE_RUNE;
	static const rune MAX_4BYTE_RUNE;
	static const rune MAX_RUNE;

	// marker bits of leading bytes and of continuation bytes (INFIX)
	static const uint32_t PREFIX_1BYTE;
	static const uint32_t INFIX;
	static const uint32_t PREFIX_2BYTE;
	static const uint32_t PREFIX_3BYTE;
	static const uint32_t PREFIX_4BYTE;

	// number of payload bits in a continuation byte and the mask selecting them
	static const uint32_t SHIFT;
	static const uint32_t MASK;

	// store UTF-8 bytestring of rune r in s, return its length
	static uint32_t rune_to_bytes(uint32_t * s, rune r);

	// length of UTF-8 bytestring of rune r
	static uint32_t rune_length(rune r);

	// largest rune with UTF-8 bytestring of length i
	static rune max_rune(uint32_t i);
};
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
|
112
tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc
Normal file
112
tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc
Normal file
|
@ -0,0 +1,112 @@
|
|||
#include "src/ir/regexp/encoding/utf8/utf8_range.h"
|
||||
#include "src/ir/regexp/encoding/range_suffix.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
/*
|
||||
* Now that we have catenation of byte ranges [l1-h1]...[lN-hN],
|
||||
* we want to add it to existing range, merging suffixes on the fly.
|
||||
*/
|
||||
void UTF8addContinuous(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n)
|
||||
{
|
||||
uint32_t lcs[utf8::MAX_RUNE_LENGTH];
|
||||
uint32_t hcs[utf8::MAX_RUNE_LENGTH];
|
||||
utf8::rune_to_bytes(lcs, l);
|
||||
utf8::rune_to_bytes(hcs, h);
|
||||
|
||||
RangeSuffix ** p = &root;
|
||||
for (uint32_t i = 1; i <= n; ++i)
|
||||
{
|
||||
const uint32_t lc = lcs[n - i];
|
||||
const uint32_t hc = hcs[n - i];
|
||||
for (;;)
|
||||
{
|
||||
if (*p == NULL)
|
||||
{
|
||||
*p = new RangeSuffix(lc, hc);
|
||||
p = &(*p)->child;
|
||||
break;
|
||||
}
|
||||
else if ((*p)->l == lc && (*p)->h == hc)
|
||||
{
|
||||
p = &(*p)->child;
|
||||
break;
|
||||
}
|
||||
else
|
||||
p = &(*p)->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Split range into sub-ranges that agree on leading bytes.
|
||||
*
|
||||
* We have two Unicode runes of equal length, L and H, which
|
||||
* map to UTF-8 sequences 'L_1 ... L_n' and 'H_1 ... H_n'.
|
||||
* We want to represent Unicode range [L - H] as a catenation
|
||||
* of byte ranges [L_1 - H_1], ..., [L_n - H_n].
|
||||
*
|
||||
* This is only possible if for all i > 1:
|
||||
* if L_i /= H_i, then L_(i+1) == 0x80 and H_(i+1) == 0xbf.
|
||||
* This condition ensures that:
|
||||
* 1) all possible UTF-8 sequences between L and H are allowed
|
||||
* 2) no byte ranges [b1 - b2] appear, such that b1 > b2
|
||||
*
|
||||
* E.g.:
|
||||
* [\U000e0031-\U000e0043] => [f3-f3],[a0-a0],[80-81],[b1-83].
|
||||
* The last byte range, [b1-83], is incorrect: its lower bound
|
||||
* is greater than its upper bound. To fix this, we must split
|
||||
* the original range into two sub-ranges:
|
||||
* [\U000e0031-\U000e003f] => [f3-f3],[a0-a0],[80-80],[b1-bf]
|
||||
* [\U000e0040-\U000e0043] => [f3-f3],[a0-a0],[81-81],[80-83]
|
||||
*
|
||||
* This function finds all such 'points of discontinuity'
|
||||
* and represents original range as alternation of continuous
|
||||
* sub-ranges.
|
||||
*/
|
||||
void UTF8splitByContinuity(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n)
{
	for (uint32_t k = 1; k < n; ++k)
	{
		// payload bits of the last k bytes of a UTF-8 sequence
		const uint32_t tail = (1u << (6u * k)) - 1u;

		// bounds agree on everything before the last k bytes: nothing to split here
		if ((l & ~tail) == (h & ~tail))
		{
			continue;
		}

		if ((l & tail) != 0)
		{
			// lower bound does not start at the minimal tail: cut at the end of its block
			const utf8::rune block_end = l | tail;
			UTF8splitByContinuity(root, l, block_end, n);
			UTF8splitByContinuity(root, block_end + 1, h, n);
			return;
		}

		if ((h & tail) != tail)
		{
			// upper bound does not end at the maximal tail: cut at the start of its block
			const utf8::rune block_start = h & ~tail;
			UTF8splitByContinuity(root, l, block_start - 1, n);
			UTF8splitByContinuity(root, block_start, h, n);
			return;
		}
	}

	// no discontinuity found: the range can be added as is
	UTF8addContinuous(root, l, h, n);
}
|
||||
|
||||
/*
|
||||
* Split range into sub-ranges, so that all runes in the same
|
||||
* sub-range have equal length of UTF-8 sequence. E.g., full
|
||||
* Unicode range [0-0x10FFFF] gets split into sub-ranges:
|
||||
* [0 - 0x7F] (1-byte UTF-8 sequences)
|
||||
* [0x80 - 0x7FF] (2-byte UTF-8 sequences)
|
||||
* [0x800 - 0xFFFF] (3-byte UTF-8 sequences)
|
||||
* [0x10000 - 0x10FFFF] (4-byte UTF-8 sequences)
|
||||
*/
|
||||
void UTF8splitByRuneLength(RangeSuffix * & root, utf8::rune l, utf8::rune h)
|
||||
{
|
||||
const uint32_t nh = utf8::rune_length(h);
|
||||
for (uint32_t nl = utf8::rune_length(l); nl < nh; ++nl)
|
||||
{
|
||||
utf8::rune r = utf8::max_rune(nl);
|
||||
UTF8splitByContinuity(root, l, r, nl);
|
||||
l = r + 1;
|
||||
}
|
||||
UTF8splitByContinuity(root, l, h, nh);
|
||||
}
|
||||
|
||||
} // namespace re2c
|
18
tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h
Normal file
18
tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h
Normal file
|
@ -0,0 +1,18 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/ir/regexp/encoding/utf8/utf8.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
struct RangeSuffix;
|
||||
|
||||
void UTF8addContinuous(RangeSuffix * & p, utf8::rune l, utf8::rune h, uint32_t n);
|
||||
void UTF8splitByContinuity(RangeSuffix * & p, utf8::rune l, utf8::rune h, uint32_t n);
|
||||
void UTF8splitByRuneLength(RangeSuffix * & p, utf8::rune l, utf8::rune h);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_
|
36
tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc
Normal file
36
tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc
Normal file
|
@ -0,0 +1,36 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/ir/regexp/encoding/utf8/utf8_regexp.h"
|
||||
#include "src/ir/regexp/encoding/range_suffix.h"
|
||||
#include "src/ir/regexp/encoding/utf8/utf8_range.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
// Regexp matching the UTF-8 encoding of a single rune:
// left-nested catenation of one-byte matches.
RegExp * UTF8Symbol(utf8::rune r)
{
	uint32_t bytes[utf8::MAX_RUNE_LENGTH];
	const uint32_t count = utf8::rune_to_bytes(bytes, r);

	RegExp * result = new MatchOp(Range::sym (bytes[0]));
	uint32_t pos = 1;
	while (pos < count)
	{
		RegExp * next_byte = new MatchOp(Range::sym (bytes[pos]));
		result = new CatOp(result, next_byte);
		++pos;
	}
	return result;
}
|
||||
|
||||
/*
|
||||
* Split Unicode character class {[l1, h1), ..., [lN, hN)} into
|
||||
* ranges [l1, h1-1], ..., [lN, hN-1] and return alternation of
|
||||
* them. We store partially built range in suffix tree, which
|
||||
* allows to eliminate common suffixes while building.
|
||||
*/
|
||||
RegExp * UTF8Range(const Range * r)
|
||||
{
|
||||
RangeSuffix * root = NULL;
|
||||
for (; r != NULL; r = r->next ())
|
||||
UTF8splitByRuneLength(root, r->lower (), r->upper () - 1);
|
||||
return to_regexp (root);
|
||||
}
|
||||
|
||||
} // namespace re2c
|
16
tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h
Normal file
16
tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_
|
||||
#define _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_
|
||||
|
||||
#include "src/ir/regexp/encoding/utf8/utf8.h"
|
||||
|
||||
namespace re2c {
|
||||
|
||||
class Range;
|
||||
class RegExp;
|
||||
|
||||
RegExp * UTF8Symbol(utf8::rune r);
|
||||
RegExp * UTF8Range(const Range * r);
|
||||
|
||||
} // namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_
|
55
tools/re2c/src/ir/regexp/fixed_length.cc
Normal file
55
tools/re2c/src/ir/regexp/fixed_length.cc
Normal file
|
@ -0,0 +1,55 @@
|
|||
#include "src/util/c99_stdint.h"
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_alt.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/ir/regexp/regexp_null.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
// Default: length is not fixed, which is reported as ~0u.
uint32_t RegExp::fixedLength ()
{
	const uint32_t not_fixed = ~0u;
	return not_fixed;
}
|
||||
|
||||
uint32_t AltOp::fixedLength ()
|
||||
{
|
||||
uint32_t l1 = exp1->fixedLength ();
|
||||
uint32_t l2 = exp1->fixedLength ();
|
||||
|
||||
if (l1 != l2 || l1 == ~0u)
|
||||
{
|
||||
return ~0u;
|
||||
}
|
||||
|
||||
return l1;
|
||||
}
|
||||
|
||||
uint32_t CatOp::fixedLength ()
|
||||
{
|
||||
const uint32_t l1 = exp1->fixedLength ();
|
||||
if (l1 != ~0u)
|
||||
{
|
||||
const uint32_t l2 = exp2->fixedLength ();
|
||||
if (l2 != ~0u)
|
||||
{
|
||||
return l1 + l2;
|
||||
}
|
||||
}
|
||||
return ~0u;
|
||||
}
|
||||
|
||||
// A character class match always has length one.
uint32_t MatchOp::fixedLength ()
{
	const uint32_t length = 1;
	return length;
}
|
||||
|
||||
// The empty regexp always has length zero.
uint32_t NullOp::fixedLength ()
{
	const uint32_t length = 0;
	return length;
}
|
||||
|
||||
} // end namespace re2c
|
||||
|
241
tools/re2c/src/ir/regexp/regexp.cc
Normal file
241
tools/re2c/src/ir/regexp/regexp.cc
Normal file
|
@ -0,0 +1,241 @@
|
|||
#include <stddef.h>
|
||||
|
||||
#include "src/conf/opt.h"
|
||||
#include "src/conf/warn.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/ir/regexp/empty_class_policy.h"
|
||||
#include "src/ir/regexp/encoding/case.h"
|
||||
#include "src/ir/regexp/encoding/enc.h"
|
||||
#include "src/ir/regexp/encoding/utf16/utf16_regexp.h"
|
||||
#include "src/ir/regexp/encoding/utf8/utf8_regexp.h"
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/ir/regexp/regexp_alt.h"
|
||||
#include "src/ir/regexp/regexp_cat.h"
|
||||
#include "src/ir/regexp/regexp_close.h"
|
||||
#include "src/ir/regexp/regexp_match.h"
|
||||
#include "src/ir/regexp/regexp_null.h"
|
||||
#include "src/parse/scanner.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
static MatchOp * merge (MatchOp * m1, MatchOp * m2);
|
||||
|
||||
free_list<RegExp*> RegExp::vFreeList;
|
||||
|
||||
// Alternation of e1 and e2. If one of them is NULL, the other one is
// returned unchanged (NULL if both are).
RegExp * doAlt (RegExp * e1, RegExp * e2)
{
	if (e1 != NULL && e2 != NULL)
	{
		return new AltOp (e1, e2);
	}
	return e1 != NULL ? e1 : e2;
}
|
||||
|
||||
// Alternation of e1 and e2 that keeps character classes merged.
// From each operand its character class part is taken out: the operand
// itself if it is a MatchOp, or the first alternative of an AltOp if that
// alternative is a MatchOp. The two classes are merged into one MatchOp,
// which is then alternated with whatever is left of both operands.
RegExp * mkAlt (RegExp * e1, RegExp * e2)
{
	MatchOp * class1;
	AltOp * alt1 = dynamic_cast<AltOp*> (e1);
	if (alt1 == NULL)
	{
		class1 = dynamic_cast<MatchOp*> (e1);
		if (class1 != NULL)
		{
			// the whole operand is a character class: nothing is left
			e1 = NULL;
		}
	}
	else
	{
		class1 = dynamic_cast<MatchOp*> (alt1->exp1);
		if (class1 != NULL)
		{
			// keep only the second alternative
			e1 = alt1->exp2;
		}
	}

	MatchOp * class2;
	AltOp * alt2 = dynamic_cast<AltOp*> (e2);
	if (alt2 == NULL)
	{
		class2 = dynamic_cast<MatchOp*> (e2);
		if (class2 != NULL)
		{
			e2 = NULL;
		}
	}
	else
	{
		class2 = dynamic_cast<MatchOp*> (alt2->exp1);
		if (class2 != NULL)
		{
			e2 = alt2->exp2;
		}
	}

	return doAlt (merge (class1, class2), doAlt (e1, e2));
}
|
||||
|
||||
// Union of two character class matches. If one of them is NULL, the other
// one is returned unchanged (NULL if both are); otherwise a new MatchOp
// over the sum of both ranges is created.
MatchOp * merge (MatchOp * m1, MatchOp * m2)
{
	if (m1 != NULL && m2 != NULL)
	{
		return new MatchOp (Range::add (m1->match, m2->match));
	}
	return m1 != NULL ? m1 : m2;
}
|
||||
|
||||
// Catenation of e1 and e2. If one of them is NULL, the other one is
// returned unchanged (NULL if both are).
RegExp * doCat (RegExp * e1, RegExp * e2)
{
	if (e1 == NULL)
	{
		return e2;
	}
	else if (e2 == NULL)
	{
		return e1;
	}
	else
	{
		return new CatOp (e1, e2);
	}
}
|
||||
|
||||
// Regexp for a single (case sensitive) character. The code point is first
// passed through the current encoding, which may reject it; the result is
// then represented according to the encoding type.
RegExp *Scanner::schr(uint32_t c) const
{
	const bool encoded = opts->encoding.encode(c);
	if (!encoded)
	{
		fatalf("Bad code point: '0x%X'", c);
	}

	RegExp *re;
	switch (opts->encoding.type ())
	{
		case Enc::UTF16:
			re = UTF16Symbol(c);
			break;
		case Enc::UTF8:
			re = UTF8Symbol(c);
			break;
		default:
			re = new MatchOp(Range::sym(c));
			break;
	}
	return re;
}
|
||||
|
||||
// Regexp for a single case insensitive character: alphabetic characters
// match both their lower and upper case forms, all others match themselves.
RegExp *Scanner::ichr(uint32_t c) const
{
	if (!is_alpha(c))
	{
		return schr(c);
	}

	RegExp *lower = schr(to_lower_unsafe(c));
	RegExp *upper = schr(to_upper_unsafe(c));
	return mkAlt(lower, upper);
}
|
||||
|
||||
// Regexp for character class r, represented according to the encoding type.
// An empty class (r == NULL) is first handled by the empty class policy:
// it either matches the empty string (with a warning), or is processed
// like any other class (with a warning), or is reported as an error.
RegExp *Scanner::cls(Range *r) const
{
	if (r == NULL)
	{
		switch (opts->empty_class_policy)
		{
			case EMPTY_CLASS_MATCH_EMPTY:
			{
				warn.empty_class (get_line ());
				return new NullOp;
			}
			case EMPTY_CLASS_MATCH_NONE:
			{
				warn.empty_class (get_line ());
				break;
			}
			case EMPTY_CLASS_ERROR:
			{
				fatal ("empty character class");
				break;
			}
		}
	}

	RegExp *re;
	switch (opts->encoding.type ())
	{
		case Enc::UTF16:
			re = UTF16Range(r);
			break;
		case Enc::UTF8:
			re = UTF8Range(r);
			break;
		default:
			re = new MatchOp(r);
			break;
	}
	return re;
}
|
||||
|
||||
// Difference of two character classes. Both operands must be MatchOp,
// anything else is reported through fatal().
RegExp * Scanner::mkDiff (RegExp * e1, RegExp * e2) const
{
	MatchOp * minuend = dynamic_cast<MatchOp *> (e1);
	MatchOp * subtrahend = dynamic_cast<MatchOp *> (e2);

	const bool both_classes = minuend != NULL && subtrahend != NULL;
	if (!both_classes)
	{
		fatal("can only difference char sets");
	}

	return cls(Range::sub (minuend->match, subtrahend->match));
}
|
||||
|
||||
// Regexp for '.': full range of the current encoding without
// the (encoded) newline character.
RegExp * Scanner::mkDot() const
{
	Range * everything = opts->encoding.fullRange();

	uint32_t newline = '\n';
	const bool encoded = opts->encoding.encode(newline);
	if (!encoded)
	{
		fatalf("Bad code point: '0x%X'", newline);
	}

	Range * only_newline = Range::sym (newline);
	return cls(Range::sub (everything, only_newline));
}
|
||||
|
||||
/*
|
||||
* Create a byte range that includes all possible input characters.
|
||||
* This may include characters, which do not map to any valid symbol
|
||||
* in current encoding. For encodings, which directly map symbols to
|
||||
* input characters (ASCII, EBCDIC, UTF-32), it equals [^]. For other
|
||||
* encodings (UTF-16, UTF-8), [^] and this range are different.
|
||||
*
|
||||
* Also note that default range doesn't respect encoding policy
|
||||
* (the way invalid code points are treated).
|
||||
*/
|
||||
RegExp * Scanner::mkDefault() const
{
	// range from 0 up to the number of code units of the current encoding
	return new MatchOp(Range::ran (0, opts->encoding.nCodeUnits()));
}
|
||||
|
||||
/*
|
||||
* note [counted repetition expansion]
|
||||
*
|
||||
 * r{0} ::= <empty regexp>
|
||||
* r{n} ::= r{n-1} r
|
||||
* r{n,m} ::= r{n} (r{0} | ... | r{m-n})
|
||||
* r{n,} ::= r{n} r*
|
||||
*/
|
||||
|
||||
// see note [counted repetition expansion]
|
||||
RegExp * repeat (RegExp * e, uint32_t n)
|
||||
{
|
||||
RegExp * r = NULL;
|
||||
for (uint32_t i = 0; i < n; ++i)
|
||||
{
|
||||
r = doCat (r, e);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// see note [counted repetition expansion]
|
||||
RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m)
|
||||
{
|
||||
RegExp * r1 = repeat (e, n);
|
||||
RegExp * r2 = NULL;
|
||||
for (uint32_t i = n; i < m; ++i)
|
||||
{
|
||||
r2 = mkAlt (new NullOp, doCat (e, r2));
|
||||
}
|
||||
return doCat (r1, r2);
|
||||
}
|
||||
|
||||
// see note [counted repetition expansion]
|
||||
RegExp * repeat_from (RegExp * e, uint32_t n)
|
||||
{
|
||||
RegExp * r1 = repeat (e, n);
|
||||
RegExp * r2 = new CloseOp (e);
|
||||
return doCat (r1, r2);
|
||||
}
|
||||
|
||||
} // namespace re2c
|
52
tools/re2c/src/ir/regexp/regexp.h
Normal file
52
tools/re2c/src/ir/regexp/regexp.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
#ifndef _RE2C_IR_REGEXP_REGEXP_
|
||||
#define _RE2C_IR_REGEXP_REGEXP_
|
||||
|
||||
#include "src/util/c99_stdint.h"
|
||||
#include <iosfwd>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "src/util/free_list.h"
|
||||
#include "src/util/forbid_copy.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
struct nfa_state_t;
|
||||
struct nfa_t;
|
||||
|
||||
typedef std::vector<uint32_t> charset_t;
|
||||
|
||||
class RegExp
|
||||
{
|
||||
public:
|
||||
static free_list <RegExp *> vFreeList;
|
||||
|
||||
inline RegExp ()
|
||||
{
|
||||
vFreeList.insert (this);
|
||||
}
|
||||
inline virtual ~RegExp ()
|
||||
{
|
||||
vFreeList.erase (this);
|
||||
}
|
||||
virtual void split (std::set<uint32_t> &) = 0;
|
||||
virtual uint32_t calc_size() const = 0;
|
||||
virtual uint32_t fixedLength ();
|
||||
virtual nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n) = 0;
|
||||
virtual void display (std::ostream &) const = 0;
|
||||
friend std::ostream & operator << (std::ostream & o, const RegExp & re);
|
||||
|
||||
FORBID_COPY (RegExp);
|
||||
};
|
||||
|
||||
RegExp * doAlt (RegExp * e1, RegExp * e2);
|
||||
RegExp * mkAlt (RegExp * e1, RegExp * e2);
|
||||
RegExp * doCat (RegExp * e1, RegExp * e2);
|
||||
RegExp * repeat (RegExp * e, uint32_t n);
|
||||
RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m);
|
||||
RegExp * repeat_from (RegExp * e, uint32_t n);
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_REGEXP_
|
31
tools/re2c/src/ir/regexp/regexp_alt.h
Normal file
31
tools/re2c/src/ir/regexp/regexp_alt.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
#ifndef _RE2C_IR_REGEXP_REGEXP_ALT_
|
||||
#define _RE2C_IR_REGEXP_REGEXP_ALT_
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class AltOp: public RegExp
|
||||
{
|
||||
RegExp * exp1;
|
||||
RegExp * exp2;
|
||||
|
||||
public:
|
||||
inline AltOp (RegExp * e1, RegExp * e2)
|
||||
: exp1 (e1)
|
||||
, exp2 (e2)
|
||||
{}
|
||||
void split (std::set<uint32_t> &);
|
||||
uint32_t calc_size() const;
|
||||
uint32_t fixedLength ();
|
||||
nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
|
||||
void display (std::ostream & o) const;
|
||||
friend RegExp * mkAlt (RegExp *, RegExp *);
|
||||
|
||||
FORBID_COPY (AltOp);
|
||||
};
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_REGEXP_ALT_
|
30
tools/re2c/src/ir/regexp/regexp_cat.h
Normal file
30
tools/re2c/src/ir/regexp/regexp_cat.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
#ifndef _RE2C_IR_REGEXP_REGEXP_CAT_
|
||||
#define _RE2C_IR_REGEXP_REGEXP_CAT_
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class CatOp: public RegExp
|
||||
{
|
||||
RegExp * exp1;
|
||||
RegExp * exp2;
|
||||
|
||||
public:
|
||||
inline CatOp (RegExp * e1, RegExp * e2)
|
||||
: exp1 (e1)
|
||||
, exp2 (e2)
|
||||
{}
|
||||
void split (std::set<uint32_t> &);
|
||||
uint32_t calc_size() const;
|
||||
uint32_t fixedLength ();
|
||||
nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
|
||||
void display (std::ostream & o) const;
|
||||
|
||||
FORBID_COPY (CatOp);
|
||||
};
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_REGEXP_CAT_
|
27
tools/re2c/src/ir/regexp/regexp_close.h
Normal file
27
tools/re2c/src/ir/regexp/regexp_close.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
#ifndef _RE2C_IR_REGEXP_REGEXP_CLOSE_
|
||||
#define _RE2C_IR_REGEXP_REGEXP_CLOSE_
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class CloseOp: public RegExp
|
||||
{
|
||||
RegExp * exp;
|
||||
|
||||
public:
|
||||
inline CloseOp (RegExp * e)
|
||||
: exp (e)
|
||||
{}
|
||||
void split (std::set<uint32_t> &);
|
||||
uint32_t calc_size() const;
|
||||
nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
|
||||
void display (std::ostream & o) const;
|
||||
|
||||
FORBID_COPY (CloseOp);
|
||||
};
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_REGEXP_CLOSE_
|
29
tools/re2c/src/ir/regexp/regexp_match.h
Normal file
29
tools/re2c/src/ir/regexp/regexp_match.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#ifndef _RE2C_IR_REGEXP_REGEXP_MATCH_
|
||||
#define _RE2C_IR_REGEXP_REGEXP_MATCH_
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
#include "src/util/range.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class MatchOp: public RegExp
|
||||
{
|
||||
public:
|
||||
Range * match;
|
||||
|
||||
inline MatchOp (Range * m)
|
||||
: match (m)
|
||||
{}
|
||||
void split (std::set<uint32_t> &);
|
||||
uint32_t calc_size() const;
|
||||
uint32_t fixedLength ();
|
||||
nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
|
||||
void display (std::ostream & o) const;
|
||||
|
||||
FORBID_COPY (MatchOp);
|
||||
};
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_REGEXP_MATCH_
|
21
tools/re2c/src/ir/regexp/regexp_null.h
Normal file
21
tools/re2c/src/ir/regexp/regexp_null.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
#ifndef _RE2C_IR_REGEXP_REGEXP_NULL_
|
||||
#define _RE2C_IR_REGEXP_REGEXP_NULL_
|
||||
|
||||
#include "src/ir/regexp/regexp.h"
|
||||
|
||||
namespace re2c
|
||||
{
|
||||
|
||||
class NullOp: public RegExp
|
||||
{
|
||||
public:
|
||||
void split (std::set<uint32_t> &);
|
||||
uint32_t calc_size() const;
|
||||
uint32_t fixedLength ();
|
||||
nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n);
|
||||
void display (std::ostream & o) const;
|
||||
};
|
||||
|
||||
} // end namespace re2c
|
||||
|
||||
#endif // _RE2C_IR_REGEXP_REGEXP_NULL_
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue