From 8c95516224b311ae2d029e3e4de6acad723f08af Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 22 Sep 2019 08:52:30 +0200 Subject: [PATCH] - added compile tools from GZDoom repo: - updaterevision for automated revision string generation - re2c as a prerequisite to use sc_man in the future - zipdir to automatically generate an engine resource file. --- tools/CMakeLists.txt | 7 + tools/re2c/CHANGELOG | 403 +++ tools/re2c/CMakeLists.txt | 104 + tools/re2c/NO_WARRANTY | 2 + tools/re2c/README | 159 + tools/re2c/config.h.in | 58 + tools/re2c/config.msc.h | 62 + tools/re2c/examples/001_upn_calculator/README | 83 + .../examples/001_upn_calculator/calc_001.re | 84 + .../examples/001_upn_calculator/calc_002.re | 69 + .../examples/001_upn_calculator/calc_003.re | 61 + .../examples/001_upn_calculator/calc_004.re | 78 + .../examples/001_upn_calculator/calc_005.re | 144 + .../examples/001_upn_calculator/calc_006.s.re | 162 + .../examples/001_upn_calculator/calc_007.b.re | 135 + .../examples/001_upn_calculator/calc_008.b.re | 158 + .../001_upn_calculator/windows/HiResTimer.h | 54 + .../001_upn_calculator/windows/main.b.re | 291 ++ tools/re2c/examples/002_strip_comments/README | 21 + .../002_strip_comments/strip_001.s.re | 147 + .../002_strip_comments/strip_002.s.re | 162 + .../002_strip_comments/strip_003.b.re | 179 ++ tools/re2c/examples/input_custom/fixed.re | 35 + .../re2c/examples/input_custom/simple/README | 20 + .../examples/input_custom/simple/default.re | 24 + .../examples/input_custom/simple/fgetc.re | 43 + .../input_custom/simple/istringstream.re | 27 + tools/re2c/examples/langs/c.re | 272 ++ tools/re2c/examples/langs/modula.re | 203 ++ tools/re2c/examples/langs/rexx.re | 319 ++ tools/re2c/examples/push_model/push.re | 340 ++ tools/re2c/src/codegen/bitmap.cc | 168 + tools/re2c/src/codegen/bitmap.h | 45 + tools/re2c/src/codegen/emit.h | 43 + tools/re2c/src/codegen/emit_action.cc | 388 +++ tools/re2c/src/codegen/emit_dfa.cc | 348 ++ tools/re2c/src/codegen/go.h 
| 216 ++ tools/re2c/src/codegen/go_construct.cc | 284 ++ tools/re2c/src/codegen/go_destruct.cc | 99 + tools/re2c/src/codegen/go_emit.cc | 271 ++ tools/re2c/src/codegen/go_used_labels.cc | 111 + tools/re2c/src/codegen/indent.h | 24 + tools/re2c/src/codegen/input_api.cc | 175 + tools/re2c/src/codegen/input_api.h | 43 + tools/re2c/src/codegen/label.cc | 42 + tools/re2c/src/codegen/label.h | 39 + tools/re2c/src/codegen/output.cc | 465 +++ tools/re2c/src/codegen/output.h | 158 + tools/re2c/src/codegen/print.cc | 156 + tools/re2c/src/codegen/print.h | 20 + tools/re2c/src/conf/msg.cc | 258 ++ tools/re2c/src/conf/msg.h | 24 + tools/re2c/src/conf/opt.cc | 331 ++ tools/re2c/src/conf/opt.h | 218 ++ tools/re2c/src/conf/parse_opts.cc | 2846 ++++++++++++++++ tools/re2c/src/conf/parse_opts.re | 261 ++ tools/re2c/src/conf/warn.cc | 200 ++ tools/re2c/src/conf/warn.h | 67 + tools/re2c/src/globals.h | 24 + tools/re2c/src/ir/adfa/action.h | 109 + tools/re2c/src/ir/adfa/adfa.cc | 135 + tools/re2c/src/ir/adfa/adfa.h | 101 + tools/re2c/src/ir/adfa/prepare.cc | 268 ++ tools/re2c/src/ir/compile.cc | 104 + tools/re2c/src/ir/compile.h | 20 + tools/re2c/src/ir/dfa/determinization.cc | 197 ++ tools/re2c/src/ir/dfa/dfa.h | 58 + tools/re2c/src/ir/dfa/fillpoints.cc | 154 + tools/re2c/src/ir/dfa/minimization.cc | 252 ++ tools/re2c/src/ir/nfa/calc_size.cc | 50 + tools/re2c/src/ir/nfa/nfa.cc | 72 + tools/re2c/src/ir/nfa/nfa.h | 90 + tools/re2c/src/ir/nfa/split.cc | 49 + tools/re2c/src/ir/regexp/display.cc | 51 + tools/re2c/src/ir/regexp/empty_class_policy.h | 15 + tools/re2c/src/ir/regexp/encoding/case.h | 31 + tools/re2c/src/ir/regexp/encoding/enc.cc | 206 ++ tools/re2c/src/ir/regexp/encoding/enc.h | 197 ++ .../src/ir/regexp/encoding/range_suffix.cc | 38 + .../src/ir/regexp/encoding/range_suffix.h | 39 + .../src/ir/regexp/encoding/utf16/utf16.cc | 10 + .../re2c/src/ir/regexp/encoding/utf16/utf16.h | 37 + .../ir/regexp/encoding/utf16/utf16_range.cc | 146 + .../ir/regexp/encoding/utf16/utf16_range.h 
| 19 + .../ir/regexp/encoding/utf16/utf16_regexp.cc | 38 + .../ir/regexp/encoding/utf16/utf16_regexp.h | 16 + .../re2c/src/ir/regexp/encoding/utf8/utf8.cc | 84 + tools/re2c/src/ir/regexp/encoding/utf8/utf8.h | 48 + .../src/ir/regexp/encoding/utf8/utf8_range.cc | 112 + .../src/ir/regexp/encoding/utf8/utf8_range.h | 18 + .../ir/regexp/encoding/utf8/utf8_regexp.cc | 36 + .../src/ir/regexp/encoding/utf8/utf8_regexp.h | 16 + tools/re2c/src/ir/regexp/fixed_length.cc | 55 + tools/re2c/src/ir/regexp/regexp.cc | 241 ++ tools/re2c/src/ir/regexp/regexp.h | 52 + tools/re2c/src/ir/regexp/regexp_alt.h | 31 + tools/re2c/src/ir/regexp/regexp_cat.h | 30 + tools/re2c/src/ir/regexp/regexp_close.h | 27 + tools/re2c/src/ir/regexp/regexp_match.h | 29 + tools/re2c/src/ir/regexp/regexp_null.h | 21 + tools/re2c/src/ir/regexp/regexp_rule.h | 52 + tools/re2c/src/ir/rule_rank.cc | 68 + tools/re2c/src/ir/rule_rank.h | 44 + tools/re2c/src/ir/skeleton/control_flow.cc | 61 + tools/re2c/src/ir/skeleton/generate_code.cc | 323 ++ tools/re2c/src/ir/skeleton/generate_data.cc | 215 ++ tools/re2c/src/ir/skeleton/match_empty.cc | 49 + tools/re2c/src/ir/skeleton/maxlen.cc | 50 + tools/re2c/src/ir/skeleton/path.h | 103 + tools/re2c/src/ir/skeleton/skeleton.cc | 163 + tools/re2c/src/ir/skeleton/skeleton.h | 174 + tools/re2c/src/ir/skeleton/unreachable.cc | 73 + tools/re2c/src/ir/skeleton/way.cc | 74 + tools/re2c/src/ir/skeleton/way.h | 20 + tools/re2c/src/main.cc | 60 + tools/re2c/src/parse/code.cc | 8 + tools/re2c/src/parse/code.h | 31 + tools/re2c/src/parse/extop.h | 17 + tools/re2c/src/parse/input.cc | 31 + tools/re2c/src/parse/input.h | 25 + tools/re2c/src/parse/lex.cc | 2861 +++++++++++++++++ tools/re2c/src/parse/lex.re | 701 ++++ tools/re2c/src/parse/lex_conf.cc | 2284 +++++++++++++ tools/re2c/src/parse/lex_conf.re | 222 ++ tools/re2c/src/parse/loc.h | 24 + tools/re2c/src/parse/parser.cc | 2396 ++++++++++++++ tools/re2c/src/parse/parser.h | 28 + tools/re2c/src/parse/parser.ypp | 775 +++++ 
tools/re2c/src/parse/rules.h | 29 + tools/re2c/src/parse/scanner.cc | 211 ++ tools/re2c/src/parse/scanner.h | 147 + tools/re2c/src/parse/spec.h | 55 + tools/re2c/src/parse/unescape.cc | 60 + tools/re2c/src/parse/unescape.h | 13 + tools/re2c/src/parse/y.tab.h | 89 + tools/re2c/src/test/range/test-impl.h | 50 + tools/re2c/src/test/range/test.cc | 94 + tools/re2c/src/test/range/test.h | 26 + tools/re2c/src/test/s_to_n32_unsafe/test.cc | 102 + tools/re2c/src/util/allocate.h | 19 + tools/re2c/src/util/attribute.h | 10 + tools/re2c/src/util/c99_stdint.h | 266 ++ tools/re2c/src/util/counter.h | 29 + tools/re2c/src/util/forbid_copy.h | 11 + tools/re2c/src/util/free_list.h | 56 + tools/re2c/src/util/local_increment.h | 22 + tools/re2c/src/util/ord_hash_set.h | 115 + tools/re2c/src/util/range.cc | 97 + tools/re2c/src/util/range.h | 65 + tools/re2c/src/util/s_to_n32_unsafe.cc | 55 + tools/re2c/src/util/s_to_n32_unsafe.h | 10 + tools/re2c/src/util/smart_ptr.h | 69 + tools/re2c/src/util/static_assert.h | 14 + tools/re2c/src/util/u32lim.h | 72 + tools/re2c/src/util/uniq_vector.h | 46 + tools/updaterevision/CMakeLists.txt | 24 + tools/updaterevision/trustinfo.rc | 6 + tools/updaterevision/trustinfo.txt | 16 + tools/updaterevision/updaterevision.c | 136 + tools/zipdir/CMakeLists.txt | 9 + tools/zipdir/zipdir.c | 1695 ++++++++++ 161 files changed, 29182 insertions(+) create mode 100644 tools/CMakeLists.txt create mode 100644 tools/re2c/CHANGELOG create mode 100644 tools/re2c/CMakeLists.txt create mode 100644 tools/re2c/NO_WARRANTY create mode 100644 tools/re2c/README create mode 100644 tools/re2c/config.h.in create mode 100644 tools/re2c/config.msc.h create mode 100644 tools/re2c/examples/001_upn_calculator/README create mode 100644 tools/re2c/examples/001_upn_calculator/calc_001.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_002.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_003.re create mode 100644 
tools/re2c/examples/001_upn_calculator/calc_004.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_005.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_006.s.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_007.b.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_008.b.re create mode 100644 tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h create mode 100644 tools/re2c/examples/001_upn_calculator/windows/main.b.re create mode 100644 tools/re2c/examples/002_strip_comments/README create mode 100644 tools/re2c/examples/002_strip_comments/strip_001.s.re create mode 100644 tools/re2c/examples/002_strip_comments/strip_002.s.re create mode 100644 tools/re2c/examples/002_strip_comments/strip_003.b.re create mode 100644 tools/re2c/examples/input_custom/fixed.re create mode 100644 tools/re2c/examples/input_custom/simple/README create mode 100644 tools/re2c/examples/input_custom/simple/default.re create mode 100644 tools/re2c/examples/input_custom/simple/fgetc.re create mode 100644 tools/re2c/examples/input_custom/simple/istringstream.re create mode 100644 tools/re2c/examples/langs/c.re create mode 100644 tools/re2c/examples/langs/modula.re create mode 100644 tools/re2c/examples/langs/rexx.re create mode 100644 tools/re2c/examples/push_model/push.re create mode 100644 tools/re2c/src/codegen/bitmap.cc create mode 100644 tools/re2c/src/codegen/bitmap.h create mode 100644 tools/re2c/src/codegen/emit.h create mode 100644 tools/re2c/src/codegen/emit_action.cc create mode 100644 tools/re2c/src/codegen/emit_dfa.cc create mode 100644 tools/re2c/src/codegen/go.h create mode 100644 tools/re2c/src/codegen/go_construct.cc create mode 100644 tools/re2c/src/codegen/go_destruct.cc create mode 100644 tools/re2c/src/codegen/go_emit.cc create mode 100644 tools/re2c/src/codegen/go_used_labels.cc create mode 100644 tools/re2c/src/codegen/indent.h create mode 100644 tools/re2c/src/codegen/input_api.cc create mode 100644 
tools/re2c/src/codegen/input_api.h create mode 100644 tools/re2c/src/codegen/label.cc create mode 100644 tools/re2c/src/codegen/label.h create mode 100644 tools/re2c/src/codegen/output.cc create mode 100644 tools/re2c/src/codegen/output.h create mode 100644 tools/re2c/src/codegen/print.cc create mode 100644 tools/re2c/src/codegen/print.h create mode 100644 tools/re2c/src/conf/msg.cc create mode 100644 tools/re2c/src/conf/msg.h create mode 100644 tools/re2c/src/conf/opt.cc create mode 100644 tools/re2c/src/conf/opt.h create mode 100644 tools/re2c/src/conf/parse_opts.cc create mode 100644 tools/re2c/src/conf/parse_opts.re create mode 100644 tools/re2c/src/conf/warn.cc create mode 100644 tools/re2c/src/conf/warn.h create mode 100644 tools/re2c/src/globals.h create mode 100644 tools/re2c/src/ir/adfa/action.h create mode 100644 tools/re2c/src/ir/adfa/adfa.cc create mode 100644 tools/re2c/src/ir/adfa/adfa.h create mode 100644 tools/re2c/src/ir/adfa/prepare.cc create mode 100644 tools/re2c/src/ir/compile.cc create mode 100644 tools/re2c/src/ir/compile.h create mode 100644 tools/re2c/src/ir/dfa/determinization.cc create mode 100644 tools/re2c/src/ir/dfa/dfa.h create mode 100644 tools/re2c/src/ir/dfa/fillpoints.cc create mode 100644 tools/re2c/src/ir/dfa/minimization.cc create mode 100644 tools/re2c/src/ir/nfa/calc_size.cc create mode 100644 tools/re2c/src/ir/nfa/nfa.cc create mode 100644 tools/re2c/src/ir/nfa/nfa.h create mode 100644 tools/re2c/src/ir/nfa/split.cc create mode 100644 tools/re2c/src/ir/regexp/display.cc create mode 100644 tools/re2c/src/ir/regexp/empty_class_policy.h create mode 100644 tools/re2c/src/ir/regexp/encoding/case.h create mode 100644 tools/re2c/src/ir/regexp/encoding/enc.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/enc.h create mode 100644 tools/re2c/src/ir/regexp/encoding/range_suffix.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/range_suffix.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc create mode 
100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h create mode 100644 tools/re2c/src/ir/regexp/fixed_length.cc create mode 100644 tools/re2c/src/ir/regexp/regexp.cc create mode 100644 tools/re2c/src/ir/regexp/regexp.h create mode 100644 tools/re2c/src/ir/regexp/regexp_alt.h create mode 100644 tools/re2c/src/ir/regexp/regexp_cat.h create mode 100644 tools/re2c/src/ir/regexp/regexp_close.h create mode 100644 tools/re2c/src/ir/regexp/regexp_match.h create mode 100644 tools/re2c/src/ir/regexp/regexp_null.h create mode 100644 tools/re2c/src/ir/regexp/regexp_rule.h create mode 100644 tools/re2c/src/ir/rule_rank.cc create mode 100644 tools/re2c/src/ir/rule_rank.h create mode 100644 tools/re2c/src/ir/skeleton/control_flow.cc create mode 100644 tools/re2c/src/ir/skeleton/generate_code.cc create mode 100644 tools/re2c/src/ir/skeleton/generate_data.cc create mode 100644 tools/re2c/src/ir/skeleton/match_empty.cc create mode 100644 tools/re2c/src/ir/skeleton/maxlen.cc create mode 100644 tools/re2c/src/ir/skeleton/path.h create mode 100644 tools/re2c/src/ir/skeleton/skeleton.cc create mode 100644 tools/re2c/src/ir/skeleton/skeleton.h create mode 100644 tools/re2c/src/ir/skeleton/unreachable.cc create mode 100644 tools/re2c/src/ir/skeleton/way.cc create mode 100644 
tools/re2c/src/ir/skeleton/way.h create mode 100644 tools/re2c/src/main.cc create mode 100644 tools/re2c/src/parse/code.cc create mode 100644 tools/re2c/src/parse/code.h create mode 100644 tools/re2c/src/parse/extop.h create mode 100644 tools/re2c/src/parse/input.cc create mode 100644 tools/re2c/src/parse/input.h create mode 100644 tools/re2c/src/parse/lex.cc create mode 100644 tools/re2c/src/parse/lex.re create mode 100644 tools/re2c/src/parse/lex_conf.cc create mode 100644 tools/re2c/src/parse/lex_conf.re create mode 100644 tools/re2c/src/parse/loc.h create mode 100644 tools/re2c/src/parse/parser.cc create mode 100644 tools/re2c/src/parse/parser.h create mode 100644 tools/re2c/src/parse/parser.ypp create mode 100644 tools/re2c/src/parse/rules.h create mode 100644 tools/re2c/src/parse/scanner.cc create mode 100644 tools/re2c/src/parse/scanner.h create mode 100644 tools/re2c/src/parse/spec.h create mode 100644 tools/re2c/src/parse/unescape.cc create mode 100644 tools/re2c/src/parse/unescape.h create mode 100644 tools/re2c/src/parse/y.tab.h create mode 100644 tools/re2c/src/test/range/test-impl.h create mode 100644 tools/re2c/src/test/range/test.cc create mode 100644 tools/re2c/src/test/range/test.h create mode 100644 tools/re2c/src/test/s_to_n32_unsafe/test.cc create mode 100644 tools/re2c/src/util/allocate.h create mode 100644 tools/re2c/src/util/attribute.h create mode 100644 tools/re2c/src/util/c99_stdint.h create mode 100644 tools/re2c/src/util/counter.h create mode 100644 tools/re2c/src/util/forbid_copy.h create mode 100644 tools/re2c/src/util/free_list.h create mode 100644 tools/re2c/src/util/local_increment.h create mode 100644 tools/re2c/src/util/ord_hash_set.h create mode 100644 tools/re2c/src/util/range.cc create mode 100644 tools/re2c/src/util/range.h create mode 100644 tools/re2c/src/util/s_to_n32_unsafe.cc create mode 100644 tools/re2c/src/util/s_to_n32_unsafe.h create mode 100644 tools/re2c/src/util/smart_ptr.h create mode 100644 
tools/re2c/src/util/static_assert.h create mode 100644 tools/re2c/src/util/u32lim.h create mode 100644 tools/re2c/src/util/uniq_vector.h create mode 100644 tools/updaterevision/CMakeLists.txt create mode 100644 tools/updaterevision/trustinfo.rc create mode 100644 tools/updaterevision/trustinfo.txt create mode 100644 tools/updaterevision/updaterevision.c create mode 100644 tools/zipdir/CMakeLists.txt create mode 100644 tools/zipdir/zipdir.c diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 000000000..21ca13352 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required( VERSION 2.8.7 ) + +add_subdirectory( re2c ) +add_subdirectory( updaterevision ) +add_subdirectory( zipdir ) + +set( CROSS_EXPORTS ${CROSS_EXPORTS} PARENT_SCOPE ) diff --git a/tools/re2c/CHANGELOG b/tools/re2c/CHANGELOG new file mode 100644 index 000000000..02a523c27 --- /dev/null +++ b/tools/re2c/CHANGELOG @@ -0,0 +1,403 @@ +Version 0.16 (2016-01-21) +--------------------------- +- Fixed bug #127 "code generation error with wide chars and bitmaps (omitted 'goto' statement)" +- Added DFA minimization and option '--dfa-minimization ' +- Fixed bug #128 "very slow DFA construction (resulting in a very large DFA)" +- Fixed bug #132 "test failure on big endian archs with 0.15.3" + +Version 0.15.3 (2015-12-02) +--------------------------- +- Fixed bugs and applied patches: + #122 "clang does not compile re2c 0.15.x" (reported and fixed by Oleksii Taran). + #124 "Get rid of UINT32_MAX and friends" (patch by Sergei Trofimovich, fixes FreeBSD builds). + #125 "[OS X] git reports changes not staged for commit in newly cloned repository" (by Oleksii Taran, this fix also applies to Windows). +- Added option --no-version that allows to omit version information. +- Reduced memory and time consumed with -Wundefined-control-flow. +- Improved coverage of input data generated with -S --skeleton. 
+ +Version 0.15.2 (2015-11-23) +--------------------------- +- Fixed build system: lexer depends on bison-generated parser + (Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=566620) + +Version 0.15.1 (2015-11-22) +--------------------------- +- Fixed test failures caused by locale-sensitive 'sort'. + +Version 0.15 (2015-11-22) +------------------------- +- Updated website http://re2c.org: + added examples + updated docs + added news + added web feed (Atom 1.0) +- Added options: + -S, --skeleton + --empty-class +- Added warnings: + -W + -Werror + -W<warning> + -Wno-<warning> + -Werror-<warning> + -Wno-error-<warning> +- Added individual warnings: + -Wundefined-control-flow + -Wunreachable-rules + -Wcondition-order + -Wuseless-escape + -Wempty-character-class + -Wswapped-range + -Wmatch-empty-string +- Fixed options: + -- (interpret remaining arguments as non-options) +- Deprecated options: + -1 --single-pass (single pass is by default now) +- Reduced size of the generated .dot files. +- Fixed bugs: + #27 re2c crashes reading files containing %{ %} (patch by Rui) + #51 default rule doesn't work in reuse mode + #52 eliminate multiple passes + #59 bogus yyaccept in -c mode + #60 redundant use of YYMARKER + #61 empty character class [] matches empty string + #115 flex-style named definitions cause ambiguity in re2c grammar + #119 -f with -b/-g generates incorrect dispatch on fill labels + #116 empty string with non-empty trailing context consumes code units +- Added test options: + -j, -j <N> (run tests in N threads, defaults to the number of CPUs) + --wine (test windows builds using wine) + --skeleton (generate skeleton programs, compile and execute them) + --keep-tmp-files (don't delete intermediate files for successful tests) +- Updated build system: + support out of source builds + support `make distcheck` + added `make bootstrap` (rebuild re2c after building with precompiled .re files) + added `make tests` (run tests with -j) + added `make vtests` (run tests with --valgrind -j) + added `make wtests` 
(run tests with --wine -j 1) + added Autoconf tests for CXXFLAGS. By default try the following options: + -W -Wall -Wextra -Weffc++ -pedantic -Wformat=2 -Wredundant-decls + -Wsuggest-attribute=format -Wconversion -Wsign-conversion -O2 -Weverything), + respect user-defined CXXFLAGS + support Mingw builds: `configure -host i686-w64-mingw32` + structured source files + removed old MSVC files +- Moved development to github (https://github.com/skvadrik/re2c), keep a mirror on sourceforge. + +Version 0.14.3 (2015-05-20) +----------------------------- +- applied patch '#27 re2c crashes reading files containing %{ %}' by Rui +- dropped distfiles for MSVC (they are broken anyway) + +Version 0.14.2 (2015-03-25) +----------------------------- +- fixed #57 Wrong result only if another rule is present + +Version 0.14.1 (2015-02-27) +----------------------------- +- fixed #55 re2c-0.14: re2c -V outputs null byte + +Version 0.14 (2015-02-23) +----------------------------- +- Added generic input API 21 (#21 Support to configure how re2c code interfaced with the symbol buffer?) +- fixed #46 re2c generates an infinite loop, depends on existence of previous parser +- fixed #47 Dot output label escaped characters + +Version 0.13.7.5 (2014-08-22) +----------------------------- +- Fixed Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=518904 (PHP lexer) + +Version 0.13.7.4 (2014-07-29) +----------------------------- +- Enabled 'make docs' only if configured with '--enable-docs' +- Disallowed to use yacc/byacc instead of bison to build parser +- Removed non-portable sed feature in script that runs tests + +Version 0.13.7.3 (2014-07-28) +----------------------------- +- Fixed CXX warning +- Got rid of asciidoc build-time dependency + +Version 0.13.7.2 (2014-07-27) +----------------------------- +- Included man page into dist, respect users CXXFLAGS. 
+ +Version 0.13.7.1 (2014-07-26) +----------------------------- +- Added missing files to tarball + +Version 0.13.7 (2014-07-25) +--------------------------- +- Added UTF-8 support +- Added UTF-16 support +- Added default rule +- Added option to control ill-formed Unicode + +Version 0.13.6 (2013-07-04) +--------------------------- +- Fixed #2535084 uint problem with Sun C 5.8 +- #3308400: allow Yacc-style %{code brackets}% +- #2506253: allow C++ // comments +- Fixed inplace configuration in -e mode. +- Applied #2482572 Typos in error messages. +- Applied #2482561 Error in manual section on -r mode. +- Fixed #2478216 Wrong start_label in -c mode. +- Fixed #2186718 Unescaped backslash in file name of #line directive. +- Fixed #2102138 Duplicate case labels on EBCDIC. +- Fixed #2088583 Compile problem on AIX. +- Fixed #2038610 Ebcdic problem. +- improve dot support: make char intervals (e.g. [A-Z]) instead of one edge per char + +Version 0.13.5 (2008-05-25) +--------------------------- +- Fixed #1952896 Segfault in re2c::Scanner::scan. +- Fixed #1952842 Regression. + +Version 0.13.4 (2008-04-05) +--------------------------- +- Added transparent handling of #line directives in input files. +- Added re2c:yyfill:check inplace configuration. +- Added re2c:define:YYSETSTATE:naked inplace configuration. +- Added re2c:flags:w and re2c:flags:u inplace configurations. +- Added the ability to add rules in 'use:re2c' blocks. +- Changed -r flag to accept only 'rules:re2c' and 'use:re2c' blocks. + +Version 0.13.3 (2008-03-14) +--------------------------- +- Added -r flag to allow reuse of scanner definitions. +- Added -F flag to support flex syntax in rules. +- Fixed SEGV in scanner that occurs with very large blocks. +- Fixed issue with unused yybm. +- Partial support for flex syntax. +- Changed to allow /* comments with -c switch. +- Added flag -D/--emit-dot. + +Version 0.13.2 (2008-02-14) +--------------------------- +- Added flag --case-inverted. 
+- Added flag --case-insensitive. +- Added support for '<!...>' to enable rule setup. +- Added support for '=>' style rules. +- Added support for ':=' style rules. +- Added support for ':=>' style rules. +- Added re2c:cond:divider and re2c:cond:goto inplace configuration. +- Fixed code generation to emit space after 'if'. + +Version 0.13.1 (2007-08-24) +--------------------------- +- Added custom build rules for Visual Studio 2005 (re2c.rules). (William Swanson) +- Fixed issue with some compilers. +- Fixed #1776177 Build on AIX. +- Fixed #1743180 fwrite with 0 length crashes on OS X. + +Version 0.13.0 (2007-06-24) +--------------------------- +- Added -c and -t to generate scanners with (f)lex-like condition support. +- Fixed issue with short form of switches and parameter if not first switch. +- Fixed #1708378 segfault in actions.cc. + +Version 0.12.3 (2007-08-24) +--------------------------- +- Fixed issue with some compilers. +- Fixed #1776177 Build on AIX. +- Fixed #1743180 fwrite with 0 length crashes on OS X. + +Version 0.12.2 (2007-06-26) +--------------------------- +- Fixed #1743180 fwrite with 0 length crashes on OS X. + +Version 0.12.1 (2007-05-23) +--------------------------- +- Fixed #1711240 problem with '"' and 7F on EBCDIC platforms. + +Version 0.12.0 (2007-05-01) +--------------------------- +- Re-release of 0.11.3 as new stable branch. +- Fixed issue with short form of switches and parameter if not first switch. +- Fixed #1708378 segfault in actions.cc. + +Version 0.11.3 (2007-04-01) +--------------------------- +- Added support for underscores in named definitions. +- Added new option --no-generation-date. +- Fixed issue with long form of switches. + +Version 0.11.2 (2007-03-01) +--------------------------- +- Added inplace configuration 're2c:yyfill:parameter'. +- Added inplace configuration 're2c:yych:conversion'. +- Fixed -u switch code generation. +- Added ability to avoid defines and overwrite variable and label names. 
+ +Version 0.11.1 (2007-02-20) +--------------------------- +- Applied #1647875 add const to yybm vector. + +Version 0.11.0 (2007-01-01) +--------------------------- +- Added -u switch to support unicode. + +Version 0.10.8 (2007-04-01) +--------------------------- +- Fixed issue with long form of switches. + +Version 0.10.7 (2007-02-20) +--------------------------- +- Applied #1647875 add const to yybm vector. + +Version 0.10.6 (2006-08-05) +--------------------------- +- Fixed #1529351 Segv bug on unterminated code blocks. +- Fixed #1528269 Invalid code generation. + +Version 0.10.5 (2006-06-11) +--------------------------- +- Fixed long form of -1 switch to --single-pass as noted in man page and help. +- Added MSVC 2003 project files and renamed old 2002 ones. + +Version 0.10.4 (2006-06-01) +--------------------------- +- Fix whitespace in generated code. + +Version 0.10.3 (2006-05-14) +--------------------------- +- Fixed issue with -wb and -ws. +- Added -g switch to support gcc's computed goto's. +- Changed to use nested if's instead of "switch(yyaccept)" in -s mode. + +Version 0.10.2 (2006-05-01) +--------------------------- +- Changed to generate YYMARKER only when needed or in single pass mode. +- Added -1 switch to force single pass generation and make two pass the default. +- Fixed -i switch. +- Added configuration 'yyfill:enable' to allow suppression of YYFILL() blocks. +- Added tutorial like lessons to re2c. +- Added /*!ignore:re2c */ to support documenting of re2c source. +- Fixed issue with multiline re2c comments (/*!max:re2c ... */ and alike). +- Fixed generation of YYDEBUG() when using -d switch. +- Added /*!getstate:re2c */ which triggers generation of the YYGETSTATE() block. +- Added configuration 'state:abort'. +- Changed to not generate yyNext unless configuration 'state:nextlabel' is used. +- Changed to not generate yyaccept code unless needed. +- Changed to use if- instead of switch-expression when yyaccept has only one case. 
+- Added documentation, examples and tests to .src.zip package (0.10.1 zip was repackaged). +- Fixed #1479044 incorrect code generated when using -b. +- Fixed #1472770 re2c creates an infinite loop. +- Fixed #1454253 Piece of code saving a backtracking point not generated. +- Fixed #1463639 Missing forward declaration. +- Implemented #1187127 savable state support for multiple re2c blocks. + +Version 0.10.1 (2006-02-28) +--------------------------- +- Added support for Solaris and native SUN compiler. +- Applied #1438160 expose YYCTXMARKER. + +Version 0.10.0 (2006-02-18) +--------------------------- +- Added make target zip to create windows source packages as zip files. +- Added re2c:startlabel configuration. +- Fixed code generation to not generate unreachable code for initial state. +- Added support for c/c++ compatible \u and \U unicode notation. +- Added ability to control indentation. +- Made scanner error out in case an ambiguous /* is found. +- Fixed indentation of generated code. +- Added support for DOS line endings. +- Added experimental unicode support. +- Added config_w32.h to build out of the box on windows (using msvc 2002+). +- Added Microsoft Visual C .NET 2005 build files. +- Applied #1411087 variable length trailing context. +- Applied #1408326 do not generate goto next state. +- Applied #1408282 CharSet initialization fix. +- Applied #1408278 readsome with MSVC. +- Applied #1307467 Unicode patch for 0.9.7. + +Version 0.9.12 (2005-12-28) +--------------------------- +- Fixed bug #1390174 re2c cannot accept {0,}. + +Version 0.9.11 (2005-12-18) +--------------------------- +- Fixed #1313083 -e (EBCDIC cross compile) broken. +- Fixed #1297658 underestimation of n in YYFILL(n). +- Applied #1339483 Avoid rebuilds of re2c when running subtargets. +- Implemented #1335305 symbol table reimplementation, just slightly modified. + +Version 0.9.10 (2005-09-04) +--------------------------- +- Added -i switch to avoid generating #line information. 
+- Fixed bug #1251653 re2c generate some invalid #line on WIN32. + +Version 0.9.9 (2005-07-21) +-------------------------- +- Implemented #1232777 negated char classes '[^...]' and the dot operator '.'. +- Added hexadecimal character definitions. +- Added consistency check for octal character definitions. + +Version 0.9.8 (2005-06-26) +-------------------------- +- Fixed code generation for -b switch. +- Added Microsoft Visual C .NET build files. + +Version 0.9.7 (2005-04-30) +-------------------------- +- Applied #1181535 storable state patch. +- Added -d flag which outputs a debuggable parser. +- Fixed generation of '#line' directives (according to ISO-C99). +- Fixed bug #1187785 Re2c fails to generate valid code. +- Fixed bug #1187452 unused variable `yyaccept'. + +Version 0.9.6 (2005-04-14) +-------------------------- +- Fixed build with gcc >= 3.4. + +Version 0.9.5 (2005-04-08) +-------------------------- +- Added /*!max:re2c */ which emits a '#define YYMAXFILL <n>\n' line + This allows to define buffers of the minimum required length. Occurrence + must follow '/*!re2c */' and cannot precede it. +- Changed re2c to two pass generation to output warning free code. +- Fixed bug #1163046 re2c hangs when processing valid re-file. +- Fixed bug #1022799 re2c scanner has buffering bug. + +Version 0.9.4 (2005-03-12) +-------------------------- +- Added --vernum support. +- Fixed bug #1054496 incorrect code generated with -b option. +- Fixed bug #1012748 re2c does not emit last line if '\n' missing. +- Fixed bug #999104 --output=output option does not work as documented. +- Fixed bug #999103 Invalid options prefixed with two dashes cause program crash. + +Version 0.9.3 (2004-05-26) +-------------------------- +- Fixes one small possible bug in the generated output. 
ych instead of yych is + output in certain circumstances + +Version 0.9.2 (2004-05-26) +-------------------------- +- Added -o option to specify the output file which also will set the #line + directives to something useful. +- Print version to cout instead cerr. +- Added -h and -- style options. +- Moved development to http://sourceforge.net/projects/re2c . +- Fixed bug #960144 minor cosmetic problem. +- Fixed bug #953181 cannot compile with. +- Fixed bug #939277 Windows support. +- Fixed bug #914462 automake build patch +- Fixed bug #891940 braced quantifiers: {\d+(,|,\d+)?} style. +- Fixed bug #869298 Add case insensitive string literals. +- Fixed bug #869297 Input buffer overrun. + +Version 0.9.1 (2003-12-13) +-------------------------- +- Removed rcs comments in source files. + +Version 0.9 +----------- +- Redistribution based on version 0.5. +- Added parentheses to assignment expressions in 'if' statements. +- Rearranged class members to match initialization order. +- Substr fix. +- Use array delete [] when necessary. +- Other minor fixes for subduing compiler warnings. 
+ diff --git a/tools/re2c/CMakeLists.txt b/tools/re2c/CMakeLists.txt new file mode 100644 index 000000000..b362a3b84 --- /dev/null +++ b/tools/re2c/CMakeLists.txt @@ -0,0 +1,104 @@ +cmake_minimum_required( VERSION 2.8.7 ) + +if( NOT CMAKE_CROSSCOMPILING ) + +include( CheckFunctionExists ) +include( CheckTypeSize ) + +if( MSVC ) + # Runtime type information is required and don't complain about uint32_t to bool conversions + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR /wd4800" ) +endif() + +set( PACKAGE_NAME re2c ) +set( PACKAGE_TARNAME re2c ) +set( PACKAGE_VERSION 0.16 ) +set( PACKAGE_STRING "re2c 0.16" ) +set( PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net" ) + +CHECK_FUNCTION_EXISTS( strdup HAVE_STRDUP ) +CHECK_FUNCTION_EXISTS( strndup HAVE_STRNDUP ) + +CHECK_TYPE_SIZE( "0i8" SIZEOF_0I8 ) +CHECK_TYPE_SIZE( "0l" SIZEOF_0L ) +CHECK_TYPE_SIZE( "0ll" SIZEOF_0LL ) +CHECK_TYPE_SIZE( char SIZEOF_CHAR ) +CHECK_TYPE_SIZE( short SIZEOF_SHORT ) +CHECK_TYPE_SIZE( int SIZEOF_INT ) +CHECK_TYPE_SIZE( long SIZEOF_LONG ) +CHECK_TYPE_SIZE( "long long" SIZEOF_LONG_LONG ) +CHECK_TYPE_SIZE( "void *" SIZEOF_VOID_P ) +CHECK_TYPE_SIZE( __int64 SIZEOF___INT64 ) + +configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h ) +include_directories( ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ) +add_definitions( -DHAVE_CONFIG_H ) + +file( GLOB SRC_HDR + src/codegen/*.h + src/conf/*.h + src/ir/*.h + src/*.h + src/parse/*.h + src/util/*.h ) + +add_executable( re2c + ${SRC_HDR} + src/codegen/bitmap.cc + src/codegen/emit_action.cc + src/codegen/emit_dfa.cc + src/codegen/label.cc + src/codegen/go_construct.cc + src/codegen/go_destruct.cc + src/codegen/go_emit.cc + src/codegen/go_used_labels.cc + src/codegen/input_api.cc + src/codegen/output.cc + src/codegen/print.cc + src/conf/msg.cc + src/conf/opt.cc + src/conf/parse_opts.cc + src/conf/warn.cc + src/ir/nfa/calc_size.cc + src/ir/nfa/nfa.cc + src/ir/nfa/split.cc + src/ir/adfa/adfa.cc + 
src/ir/adfa/prepare.cc + src/ir/dfa/determinization.cc + src/ir/dfa/fillpoints.cc + src/ir/dfa/minimization.cc + src/ir/regexp/display.cc + src/ir/regexp/encoding/enc.cc + src/ir/regexp/encoding/range_suffix.cc + src/ir/regexp/encoding/utf8/utf8_regexp.cc + src/ir/regexp/encoding/utf8/utf8_range.cc + src/ir/regexp/encoding/utf8/utf8.cc + src/ir/regexp/encoding/utf16/utf16_regexp.cc + src/ir/regexp/encoding/utf16/utf16.cc + src/ir/regexp/encoding/utf16/utf16_range.cc + src/ir/regexp/fixed_length.cc + src/ir/regexp/regexp.cc + src/ir/compile.cc + src/ir/rule_rank.cc + src/ir/skeleton/control_flow.cc + src/ir/skeleton/generate_code.cc + src/ir/skeleton/generate_data.cc + src/ir/skeleton/match_empty.cc + src/ir/skeleton/maxlen.cc + src/ir/skeleton/skeleton.cc + src/ir/skeleton/unreachable.cc + src/ir/skeleton/way.cc + src/main.cc + src/parse/code.cc + src/parse/input.cc + src/parse/lex.cc + src/parse/lex_conf.cc + src/parse/parser.cc + src/parse/scanner.cc + src/parse/unescape.cc + src/util/s_to_n32_unsafe.cc + src/util/range.cc ) + +set( CROSS_EXPORTS ${CROSS_EXPORTS} re2c PARENT_SCOPE ) + +endif() diff --git a/tools/re2c/NO_WARRANTY b/tools/re2c/NO_WARRANTY new file mode 100644 index 000000000..885a13d06 --- /dev/null +++ b/tools/re2c/NO_WARRANTY @@ -0,0 +1,2 @@ +re2c is distributed with no warranty whatever. The author and any other +contributors take no responsibility for the consequences of its use. diff --git a/tools/re2c/README b/tools/re2c/README new file mode 100644 index 000000000..29d97b674 --- /dev/null +++ b/tools/re2c/README @@ -0,0 +1,159 @@ +re2c +-------------------------------------------------------------------------------- + + +DESCRIPTION +-------------------------------------------------------------------------------- +re2c is a tool for generating C-based recognizers from regular expressions. 
+re2c-based scanners are efficient: for programming languages, given similar +specifications, a re2c-based scanner is typically almost twice as fast as a +flex-based scanner with little or no increase in size (possibly a decrease +on cisc architectures). Indeed, re2c-based scanners are quite competitive with +hand-crafted ones. + +Unlike flex, re2c does not generate complete scanners: the user must supply some +interface code. While this code is not bulky (about 50-100 lines for a +flex-like scanner; see the man page and examples in the distribution) careful +coding is required for efficiency (and correctness). One advantage of this +arrangement is that the generated code is not tied to any particular input +model. +-------------------------------------------------------------------------------- + + +DOWNLOAD +-------------------------------------------------------------------------------- +The re2c distribution can be found at: + https://sourceforge.net/projects/re2c/ + +Download the latest tarball: + https://sourceforge.net/projects/re2c/files/latest/download + +Clone git repo: + git clone git://git.code.sf.net/p/re2c/code-git +-------------------------------------------------------------------------------- + + +BUILD +-------------------------------------------------------------------------------- +Contents: + 1. simple build + 2. bootstrap + 3. out-of-source build + 4. testing + 5. rebuild documentation + 6. build for windows with mingw + 7. build from git + +1. Simplest possible build: + $ ./configure [--prefix=<dir>] + $ make + $ make install +This will build re2c and install it (binary and man page) to <dir> (defaults +to /usr/local). + +2. Bootstrap and rebuild: + $ ./configure [--prefix=<dir>] + $ make bootstrap + $ make install +Usual bootstrap procedure: re2c uses re2c to compile its lexer. +1. build lexer (if make finds re2c binary in build directory, it will build lexer + from source, otherwise it will use prebuilt lexer) +2. build re2c +3. 
build lexer from source using re2c binary in build directory +4. rebuild re2c + +3. Out-of-source build: + $ mkdir + $ cd + $ /configure [--prefix=] + $ make + $ make install + +4. Testing: + $ make check +This will redirect test script output to file. If you want to see progress: + $ make tests +Testing under valgrind (takes a long time): + $ make vtests + +5. Rebuild documentation (requires rst2man.py): + $ ./configure --enable-docs [--prefix=] + $ make docs + $ make install + +6. Build for windows using mingw: + $ ../configure --host i686-w64-mingw32 [--prefix=] + $ make +This will result into an executable re2c.exe, which can be tested with wine: + $ make wtests + +7. If you want to build from git, you'll first need to generate autotools files: + $ ./autogen.sh +-------------------------------------------------------------------------------- + + +INFO +-------------------------------------------------------------------------------- + $ man re2c + +re2c home page: + re2c.org + +re2c manual: + re2c.org/manual.html + +Ulya Trofimovich's blog on re2c: + skvadrik.github.io/aleph_null/re2c.html + +Original paper on re2c: "RE2C: a More Versatile Parser Generator" (1994, Peter +Bumbulis and Donald D. Cowan). + +Examples can be found in 'examples' directory. 
+-------------------------------------------------------------------------------- + + +MAILING LISTS +-------------------------------------------------------------------------------- +re2c-general: + re2c-general@lists.sourceforge.net +re2c-devel: + re2c-devel@lists.sourceforge.net + +You are welcome to ask for help or share your thoughts and ideas about re2c :) +-------------------------------------------------------------------------------- + + +BUGS +-------------------------------------------------------------------------------- +Please report any bugs and send feature requests to: + https://sourceforge.net/p/re2c/_list/tickets +-------------------------------------------------------------------------------- + + +AUTHORS +-------------------------------------------------------------------------------- +Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca) +Currently maintained by: + Ulya Trofimovich + Dan Nuffer + Marcus Boerger + Hartmut Kaiser +-------------------------------------------------------------------------------- + + +LICENSE +-------------------------------------------------------------------------------- +re2c is distributed with no warranty whatever. The code is certain to contain +errors. Neither the author nor any contributor takes responsibility for any +consequences of its use. + +re2c is in the public domain. The data structures and algorithms used in re2c +are all either taken from documents available to the general public or are +inventions of the authors. Programs generated by re2c may be distributed freely. +re2c itself may be distributed freely, in source or binary, unchanged or +modified. Distributors may charge whatever fees they can obtain for re2c. + +If you do make use of re2c, or incorporate it into a larger project an +acknowledgement somewhere (documentation, research report, etc.) would be +appreciated. 
+-------------------------------------------------------------------------------- diff --git a/tools/re2c/config.h.in b/tools/re2c/config.h.in new file mode 100644 index 000000000..38029b7cf --- /dev/null +++ b/tools/re2c/config.h.in @@ -0,0 +1,58 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDINT_H @HAVE_STDINT_H@ + +/* Name of package */ +#cmakedefine PACKAGE "@PACKAGE_NAME@" + +/* Define to the address where bug reports for this package should be sent. */ +#cmakedefine PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@" + +/* Define to the full name of this package. */ +#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@" + +/* Define to the full name and version of this package. */ +#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@" + +/* Define to the one symbol short name of this package. */ +#cmakedefine PACKAGE_TARNAME "@PACKAGE_TARNAME@" + +/* Define to the home page for this package. */ +#cmakedefine PACKAGE_URL "@PACKAGE_URL@" + +/* Define to the version of this package. */ +#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@" + +/* The size of `0i8', as computed by sizeof. */ +#cmakedefine SIZEOF_0I8 @SIZEOF_0I8@ + +/* The size of `0l', as computed by sizeof. */ +#cmakedefine SIZEOF_0L @SIZEOF_0L@ + +/* The size of `0ll', as computed by sizeof. */ +#cmakedefine SIZEOF_0LL @SIZEOF_0LL@ + +/* The size of `char', as computed by sizeof. */ +#cmakedefine SIZEOF_CHAR @SIZEOF_CHAR@ + +/* The size of `int', as computed by sizeof. */ +#cmakedefine SIZEOF_INT @SIZEOF_INT@ + +/* The size of `long', as computed by sizeof. */ +#cmakedefine SIZEOF_LONG @SIZEOF_LONG@ + +/* The size of `long long', as computed by sizeof. */ +#cmakedefine SIZEOF_LONG_LONG @SIZEOF_LONG_LONG@ + +/* The size of `short', as computed by sizeof. */ +#cmakedefine SIZEOF_SHORT @SIZEOF_SHORT@ + +/* The size of `void *', as computed by sizeof. 
*/ +#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@ + +/* The size of `__int64', as computed by sizeof. */ +#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@ + +/* Version number of package */ +#cmakedefine VERSION @PACKAGE_VERSION@ diff --git a/tools/re2c/config.msc.h b/tools/re2c/config.msc.h new file mode 100644 index 000000000..d64e6d9e9 --- /dev/null +++ b/tools/re2c/config.msc.h @@ -0,0 +1,62 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_STDINT_H */ + +/* Name of package */ +/* #undef PACKAGE */ + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "re2c" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "re2c 0.16" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "re2c" + +/* Define to the home page for this package. */ +/* #undef PACKAGE_URL */ + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "0.16" + +/* The size of `0i8', as computed by sizeof. */ +#define SIZEOF_0I8 1 + +/* The size of `0l', as computed by sizeof. */ +#define SIZEOF_0L 4 + +/* The size of `0ll', as computed by sizeof. */ +#define SIZEOF_0LL 8 + +/* The size of `char', as computed by sizeof. */ +#define SIZEOF_CHAR 1 + +/* The size of `int', as computed by sizeof. */ +#define SIZEOF_INT 4 + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 + +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 + +/* The size of `short', as computed by sizeof. */ +#define SIZEOF_SHORT 2 + +/* The size of `void *', as computed by sizeof. */ +#ifdef _M_X64 +#define SIZEOF_VOID_P 8 +#else +#define SIZEOF_VOID_P 4 +#endif + +/* The size of `__int64', as computed by sizeof. 
*/ +#define SIZEOF___INT64 8 + +/* Version number of package */ +/* #undef VERSION */ diff --git a/tools/re2c/examples/001_upn_calculator/README b/tools/re2c/examples/001_upn_calculator/README new file mode 100644 index 000000000..81377d75a --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/README @@ -0,0 +1,83 @@ +re2c lesson 001_upn_calculator, (c) M. Boerger 2006 + +This lesson gets you started with re2c. In the end you will have an easy RPN +(reverse polish notation) calculator for use at command line. + +You will learn about the basic interface of re2c when scanning input strings. +How to detect the end of the input and use that to stop scanning in order to +avoid problems. + +Once you have successfully installed re2c you can use it to generate *.c files +from the *.re files presented in this lesson. Actually the expected *.c files +are already present. So you should name them *.cc or something alike or just +give them a different name like test.c. To do so you simply change into the +directory and execute the following command: + + re2c calc_001.re > test.c + +Then use your compiler to compile that code and run it. If you are using gcc +you simply do the following: + + gcc -o test.o test.c + ./test.o + +If you are using windows you might want to read till the end of this lesson. + +When you want to debug the code it helps to make re2c generate working #line +information. To do so you simply specify the output file using the -o switch +followed by the output filename: + + re2c -o test.c calc_001.re + +The input files *.re each contain basic step by comments that explain what is +going on and what you can see in the examples. + +In order to optimize the generated code we will use the -s command line switch +of re2c. This tells re2c to generate code that uses if statements rather +then endless switch/case expressions where appropriate. Note that the file name +extension is actually '.s.re' to tell the test system to use the -s switch. 
To +invoke re2c you do the following: + + re2c -s -o test.c calc_006.s.re + +Finally we use the -b switch to have the code use a decision table. The -b +switch also contains the -s behavior. + + re2c -b -o test.c calc_007.b.re + + + +------------------------------------------------------------------------------- + +For windows users Lynn Allan provided some additional stuff to get you started +in the Microsoft world. This addon resides in the windows subdirectory and +gives you something to experiment with. The code in that directory is based +on the first step and has the following changes: + +* vc6 .dsp/.dsw and vc7/vc8 .sln/.vcproj project files that have "Custom Build +Steps" that can tell when main.re changes, and know how to generate main.c +from main.re. They assume that you unpacked the zip package and have re2c +itself built or installed in Release and Release-2005 directory respectively. +If re2c cannot be found you need to modify the custom build step and correct +the path to re2c. + +* BuildAndRun.bat to do command line re2c and then cl and then run the +executable (discontinues with message if errors). + +* built-in cppunit-like test to confirm it worked as expected. + +* array of test strings "fed" to scan rather than file contents to facilitate +testing and also reduce the newbie learning curve. + +* HiResTimer output for 10,000 loops and 100,000 loops. While this might be +excessive for this lesson, it illustrates how to do it for subsequent lessons +and your own stuff using windows. Also it shows that Release build is as fast +as strncmp for this test and can probably be made significantly faster. + +* If you want to build the other steps of this lesson using windows tools +simply copy the *.re files into the windows directory as main.re and rebuild. 
+ + +------------------------------------------------------------------------------- +Sidenote: UPN is the German translation of RPN, somehow hardcoded into the +author's brain :-) diff --git a/tools/re2c/examples/001_upn_calculator/calc_001.re b/tools/re2c/examples/001_upn_calculator/calc_001.re new file mode 100644 index 000000000..fe8d3ae1b --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_001.re @@ -0,0 +1,84 @@ +/* re2c lesson 001_upn_calculator, calc_001, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- basic interface for string reading + + . We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL + . YYCTYPE is the type re2c operates on or in other words the type that + it generates code for. While it is not a big difference when we were + using 'unsigned char' here we would need to run re2c with option -w + to fully support types with sizeof() > 1. + . YYCURSOR is used internally and holds the current scanner position. In + expression handlers, the code blocks after re2c expressions, this can be + used to identify the end of the token. + . YYMARKER is not always being used so we set an initial value to avoid + a compiler warning. Here we could also omit it completely. + . YYLIMIT stores the end of the input. Unfortunately we have to use strlen() + in this lesson. In the next example we see one way to get rid of it. + . We use a 'for(;;)'-loop around the scanner block. We could have used a + 'while(1)'-loop instead but some compilers generate a warning for it. + . To make the output more readable we use 're2c:indent:top' scanner + configuration that configures re2c to prepend a single tab (the default) + to the beginning of each output line. + . The following lines are expressions and for each expression we output the + token name and continue the scanner loop. + . The second last token detects the end of our input, the terminating zero in + our input string. In other scanners detecting the end of input may vary. 
+ For example binary code may contain \0 as valid input. + . The last expression accepts any input character. It tells re2c to accept + the opposite of the empty range. This includes numbers and our tokens but + as re2c goes from top to bottom when evaluating the expressions this is no + problem. + . The first three rules show that re2c actually prioritizes the expressions + from top to bottom. Octal numbers require a starting "0" and the actual + number. Normal numbers start with a digit greater than 0. And zero is finally a + special case. A single "0" is detected by the last rule of this set. And + a valid octal number is already being detected by the first rule. This even + includes multi "0" sequences that in octal notation also means zero. + Another way would be to only use two rules: + "0" [0-9]+ + "0" | ( [1-9] [0-9]* ) + A full description of re2c rule syntax can be found in the manual. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l) +#define YYMARKER q +#define YYFILL(n) + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + "0"[0-9]+ { printf("Oct\n"); continue; } + [1-9][0-9]* { printf("Num\n"); continue; } + "0" { printf("Num\n"); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("-\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1], strlen(argv[1])); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_002.re b/tools/re2c/examples/001_upn_calculator/calc_002.re new file mode 100644 index 000000000..417e9f315 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_002.re @@ -0,0 +1,69 @@ +/* re2c lesson 001_upn_calculator, calc_002, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- making use of YYFILL + + . 
Here we modified the scanner to not require strlen() on the call. Instead + we compute limit on the fly. That is whenever more input is needed we + search for the terminating \0 in the next n chars the scanner needs. + . If there is not enough input we quit the scanner. + . Note that in lesson_001 YYLIMIT was a character pointer computed only once. + Here it is of course also of type YYCTYPE but a variable that gets reevaluated + by YYFILL(). + . To make the code smaller we take advantage of the fact that our loop has no + break so far. This allows us to use break here and have the code that is + used for YYFILL() not contain the printf in every occurrence. That way the + generated code gets smaller. + +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +int fill(char *p, int n, char **l) +{ + while (*++p && n--) ; + * l = p; + return n <= 0; +} + +int scan(char *s) +{ + char *p = s; + char *l = s; + char *q = 0; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT l +#define YYMARKER q +#define YYFILL(n) { if (!fill(p, n, &l)) break; } + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + "0"[0-9]+ { printf("Oct\n"); continue; } + [1-9][0-9]* { printf("Num\n"); continue; } + "0" { printf("Num\n"); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("-\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } + printf("OOD\n"); return 2; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1]); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_003.re b/tools/re2c/examples/001_upn_calculator/calc_003.re new file mode 100644 index 000000000..e48aec928 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_003.re @@ -0,0 +1,61 @@ +/* re2c lesson 001_upn_calculator, calc_003, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- making use of YYFILL + + . 
Again provide the length of the input to generate the limit only once. Now + we can use YYFILL() to detect the end and simply return since YYFILL() is + only being used if the next scanner run might use more chars than YYLIMIT + allows. + . Note that we now use (s+l+2) instead of (s+l) as we did in lesson_001. In + the first lesson we did not quit from YYFILL() and used a special rule to + detect the end of input. Here we use the fact that we know the exact end + of input and that this length does not include the terminating zero. Since + YYLIMIT points to the first character behind the used buffer we use "+ 2". + If we would use "+1" we could drop the "\000" rule but could no longer + distinguish between end of input and out of data. + +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l+2) +#define YYMARKER q +#define YYFILL(n) { printf("OOD\n"); return 2; } + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + "0"[0-9]+ { printf("Oct\n"); continue; } + [1-9][0-9]* { printf("Num\n"); continue; } + "0" { printf("Num\n"); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("-\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } + return 0; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1], strlen(argv[1])); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_004.re b/tools/re2c/examples/001_upn_calculator/calc_004.re new file mode 100644 index 000000000..977e438bf --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_004.re @@ -0,0 +1,78 @@ +/* re2c lesson 001_upn_calculator, calc_004, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- making use of definitions + . We provide complex rules as definitions. We can even have definitions made + up from other definitions. 
And we could also use definitions as part of + rules and not only as full rules as shown in this lesson. + +- showing the tokens + . re2c does not store the beginning of a token on its own but we can easily + do this by providing a variable, in our case t, that is set to YYCURSOR on + every loop. If we were not using a loop here, we could have used s for the + token instead of a new variable. + . As we use the token for an output function that requires a terminating zero + we copy the token. Alternatively we could store the end of the token, then + replace it with a zero character and replace it after the token has been + used. However that approach is not always acceptable. + +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +char * tokendup(const char *t, const char *l) +{ + size_t n = l -t + 1; /* token length plus the terminating zero */ + char *r = (char*)malloc(n); + + memmove(r, t, n-1); + r[n-1] = '\0'; /* last valid index of the n-byte buffer; r[n] would be out of bounds */ + return r; +} + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; + char *t; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l+2) +#define YYMARKER q +#define YYFILL(n) { printf("OOD\n"); return 2; } + + for(;;) + { + t = p; +/*!re2c + re2c:indent:top = 2; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + + OCT { t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; } + INT { t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("-\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } + return 0; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1], strlen(argv[1])); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_005.re b/tools/re2c/examples/001_upn_calculator/calc_005.re new file mode 100644 index 000000000..6ae2a484c --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_005.re @@ -0,0 +1,144 @@ +/* re2c lesson 
001_upn_calculator, calc_005, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- turning this lesson into an easy calculator + . We are going to write an UPN calculator so we need an additional rule to + ignore white space. + . Then we need to store the scanned input somewhere and do our math on it. + . Also we need to scan all arguments since the main c code gets the input + split up into chunks. + . In contrast to what we did before we now add a variable res that holds the + scanner state. We initialize that variable to 0 and quit the loop when it + is non zero. This will also be our return value so that we can use it in + function main to generate error information. + . To support operating systems where ' and " get passed in program arguments + we check for them being first and last input character. If so we correct + input pointer and input length. Since now our scanner might not see a + terminating zero we change YYLIMIT again and drop the special zero rule. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const char *t, const char *l, int radix) +{ + int num = 0; + + if (depth >= (int)(sizeof(stack) / sizeof(stack[0]))) /* element count, not byte size */ + { + return 3; + } + + --t; + while(++t < l) + { + num = num * radix + (*t - '0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + return 0; +} + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; + char *t; + int res = 0; + +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l+1) +#define YYMARKER q +#define YYFILL(n) { return depth == 1 ? 
0 : 2; } + + while(!res) + { + t = p; +/*!re2c + re2c:indent:top = 2; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = argv[argp]; + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + ++inp; + len -=2; + } + res = scan(inp, len); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_006.s.re b/tools/re2c/examples/001_upn_calculator/calc_006.s.re new file mode 100644 index 000000000..10da31cd1 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_006.s.re @@ -0,0 +1,162 @@ +/* re2c lesson 001_upn_calculator, calc_006, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- avoiding YYFILL() + . We use the inplace configuration re2c:yyfill to suppress generation of + YYFILL() blocks. This of course means we no longer have to provide the + macro. + . We also drop the YYMARKER stuff since we know that re2c does not generate + it for this example. + . Since re2c does no longer check for out of data situations we must do this. + For that reason we first reintroduce our zero rule and second we need to + ensure that the scanner does not take more than one bytes in one go. 
+ + In the example suppose "0" is passed. The scanner reads the first "0" and + then is in an undecided state. The scanner can earliest decide on the next + char what the token is. In case of a zero the input ends and it was a + number, 0 to be precise. In case of a digit it is an octal number and the + next character needs to be read. In case of any other character the scanner + will detect an error with the any rule [^]. + + Now the above shows that the scanner may read two characters directly. But + only if the first is a "0". So we could easily check that if the first char + is "0" and the next char is a digit then yet another character is present. + But we require our input to be zero terminated. And that means we do not + have to check anything for this scanner. + + However with other rule sets re2c might read more than one character in a + row. In those cases it is normally hard to impossible to avoid YYFILL. + +- optimizing the generated code by using -s command line switch of re2c + . This tells re2c to generate code that uses if statements rather + than endless switch/case expressions where appropriate. Note that the + generated code now requires the input to be unsigned char rather than char + due to the way comparisons are generated. 
+*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const unsigned char *t, const unsigned char *l, int radix) +{ + int num = 0; + + if (depth >= (int)(sizeof(stack) / sizeof(stack[0]))) /* element count, not byte size */ + { + return 3; + } + + --t; + while(++t < l) + { + num = num * radix + (*t - (unsigned char)'0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + DEBUG(printf("-\n")); + return 0; +} + +int scan(char *s) +{ + unsigned char *p = (unsigned char*)s; + unsigned char *t; + int res = 0; + +#define YYCTYPE unsigned char +#define YYCURSOR p + + while(!res) + { + t = p; +/*!re2c + re2c:indent:top = 2; + re2c:yyfill:enable = 0; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + "\000" { res = depth == 1 ? 
0 : 2; break; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = strdup(argv[argp]); + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + inp[len - 1] = '\0'; + ++inp; + } + res = scan(inp); + free(inp); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_007.b.re b/tools/re2c/examples/001_upn_calculator/calc_007.b.re new file mode 100644 index 000000000..523819654 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_007.b.re @@ -0,0 +1,135 @@ +/* re2c lesson 001_upn_calculator, calc_007, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- optimizing the generated code by using -b command line switch of re2c + . This tells re2c to generate code that uses a decision table. The -b switch + also contains the -s behavior. And -b also requires the input to be + unsigned chars. 
+*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const unsigned char *t, const unsigned char *l, int radix) +{ + int num = 0; + + if (depth >= (int)(sizeof(stack) / sizeof(stack[0]))) /* compare against the element count, not the byte size */ + { + return 3; + } + + /* fold the digits of the token [t, l) into num */ + for(; t < l; ++t) + { + num = num * radix + (*t - (unsigned char)'0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + DEBUG(printf("-\n")); /* trace the operator that was actually applied */ + return 0; +} + +int scan(char *s) +{ + unsigned char *p = (unsigned char*)s; + unsigned char *t; + int res = 0; + +#define YYCTYPE unsigned char +#define YYCURSOR p + + while(!res) + { + t = p; +/*!re2c + re2c:indent:top = 2; + re2c:yyfill:enable = 0; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + "\000" { res = depth == 1 ? 
0 : 2; break; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = strdup(argv[argp]); + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + inp[len - 1] = '\0'; + memmove(inp, inp + 1, len - 1); /* drop the opening quote in place so free() still gets strdup()'s pointer */ + } + res = scan(inp); + free(inp); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_008.b.re b/tools/re2c/examples/001_upn_calculator/calc_008.b.re new file mode 100644 index 000000000..ed1a088e4 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_008.b.re @@ -0,0 +1,158 @@ +/* re2c lesson 001_upn_calculator, calc_008, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- using -b with signed character input + . Since the code is being generated with -b switch re2c requires the internal + character variable yych to use an unsigned character type. For that reason + the previous lessons had a conversion at the beginning of their scan() + function. Other re2c generated code often have the scanners work completely + on unsigned input. Thus requesting a conversion. + + To avoid the conversion on input, re2c allows to do the conversion when + reading the internal yych variable. To enable that conversion you need to + use the inplace configuration 're2c:yych:conversion' and set it to 1. This + will change the generated code to insert conversions to YYCTYPE whenever + yych is being read. + +- More inplace configurations for better/nicer code + . 
re2c allows to overwrite the generation of any define, label or variable + used in the generated code. For example we overwrite the 'yych' variable + name to 'curr' using inplace configuration 're2c:variable:yych = curr;'. + + . We furthermore use inplace configurations instead of defines. This allows + to use correct conversions to 'unsigned char' instead of having to convert + to 'YYCTYPE' when placing 're2c:define:YYCTYPE = "unsigned char";' in front + of 're2c:yych:conversion'. Note that we have to use quotes for the + first setting as it contains a space. + + . Last but not least we use 're2c:labelprefix = scan' to change the prefix + of generated labels. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const char *t, const char *l, int radix) +{ + int num = 0; + + if (depth >= (int)(sizeof(stack) / sizeof(stack[0]))) /* compare against the element count, not the byte size */ + { + return 3; + } + + /* fold the digits of the token [t, l) into num */ + for(; t < l; ++t) + { + num = num * radix + (*t - '0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + DEBUG(printf("-\n")); /* trace the operator that was actually applied */ + return 0; +} + +int scan(char *p) +{ + char *t; + int res = 0; + + while(!res) + { + t = p; +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = p; + re2c:variable:yych = curr; + re2c:indent:top = 2; + re2c:yyfill:enable = 0; + re2c:yych:conversion = 1; + re2c:labelprefix = scan; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + "\000" { res = depth == 1 ? 
0 : 2; break; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = strdup(argv[argp]); + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + inp[len - 1] = '\0'; + memmove(inp, inp + 1, len - 1); /* drop the opening quote in place so free() still gets strdup()'s pointer */ + } + res = scan(inp); + free(inp); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h b/tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h new file mode 100644 index 000000000..585a1d98f --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h @@ -0,0 +1,54 @@ +/** + * @file HiResTimer.h + * @brief + * @note + */ + +#ifndef _HI_RES_TIMER_H_ +#define _HI_RES_TIMER_H_ + +#ifdef WIN32 +#include <windows.h> // probably already done in stdafx.h +static LARGE_INTEGER start; +static LARGE_INTEGER stop; +static LARGE_INTEGER freq; +static _int64 elapsedCounts; +static double elapsedMillis; +static double elapsedMicros; +static HANDLE processHandle; +static DWORD prevPriorityClass; + +void HrtInit() +{ + processHandle = GetCurrentProcess(); + prevPriorityClass = GetPriorityClass(processHandle); + QueryPerformanceFrequency(&freq); +} + +void HrtStart() +{ + QueryPerformanceCounter(&start); +} + +void HrtSetPriority(DWORD priority) +{ + int flag; + prevPriorityClass = GetPriorityClass(processHandle); + flag = SetPriorityClass(processHandle, priority); +} + +void HrtResetPriority(void) +{ + int flag = SetPriorityClass(processHandle, prevPriorityClass); +} + +double HrtElapsedMillis() +{ + 
QueryPerformanceCounter(&stop); + elapsedCounts = (stop.QuadPart - start.QuadPart); + elapsedMillis = ((elapsedCounts * 1000.0) / freq.QuadPart); + return elapsedMillis; +} + +#endif +#endif \ No newline at end of file diff --git a/tools/re2c/examples/001_upn_calculator/windows/main.b.re b/tools/re2c/examples/001_upn_calculator/windows/main.b.re new file mode 100644 index 000000000..1600b83d2 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/windows/main.b.re @@ -0,0 +1,291 @@ +/* re2c lesson 001_upn_calculator, main.b.re, (c) M. Boerger, L. Allan 2006 */ +/*!ignore:re2c + +- basic interface for string reading + + . We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL + . YYCTYPE is the type re2c operates on or in other words the type that + it generates code for. While it is not a big difference when we were + using 'unsigned char' here we would need to run re2c with option -w + to fully support types with sizeof() > 1. + . YYCURSOR is used internally and holds the current scanner position. In + expression handlers, the code blocks after re2c expressions, this can be + used to identify the end of the token. + . YYMARKER is not always being used so we set an initial value to avoid + a compiler warning. + . YYLIMIT stores the end of the input. Unfortunately we have to use strlen() + in this lesson. In the next example we see one way to get rid of it. + . We use a 'for(;;)'-loop around the scanner block. We could have used a + 'while(1)'-loop instead but some compilers generate a warning for it. + . To make the output more readable we use 're2c:indent:top' scanner + configuration that configures re2c to prepend a single tab (the default) + to the beginning of each output line. + . The following lines are expressions and for each expression we output the + token name and continue the scanner loop. + . The second last token detects the end of our input, the terminating zero in + our input string. 
In other scanners detecting the end of input may vary. + For example binary code may contain \0 as valid input. + . The last expression accepts any input character. It tells re2c to accept + the opposite of the empty range. This includes numbers and our tokens but + as re2c goes from top to bottom when evaluating the expressions this is no + problem. + . The first three rules show that re2c actually prioritizes the expressions + from top to bottom. Octal numbers require a starting "0" and the actual + number. Normal numbers start with a digit greater than 0. And zero is finally a + special case. A single "0" is detected by the last rule of this set. And + a valid octal number is already being detected by the first rule. This even + includes multi "0" sequences that in octal notation also mean zero. + Another way would be to only use two rules: + "0" [0-9]+ + "0" | ( [1-9] [0-9]* ) + A full description of re2c rule syntax can be found in the manual. +*/ + +#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers + +#if _MSC_VER > 1200 +#define WINVER 0x0400 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later. 
+#endif // Prevents warning from vc7.1 complaining about redefinition + +#include <windows.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "HiResTimer.h" + +static char gTestBuf[1000] = ""; + +/** + * @brief Setup HiResolution timer and confirm it is working ok + */ +void InitHiResTimerAndVerifyWorking(void) +{ + double elapsed; + HrtInit(); + HrtSetPriority(ABOVE_NORMAL_PRIORITY_CLASS); + HrtStart(); + Sleep(100); + elapsed = HrtElapsedMillis(); + if ((elapsed < 90) || (elapsed > 110)) { + printf("HiResTimer misbehaving: %f\n", elapsed); + exit(2); + } +} + +/** + * @brief Scan for numbers in different formats without any output (used for timing) + */ +int ScanFullSpeed(char *pzStrToScan, size_t lenStrToScan) +{ + unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan; + unsigned char *pzBacktrackInfo = 0; +#define YYCTYPE unsigned char +#define YYCURSOR pzCurScanPos +#define YYLIMIT ((unsigned char*)pzStrToScan+lenStrToScan) +#define YYMARKER pzBacktrackInfo +#define YYFILL(n) + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + [1-9][0-9]* { continue; } + [0][0-9]+ { continue; } + "+" { continue; } + "-" { continue; } + "\000" { return 0; } + [^] { return 1; } +*/ + } +} + +/** + * @brief Scan for numbers in different formats, printing each token and logging it in gTestBuf + */ +int scan(char *pzStrToScan, size_t lenStrToScan) +{ + unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan; + unsigned char *pzBacktrackInfo = 0; +#define YYCTYPE unsigned char +#define YYCURSOR pzCurScanPos +#define YYLIMIT ((unsigned char*)pzStrToScan+lenStrToScan) +#define YYMARKER pzBacktrackInfo +#define YYFILL(n) + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + [1-9][0-9]* { printf("Num\n"); strcat(gTestBuf, "Num "); continue; } + [0][0-9]+ { printf("Oct\n"); strcat(gTestBuf, "Oct "); continue; } + "+" { printf("+\n"); strcat(gTestBuf, "+ "); continue; } + "-" { printf("-\n"); strcat(gTestBuf, "- "); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); strcat(gTestBuf, "ERR "); return 1; } +*/ + } +} + +/** + * @brief Show high resolution elapsed time for 10,000 and 100,000 
loops of strncmp() + */ +void DoTimingsOfStrnCmp(void) +{ + char testStr[] = "Hello, world"; + int totLoops = 10000; + int totFoundCount = 0; + int foundCount = 0; + int loop; + int rc; + const int progressAnd = 0xFFFFF000; + double elapsed; + + printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); + + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + rc = strncmp(testStr, "Hello", 5); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); + + totLoops = 100000; + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + rc = strncmp(testStr, "Hello", 5); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); +} + +/** + * @brief Show high resolution elapsed time for roughly 10,000 and 100,000 ScanFullSpeed() calls + */ +void DoTimingsOfRe2c(void) +{ + char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" }; + const int testCount = sizeof(testStrings) / sizeof(testStrings[0]); + int i; + int totLoops = 10000 / testCount; // Doing more than one per loop + int totFoundCount = 0; + int foundCount = 0; + int loop; + int rc; + const int progressAnd = 0xFFFFF000; + double elapsed; + + printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); + + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + strcpy(gTestBuf, ""); + for (i = 
0; i < testCount; ++i) { + char* pzCurStr = testStrings[i]; + size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string? + rc = ScanFullSpeed(pzCurStr, len); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); + + totLoops = 100000 / testCount; + printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); + + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + strcpy(gTestBuf, ""); + for (i = 0; i < testCount; ++i) { + char* pzCurStr = testStrings[i]; + size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string? + rc = ScanFullSpeed(pzCurStr, len); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); +} + +/** + * @brief Entry point for console app + */ +int main(int argc, char **argv) +{ + char testStr_A[] = "123"; + char* testStr_B = "456"; + char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" }; + const int testCount = sizeof(testStrings) / sizeof(testStrings[0]); + int i; + + int rc = scan(testStr_A, 3); + printf("rc: %d\n", rc); + + rc = scan(testStr_B, 3); + printf("rc: %d\n", rc); + + rc = scan("789", 3); + printf("rc: %d\n", rc); + + strcpy(gTestBuf, ""); + for (i = 0; i < testCount; ++i) { + char* pzCurStr = testStrings[i]; + size_t len = strlen(pzCurStr); + scan(pzCurStr, len); + } + printf("%s\n", 
gTestBuf); + rc = strcmp(gTestBuf, "Num Num + Num Oct - Oct ERR Num ERR "); + if (rc == 0) { + printf("Success\n"); + } + else { + printf("Failure\n"); + } + assert(0 == rc); // Doesn't work with Release build + + InitHiResTimerAndVerifyWorking(); + + DoTimingsOfStrnCmp(); + + DoTimingsOfRe2c(); + + return 0; +} diff --git a/tools/re2c/examples/002_strip_comments/README b/tools/re2c/examples/002_strip_comments/README new file mode 100644 index 000000000..353d66904 --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/README @@ -0,0 +1,21 @@ +re2c lesson 002_strip_comments, (c) M. Boerger 2006 + +In this lesson you will learn how to use multiple scanner blocks and how to +read the input from a file instead of a zero terminated string. In the end you +will have a scanner that filters comments out of c source files but keeps re2c +comments. + +The first scanner can be generated with: + + re2c -s -o t.c strip_001.s.re + +In the second step we will learn about YYMARKER that stores backtracking +information. + + re2c -s -o t.c strip_002.s.re + +The third step brings trailing contexts that are stored in YYCTXMARKER. We also +change to use -b instead of -s option since the scanner gets more and more +complex. + + re2c -b -o t.c strip_003.b.re diff --git a/tools/re2c/examples/002_strip_comments/strip_001.s.re b/tools/re2c/examples/002_strip_comments/strip_001.s.re new file mode 100644 index 000000000..5525ae3c9 --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/strip_001.s.re @@ -0,0 +1,147 @@ +/* re2c lesson 002_strip_comments, strip_001.s, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- basic interface for file reading + . This scanner will read chunks of input from a file. The easiest way would + be to read the whole file into a memory buffer and use that as a zero + terminated string. + . Instead we want to read input chunks of a reasonable size as they are needed + by the scanner. Thus we basically need YYFILL(n) to call fread(n). + . 
Before we provide a buffer that we constantly reallocate we instead use + one buffer that we get from the stack or global memory just once. When we + reach the end of the buffer we simply move the beginning of our input + that is somewhere in our buffer to the beginning of our buffer and then + append the next chunk of input to the correct end inside our buffer. + . As re2c scanners might read more than one character we need to ensure our + buffer is long enough. We can use re2c to inform about the maximum size + by placing a "!max:re2c" comment somewhere. This gets translated to a + "#define YYMAXFILL <n>" line where <n> is the maximum length value. This + define can be used as preprocessor condition. + +- multiple scanner blocks + . We use a main scanner block that outputs every input character unless the + input is two /s or a / followed by a *. In the latter two cases we switch + to a special c++ comment and a comment block respectively. + . Both special blocks simply detect their end and ignore any other character. + . The c++ block is a bit special. Since the terminating new line needs to + be output and that can either be a new line or a carriage return followed + by a new line. + . In order to ensure that we do not read behind our buffer we reset the token + pointer to the cursor on every scanner run. 
+*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/*!max:re2c */ +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE unsigned char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + unsigned char *cur, *tok, *lim, *eof; + unsigned char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->cur = s->tok = s->lim = s->buffer; + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memmove(s->buffer, s->tok, s->lim - s->tok); /* regions overlap inside buffer, memcpy would be undefined */ + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + } + cnt = BSIZE - (int)(s->lim - s->buffer); /* free space behind lim, so fread cannot run past buffer */ + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +int scan(FILE *fp) +{ + int res = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; +/*!re2c + re2c:indent:top = 2; + + NL = "\r"? "\n" ; + ANY = [^] ; + + "/" "/" { goto cppcomment; } + "/" "*" { goto comment; } + ANY { fputc(*s.tok, stdout); continue; } +*/ +comment: + s.tok = s.cur; +/*!re2c + "*" "/" { continue; } + ANY { goto comment; } +*/ +cppcomment: + s.tok = s.cur; +/*!re2c + NL { fwrite(s.tok, 1, s.cur - s.tok, stdout); continue; } + ANY { goto cppcomment; } +*/ + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? 
stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/002_strip_comments/strip_002.s.re b/tools/re2c/examples/002_strip_comments/strip_002.s.re new file mode 100644 index 000000000..3c2a6cf8c --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/strip_002.s.re @@ -0,0 +1,162 @@ +/* re2c lesson 002_strip_comments, strip_002.s, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- complexity + . When a comment is preceded by a new line and followed by whitespace and a + new line then we can drop the trailing whitespace and new line. + . Additional to what we strip out already what about two consecutive comment + blocks? When two comments are only separated by whitespace we want to drop + both. In other words when detecting the end of a comment block we need to + check whether it is followed by only whitespace and then a new comment in + which case we continue ignoring the input. If it is followed only by white + space and a new line we strip out the new white space and new line. In any + other case we start outputting all that follows. + But we cannot simply use the following two rules: + "*" "/" WS* "/" "*" { continue; } + "*" "/" WS* NL { continue; } + The main problem is that WS* can get bigger than our buffer, so we need a + new scanner. + . Meanwhile our scanner gets a bit more complex and we have to add two more + things. First the scanner code now uses a YYMARKER to store backtracking + information. + +- backtracking information + . When the scanner has two rules that can have the same beginning but a + different ending then it needs to store the position that identifies the + common part. This is called backtracking. As mentioned above re2c expects + you to provide compiler define YYMARKER and a pointer variable. + . When shifting buffer contents as done in our fill function the marker needs + to be corrected, too. 
+ +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/*!max:re2c */ +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE unsigned char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYMARKER s.mrk +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + unsigned char *cur, *tok, *lim, *eof, *mrk; + unsigned char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->cur = s->tok = s->lim = s->mrk = s->buffer; + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memmove(s->buffer, s->tok, s->lim - s->tok); /* regions overlap inside buffer, memcpy would be undefined */ + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + s->mrk -= cnt; + } + cnt = BSIZE - (int)(s->lim - s->buffer); /* free space behind lim, so fread cannot run past buffer */ + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +void echo(Scanner *s) +{ + fwrite(s->tok, 1, s->cur - s->tok, stdout); +} + +int scan(FILE *fp) +{ + int res = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; +/*!re2c + re2c:indent:top = 2; + + NL = "\r"? "\n" ; + WS = [\r\n\t ] ; + ANY = [^] ; + + "/" "/" { goto cppcomment; } + "/" "*" { goto comment; } + ANY { fputc(*s.tok, stdout); continue; } +*/ +comment: + s.tok = s.cur; +/*!re2c + "*" "/" { goto commentws; } + ANY { goto comment; } +*/ +commentws: + s.tok = s.cur; +/*!re2c + NL { echo(&s); continue; } + WS { goto commentws; } + ANY { echo(&s); continue; } +*/ +cppcomment: + s.tok = s.cur; +/*!re2c + NL { echo(&s); continue; } + ANY { goto cppcomment; } +*/ + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? 
stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/002_strip_comments/strip_003.b.re b/tools/re2c/examples/002_strip_comments/strip_003.b.re new file mode 100644 index 000000000..a7b1a5c72 --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/strip_003.b.re @@ -0,0 +1,179 @@ +/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- more complexity + . Right now we strip out trailing white space and new lines after a comment + block. This can be a problem when the comment block was not preceded by + a new line. + . The solution is to use trailing contexts. + +- trailing contexts + . Re2c allows to check for a portion of input and only recognize it when it + is followed by another portion. This is called a trailing context. + . The trailing context is not part of the identified input. That means that + it follows exactly at the cursor. A consequence is that the scanner has + already read more input and on the next run you need to restore beginning + of input, in our case s.tok, from the cursor, here s.cur, rather than + restoring to the beginning of the buffer. This way the scanner can reuse + the portion it has already read. + . The position of the trailing context is stored in YYCTXMARKER for which + a pointer variable needs to be provided. + . As with YYMARKER the corresponding variable needs to be corrected if we + shift in some buffer. + . Still this is not all we need to solve the problem. What is left is that + the information whether a trailing context was detected has to + be stored somewhere. This is done by the new variable nlcomment. + +- formatting + . Until now we only used single line expression code and we always had the + opening { on the same line as the rule itself. If we have multiline rule + code and care for formatting we can no longer rely on re2c. Now we have + to indent the rule code ourselves. 
Also we need to take care of the opening + {. If we keep it on the same line as the rule then re2c will indent it + correctly and the emitted #line information will be correct. If we place + it on the next line then the #line directive will also point to that line + and not to the rule. +*/ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/*!max:re2c */ +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE unsigned char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYMARKER s.mrk +#define YYCTXMARKER s.ctx +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + unsigned char *cur, *tok, *lim, *eof, *ctx, *mrk; + unsigned char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->cur = s->tok = s->lim = s->mrk = s->ctx = s->buffer; /* ctx is adjusted below, so it must start out valid too */ + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memmove(s->buffer, s->tok, s->lim - s->tok); /* regions overlap inside buffer, memcpy would be undefined */ + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + s->mrk -= cnt; + s->ctx -= cnt; + } + cnt = BSIZE - (int)(s->lim - s->buffer); /* free space behind lim, so fread cannot run past buffer */ + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +void echo(Scanner *s) +{ + fwrite(s->tok, 1, s->cur - s->tok, stdout); +} + +int scan(FILE *fp) +{ + int res = 0; + int nlcomment = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; +/*!re2c + re2c:indent:top = 2; + + NL = "\r"? "\n" ; + WS = [\r\n\t ] ; + ANY = [^] ; + + "/" "/" { goto cppcomment; } + NL / "/""*" { echo(&s); nlcomment = 1; continue; } + "/" "*" { goto comment; } + ANY { fputc(*s.tok, stdout); continue; } +*/ +comment: + s.tok = s.cur; +/*!re2c + "*" "/" { goto commentws; } + ANY { goto comment; } +*/ +commentws: + s.tok = s.cur; +/*!re2c + NL? 
"/" "*" { goto comment; } + NL { + if (!nlcomment) + { + echo(&s); + } + nlcomment = 0; + continue; + } + WS { goto commentws; } + ANY { echo(&s); nlcomment = 0; continue; } +*/ +cppcomment: + s.tok = s.cur; +/*!re2c + NL { echo(&s); continue; } + ANY { goto cppcomment; } +*/ + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/input_custom/fixed.re b/tools/re2c/examples/input_custom/fixed.re new file mode 100644 index 000000000..51f3b2b00 --- /dev/null +++ b/tools/re2c/examples/input_custom/fixed.re @@ -0,0 +1,35 @@ +// Build with "--input custom" re2c switch. +// +// This is an example of handling fixed-length buffer with "--input custom": +// on each YYPEEK we check for the end of input, thus YYFILL generation +// can be safely suppressed. +// +// Note that YYLIMIT points not to terminating NULL, but to the previous +// character: we emulate the case when input has no terminating NULL. +// +// For a real-life example see https://github.com/sopyer/mjson +// or mjson.re from re2c test collection. + +bool lex (const char * cursor, const char * const limit) +{ + const char * marker; + const char * ctxmarker; +# define YYCTYPE char +# define YYPEEK() (cursor >= limit ? 
0 : *cursor) +# define YYSKIP() ++cursor +# define YYBACKUP() marker = cursor +# define YYBACKUPCTX() ctxmarker = cursor +# define YYRESTORE() cursor = marker +# define YYRESTORECTX() cursor = ctxmarker + /*!re2c + re2c:yyfill:enable = 0; + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + char buffer [] = "int buffer [1024]"; + return !lex (buffer, buffer + sizeof (buffer) - 1); +} diff --git a/tools/re2c/examples/input_custom/simple/README b/tools/re2c/examples/input_custom/simple/README new file mode 100644 index 000000000..c0c4d955a --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/README @@ -0,0 +1,20 @@ +Build with "--input custom" re2c switch. + +These are three examples of "--input custom" usage: + +- input_custom_default.re: + implements default re2c input model (pointers to plain buffer) + +- input_custom_fgetc: + implements C-style file input (using <stdio.h>) + +- input_custom_istringstream: + implements std::istringstream input + +Note that these examples are very simple and don't need +to implement YYFILL; the only reason they don't use +"re2c:yyfill:enable = 0;" is to keep YYLESSTHAN and YYLIMIT +(for the sake of example). + +In real-life programs one will need to care for correct +end-of-input handling. 
diff --git a/tools/re2c/examples/input_custom/simple/default.re b/tools/re2c/examples/input_custom/simple/default.re new file mode 100644 index 000000000..94cde7cd6 --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/default.re @@ -0,0 +1,24 @@ +bool lex (const char * cursor, const char * const limit) +{ + const char * marker; + const char * ctxmarker; +# define YYCTYPE char +# define YYPEEK() *cursor +# define YYSKIP() ++cursor +# define YYBACKUP() marker = cursor +# define YYBACKUPCTX() ctxmarker = cursor +# define YYRESTORE() cursor = marker +# define YYRESTORECTX() cursor = ctxmarker +# define YYLESSTHAN(n) limit - cursor < n +# define YYFILL(n) {} + /*!re2c + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + char buffer [] = "int buffer [1024]"; + return !lex (buffer, buffer + sizeof (buffer)); +} diff --git a/tools/re2c/examples/input_custom/simple/fgetc.re b/tools/re2c/examples/input_custom/simple/fgetc.re new file mode 100644 index 000000000..d2dffd9a5 --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/fgetc.re @@ -0,0 +1,43 @@ +#include + +char peek (FILE * f) +{ + char c = fgetc (f); + ungetc (c, f); + return c; +} + +bool lex (FILE * f, const long limit) +{ + long marker; + long ctxmarker; +# define YYCTYPE char +# define YYPEEK() peek (f) +# define YYSKIP() fgetc (f) +# define YYBACKUP() marker = ftell (f) +# define YYBACKUPCTX() ctxmarker = ftell (f) +# define YYRESTORE() fseek (f, marker, SEEK_SET) +# define YYRESTORECTX() fseek (f, ctxmarker, SEEK_SET) +# define YYLESSTHAN(n) limit - ftell (f) < n +# define YYFILL(n) {} + /*!re2c + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + const char buffer [] = "int buffer [1024]"; + const char fn [] = "input.txt"; + + FILE * f = fopen (fn, "w"); + fwrite (buffer, 1, sizeof (buffer), f); + fclose (f); + + f = fopen (fn, "rb"); + int result = !lex (f, sizeof (buffer)); + fclose (f); + + return 
result; +} diff --git a/tools/re2c/examples/input_custom/simple/istringstream.re b/tools/re2c/examples/input_custom/simple/istringstream.re new file mode 100644 index 000000000..5d702291e --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/istringstream.re @@ -0,0 +1,27 @@ +#include + +bool lex (std::istringstream & is, const std::streampos limit) +{ + std::streampos marker; + std::streampos ctxmarker; +# define YYCTYPE char +# define YYPEEK() is.peek () +# define YYSKIP() is.ignore () +# define YYBACKUP() marker = is.tellg () +# define YYBACKUPCTX() ctxmarker = is.tellg () +# define YYRESTORE() is.seekg (marker) +# define YYRESTORECTX() is.seekg (ctxmarker) +# define YYLESSTHAN(n) limit - is.tellg () < n +# define YYFILL(n) {} + /*!re2c + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + const char buffer [] = "int buffer [1024]"; + std::istringstream is (buffer); + return !lex (is, sizeof (buffer)); +} diff --git a/tools/re2c/examples/langs/c.re b/tools/re2c/examples/langs/c.re new file mode 100644 index 000000000..7e413e2d0 --- /dev/null +++ b/tools/re2c/examples/langs/c.re @@ -0,0 +1,272 @@ +#include +#include +#include + +#define ADDEQ 257 +#define ANDAND 258 +#define ANDEQ 259 +#define ARRAY 260 +#define ASM 261 +#define AUTO 262 +#define BREAK 263 +#define CASE 264 +#define CHAR 265 +#define CONST 266 +#define CONTINUE 267 +#define DECR 268 +#define DEFAULT 269 +#define DEREF 270 +#define DIVEQ 271 +#define DO 272 +#define DOUBLE 273 +#define ELLIPSIS 274 +#define ELSE 275 +#define ENUM 276 +#define EQL 277 +#define EXTERN 278 +#define FCON 279 +#define FLOAT 280 +#define FOR 281 +#define FUNCTION 282 +#define GEQ 283 +#define GOTO 284 +#define ICON 285 +#define ID 286 +#define IF 287 +#define INCR 288 +#define INT 289 +#define LEQ 290 +#define LONG 291 +#define LSHIFT 292 +#define LSHIFTEQ 293 +#define MODEQ 294 +#define MULEQ 295 +#define NEQ 296 +#define OREQ 297 +#define OROR 298 +#define POINTER 
299 +#define REGISTER 300 +#define RETURN 301 +#define RSHIFT 302 +#define RSHIFTEQ 303 +#define SCON 304 +#define SHORT 305 +#define SIGNED 306 +#define SIZEOF 307 +#define STATIC 308 +#define STRUCT 309 +#define SUBEQ 310 +#define SWITCH 311 +#define TYPEDEF 312 +#define UNION 313 +#define UNSIGNED 314 +#define VOID 315 +#define VOLATILE 316 +#define WHILE 317 +#define XOREQ 318 +#define EOI 319 + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL(n) {cursor = fill(s, cursor);} + +#define RET(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; + uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uint line; +} Scanner; + +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof) { + uint cnt = s->tok - s->bot; + if(cnt){ + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; + } + if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ + s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; + } + s->lim += cnt; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; +std: + s->tok = cursor; +/*!re2c +any = [\000-\377]; +O = [0-7]; +D = [0-9]; +L = [a-zA-Z_]; +H = [a-fA-F0-9]; +E = [Ee] [+-]? 
D+; +FS = [fFlL]; +IS = [uUlL]*; +ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); +*/ + +/*!re2c + "/*" { goto comment; } + + "auto" { RET(AUTO); } + "break" { RET(BREAK); } + "case" { RET(CASE); } + "char" { RET(CHAR); } + "const" { RET(CONST); } + "continue" { RET(CONTINUE); } + "default" { RET(DEFAULT); } + "do" { RET(DO); } + "double" { RET(DOUBLE); } + "else" { RET(ELSE); } + "enum" { RET(ENUM); } + "extern" { RET(EXTERN); } + "float" { RET(FLOAT); } + "for" { RET(FOR); } + "goto" { RET(GOTO); } + "if" { RET(IF); } + "int" { RET(INT); } + "long" { RET(LONG); } + "register" { RET(REGISTER); } + "return" { RET(RETURN); } + "short" { RET(SHORT); } + "signed" { RET(SIGNED); } + "sizeof" { RET(SIZEOF); } + "static" { RET(STATIC); } + "struct" { RET(STRUCT); } + "switch" { RET(SWITCH); } + "typedef" { RET(TYPEDEF); } + "union" { RET(UNION); } + "unsigned" { RET(UNSIGNED); } + "void" { RET(VOID); } + "volatile" { RET(VOLATILE); } + "while" { RET(WHILE); } + + L (L|D)* { RET(ID); } + + ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | + (['] (ESC|any\[\n\\'])* [']) + { RET(ICON); } + + (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) + { RET(FCON); } + + (["] (ESC|any\[\n\\"])* ["]) + { RET(SCON); } + + "..." { RET(ELLIPSIS); } + ">>=" { RET(RSHIFTEQ); } + "<<=" { RET(LSHIFTEQ); } + "+=" { RET(ADDEQ); } + "-=" { RET(SUBEQ); } + "*=" { RET(MULEQ); } + "/=" { RET(DIVEQ); } + "%=" { RET(MODEQ); } + "&=" { RET(ANDEQ); } + "^=" { RET(XOREQ); } + "|=" { RET(OREQ); } + ">>" { RET(RSHIFT); } + "<<" { RET(LSHIFT); } + "++" { RET(INCR); } + "--" { RET(DECR); } + "->" { RET(DEREF); } + "&&" { RET(ANDAND); } + "||" { RET(OROR); } + "<=" { RET(LEQ); } + ">=" { RET(GEQ); } + "==" { RET(EQL); } + "!=" { RET(NEQ); } + ";" { RET(';'); } + "{" { RET('{'); } + "}" { RET('}'); } + "," { RET(','); } + ":" { RET(':'); } + "=" { RET('='); } + "(" { RET('('); } + ")" { RET(')'); } + "[" { RET('['); } + "]" { RET(']'); } + "." { RET('.'); } + "&" { RET('&'); } + "!" 
{ RET('!'); } + "~" { RET('~'); } + "-" { RET('-'); } + "+" { RET('+'); } + "*" { RET('*'); } + "/" { RET('/'); } + "%" { RET('%'); } + "<" { RET('<'); } + ">" { RET('>'); } + "^" { RET('^'); } + "|" { RET('|'); } + "?" { RET('?'); } + + + [ \t\v\f]+ { goto std; } + + "\n" + { + if(cursor == s->eof) RET(EOI); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\n", *s->tok); + goto std; + } +*/ + +comment: +/*!re2c + "*/" { goto std; } + "\n" + { + if(cursor == s->eof) RET(EOI); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + any { goto comment; } +*/ +} + +main(){ + Scanner in; + int t; + memset((char*) &in, 0, sizeof(in)); + in.fd = 0; + while((t = scan(&in)) != EOI){ +/* + printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); + printf("%d\n", t); +*/ + } + close(in.fd); +} diff --git a/tools/re2c/examples/langs/modula.re b/tools/re2c/examples/langs/modula.re new file mode 100644 index 000000000..186b0cc13 --- /dev/null +++ b/tools/re2c/examples/langs/modula.re @@ -0,0 +1,203 @@ +#include +#include +#include + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYCTXMARKER s->ctx +#define YYFILL {cursor = fill(s, cursor);} + +#define RETURN(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; + uchar *bot, *tok, *ptr, *ctx, *cur, *pos, *lim, *top, *eof; + uint line; +} Scanner; + +uchar *fill(Scanner *s, uchar *cursor){ + if(!s->eof){ + uint cnt = s->tok - s->bot; + if(cnt){ + memcpy(s->bot, s->tok, s->lim - s->tok); + s->tok = s->bot; + s->ptr -= cnt; + cursor -= cnt; + s->pos -= cnt; + s->lim -= cnt; + } + if((s->top - s->lim) < BSIZE){ + uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); + memcpy(buf, s->tok, s->lim - s->tok); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->pos = &buf[s->pos - 
s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = &s->lim[BSIZE]; + free(s->bot); + s->bot = buf; + } + if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ + s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; + } + s->lim += cnt; + } + return cursor; +} + +int scan(Scanner *s){ + uchar *cursor = s->cur; + uint depth; +std: + s->tok = cursor; +/*!re2c +any = [\000-\377]; +digit = [0-9]; +letter = [a-zA-Z]; +*/ + +/*!re2c + "(*" { depth = 1; goto comment; } + + digit + {RETURN(1);} + digit + / ".." {RETURN(1);} + [0-7] + "B" {RETURN(2);} + [0-7] + "C" {RETURN(3);} + digit [0-9A-F] * "H" {RETURN(4);} + digit + "." digit * ("E" ([+-]) ? digit +) ? {RETURN(5);} + ['] (any\[\n']) * ['] | ["] (any\[\n"]) * ["] {RETURN(6);} + + "#" {RETURN(7);} + "&" {RETURN(8);} + "(" {RETURN(9);} + ")" {RETURN(10);} + "*" {RETURN(11);} + "+" {RETURN(12);} + "," {RETURN(13);} + "-" {RETURN(14);} + "." {RETURN(15);} + ".." {RETURN(16);} + "/" {RETURN(17);} + ":" {RETURN(18);} + ":=" {RETURN(19);} + ";" {RETURN(20);} + "<" {RETURN(21);} + "<=" {RETURN(22);} + "<>" {RETURN(23);} + "=" {RETURN(24);} + ">" {RETURN(25);} + ">=" {RETURN(26);} + "[" {RETURN(27);} + "]" {RETURN(28);} + "^" {RETURN(29);} + "{" {RETURN(30);} + "|" {RETURN(31);} + "}" {RETURN(32);} + "~" {RETURN(33);} + + "AND" {RETURN(34);} + "ARRAY" {RETURN(35);} + "BEGIN" {RETURN(36);} + "BY" {RETURN(37);} + "CASE" {RETURN(38);} + "CONST" {RETURN(39);} + "DEFINITION" {RETURN(40);} + "DIV" {RETURN(41);} + "DO" {RETURN(42);} + "ELSE" {RETURN(43);} + "ELSIF" {RETURN(44);} + "END" {RETURN(45);} + "EXIT" {RETURN(46);} + "EXPORT" {RETURN(47);} + "FOR" {RETURN(48);} + "FROM" {RETURN(49);} + "IF" {RETURN(50);} + "IMPLEMENTATION" {RETURN(51);} + "IMPORT" {RETURN(52);} + "IN" {RETURN(53);} + "LOOP" {RETURN(54);} + "MOD" {RETURN(55);} + "MODULE" {RETURN(56);} + "NOT" {RETURN(57);} + "OF" {RETURN(58);} + "OR" {RETURN(59);} + "POINTER" {RETURN(60);} + "PROCEDURE" {RETURN(61);} + "QUALIFIED" {RETURN(62);} + "RECORD" {RETURN(63);} + "REPEAT" 
{RETURN(64);} + "RETURN" {RETURN(65);} + "SET" {RETURN(66);} + "THEN" {RETURN(67);} + "TO" {RETURN(68);} + "TYPE" {RETURN(69);} + "UNTIL" {RETURN(70);} + "VAR" {RETURN(71);} + "WHILE" {RETURN(72);} + "WITH" {RETURN(73);} + + letter (letter | digit) * {RETURN(74);} + + [ \t]+ { goto std; } + + "\n" + { + if(cursor == s->eof) RETURN(0); + s->pos = cursor; s->line++; + goto std; + } + + any + { + printf("unexpected character: %c\n", *s->tok); + goto std; + } +*/ +comment: +/*!re2c + "*)" + { + if(--depth == 0) + goto std; + else + goto comment; + } + "(*" { ++depth; goto comment; } + "\n" + { + if(cursor == s->eof) RETURN(0); + s->tok = s->pos = cursor; s->line++; + goto comment; + } + any { goto comment; } +*/ +} + +/* +void putStr(FILE *o, char *s, uint l){ + while(l-- > 0) + putc(*s++, o); +} +*/ + +main(){ + Scanner in; + memset((char*) &in, 0, sizeof(in)); + in.fd = 0; + while(scan(&in)){ +/* + putc('<', stdout); + putStr(stdout, (char*) in.tok, in.cur - in.tok); + putc('>', stdout); + putc('\n', stdout); +*/ + } +} diff --git a/tools/re2c/examples/langs/rexx.re b/tools/re2c/examples/langs/rexx.re new file mode 100644 index 000000000..b74741daf --- /dev/null +++ b/tools/re2c/examples/langs/rexx.re @@ -0,0 +1,319 @@ +#include "scanio.h" +#include "scanner.h" + +#define CURSOR ch +#define LOADCURSOR ch = *cursor; +#define ADVANCE cursor++; +#define BACK(n) cursor -= (n); +#define CHECK(n) if((ScanCB.lim - cursor) < (n)){cursor = ScanFill(cursor);} +#define MARK(n) ScanCB.ptr = cursor; sel = (n); +#define REVERT cursor = ScanCB.ptr; +#define MARKER sel + +#define RETURN(i) {ScanCB.cur = cursor; return i;} + +int ScanToken(){ + uchar *cursor = ScanCB.cur; + unsigned sel; + uchar ch; + ScanCB.tok = cursor; + ScanCB.eot = NULL; +/*!re2c +all = [\000-\377]; +eof = [\000]; +any = all\eof; +letter = [a-z]|[A-Z]; +digit = [0-9]; +symchr = letter|digit|[.!?_]; +const = (digit|[.])symchr*([eE][+-]?digit+)?; +simple = (symchr\(digit|[.]))(symchr\[.])*; +stem = simple [.]; 
+symbol = symchr*; +sqstr = ['] ((any\['\n])|(['][']))* [']; +dqstr = ["] ((any\["\n])|(["]["]))* ["]; +str = sqstr|dqstr; +ob = [ \t]*; +not = [\\~]; +A = [aA]; +B = [bB]; +C = [cC]; +D = [dD]; +E = [eE]; +F = [fF]; +G = [gG]; +H = [hH]; +I = [iI]; +J = [jJ]; +K = [kK]; +L = [lL]; +M = [mM]; +N = [nN]; +O = [oO]; +P = [pP]; +Q = [qQ]; +R = [rR]; +S = [sS]; +T = [tT]; +U = [uU]; +V = [vV]; +W = [wW]; +X = [xX]; +Y = [yY]; +Z = [zZ]; +*/ + +scan: +/*!re2c +"\n" + { + ++(ScanCB.lineNum); + ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk); + RETURN(SU_EOL); + } +"|" ob "|" + { RETURN(OP_CONCAT); } +"+" + { RETURN(OP_PLUS); } +"-" + { RETURN(OP_MINUS); } +"*" + { RETURN(OP_MULT); } +"/" + { RETURN(OP_DIV); } +"%" + { RETURN(OP_IDIV); } +"/" ob "/" + { RETURN(OP_REMAIN); } +"*" ob "*" + { RETURN(OP_POWER); } +"=" + { RETURN(OP_EQUAL); } +not ob "=" | "<" ob ">" | ">" ob "<" + { RETURN(OP_EQUAL_N); } +">" + { RETURN(OP_GT); } +"<" + { RETURN(OP_LT); } +">" ob "=" | not ob "<" + { RETURN(OP_GE); } +"<" ob "=" | not ob ">" + { RETURN(OP_LE); } +"=" ob "=" + { RETURN(OP_EQUAL_EQ); } +not ob "=" ob "=" + { RETURN(OP_EQUAL_EQ_N); } +">" ob ">" + { RETURN(OP_GT_STRICT); } +"<" ob "<" + { RETURN(OP_LT_STRICT); } +">" ob ">" ob "=" | not ob "<" ob "<" + { RETURN(OP_GE_STRICT); } +"<" ob "<" ob "=" | not ob ">" ob ">" + { RETURN(OP_LE_STRICT); } +"&" + { RETURN(OP_AND); } +"|" + { RETURN(OP_OR); } +"&" ob "&" + { RETURN(OP_XOR); } +not + { RETURN(OP_NOT); } + +":" + { RETURN(SU_COLON); } +"," + { RETURN(SU_COMMA); } +"(" + { RETURN(SU_POPEN); } +")" + { RETURN(SU_PCLOSE); } +";" + { RETURN(SU_EOC); } + +A D D R E S S + { RETURN(RX_ADDRESS); } +A R G + { RETURN(RX_ARG); } +C A L L + { RETURN(RX_CALL); } +D O + { RETURN(RX_DO); } +D R O P + { RETURN(RX_DROP); } +E L S E + { RETURN(RX_ELSE); } +E N D + { RETURN(RX_END); } +E X I T + { RETURN(RX_EXIT); } +I F + { RETURN(RX_IF); } +I N T E R P R E T + { RETURN(RX_INTERPRET); } +I T E R A T E + { RETURN(RX_ITERATE); } +L E A V E + { 
RETURN(RX_LEAVE); } +N O P + { RETURN(RX_NOP); } +N U M E R I C + { RETURN(RX_NUMERIC); } +O P T I O N S + { RETURN(RX_OPTIONS); } +O T H E R W I S E + { RETURN(RX_OTHERWISE); } +P A R S E + { RETURN(RX_PARSE); } +P R O C E D U R E + { RETURN(RX_PROCEDURE); } +P U L L + { RETURN(RX_PULL); } +P U S H + { RETURN(RX_PUSH); } +Q U E U E + { RETURN(RX_QUEUE); } +R E T U R N + { RETURN(RX_RETURN); } +S A Y + { RETURN(RX_SAY); } +S E L E C T + { RETURN(RX_SELECT); } +S I G N A L + { RETURN(RX_SIGNAL); } +T H E N + { RETURN(RX_THEN); } +T R A C E + { RETURN(RX_TRACE); } +W H E N + { RETURN(RX_WHEN); } +O F F + { RETURN(RXS_OFF); } +O N + { RETURN(RXS_ON); } +B Y + { RETURN(RXS_BY); } +D I G I T S + { RETURN(RXS_DIGITS); } +E N G I N E E R I N G + { RETURN(RXS_ENGINEERING); } +E R R O R + { RETURN(RXS_ERROR); } +E X P O S E + { RETURN(RXS_EXPOSE); } +F A I L U R E + { RETURN(RXS_FAILURE); } +F O R + { RETURN(RXS_FOR); } +F O R E V E R + { RETURN(RXS_FOREVER); } +F O R M + { RETURN(RXS_FORM); } +F U Z Z + { RETURN(RXS_FUZZ); } +H A L T + { RETURN(RXS_HALT); } +L I N E I N + { RETURN(RXS_LINEIN); } +N A M E + { RETURN(RXS_NAME); } +N O T R E A D Y + { RETURN(RXS_NOTREADY); } +N O V A L U E + { RETURN(RXS_NOVALUE); } +S C I E N T I F I C + { RETURN(RXS_SCIENTIFIC); } +S O U R C E + { RETURN(RXS_SOURCE); } +S Y N T A X + { RETURN(RXS_SYNTAX); } +T O + { RETURN(RXS_TO); } +U N T I L + { RETURN(RXS_UNTIL); } +U P P E R + { RETURN(RXS_UPPER); } +V A L U E + { RETURN(RXS_VALUE); } +V A R + { RETURN(RXS_VAR); } +V E R S I O N + { RETURN(RXS_VERSION); } +W H I L E + { RETURN(RXS_WHILE); } +W I T H + { RETURN(RXS_WITH); } + +const + { RETURN(SU_CONST); } +simple + { RETURN(SU_SYMBOL); } +stem + { RETURN(SU_SYMBOL_STEM); } +symbol + { RETURN(SU_SYMBOL_COMPOUND); } +str + { RETURN(SU_LITERAL); } +str [bB] / (all\symchr) + { RETURN(SU_LITERAL_BIN); } +str [xX] / (all\symchr) + { RETURN(SU_LITERAL_HEX); } + +eof + { RETURN(SU_EOF); } +any + { RETURN(SU_ERROR); } +*/ +} + +bool 
StripToken(){ + uchar *cursor = ScanCB.cur; + unsigned depth; + uchar ch; + bool blanks = FALSE; + ScanCB.eot = cursor; +strip: +/*!re2c +"/*" + { + depth = 1; + goto comment; + } +"\r" + { goto strip; } +[ \t] + { + blanks = TRUE; + goto strip; + } +[] / all + { RETURN(blanks); } +*/ + +comment: +/*!re2c +"*/" + { + if(--depth == 0) + goto strip; + else + goto comment; + } +"\n" + { + ++(ScanCB.lineNum); + ScanCB.linePos = ScanCB.pos + (cursor - ScanCB.mrk); + goto comment; + } +"/*" + { + ++depth; + goto comment; + } +eof + { RETURN(blanks); } +any + { + goto comment; + } +*/ +} diff --git a/tools/re2c/examples/push_model/push.re b/tools/re2c/examples/push_model/push.re new file mode 100644 index 000000000..5ad6e7ac8 --- /dev/null +++ b/tools/re2c/examples/push_model/push.re @@ -0,0 +1,340 @@ +/* + * A push-model scanner example for re2c -f + * Written Mon Apr 11 2005 by mgix@mgix.com + * This file is in the public domain. + * + */ + +// ---------------------------------------------------------------------- + +#include +#include +#include +#include +#include + +#if defined(WIN32) + + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +#else + + #include + #include + + #ifndef O_BINARY + #define O_BINARY 0 + #endif + +#endif + +// ---------------------------------------------------------------------- +#define TOKENS \ + \ + TOK(kEOF) \ + TOK(kEOL) \ + TOK(kUnknown) \ + TOK(kIdentifier) \ + TOK(kDecimalConstant) \ + \ + TOK(kEqual) \ + TOK(kLeftParen) \ + TOK(kRightParen) \ + TOK(kMinus) \ + TOK(kPlus) \ + TOK(kStar) \ + TOK(kSlash) \ + \ + TOK(kIf) \ + TOK(kFor) \ + TOK(kElse) \ + TOK(kGoto) \ + TOK(kBreak) \ + TOK(kWhile) \ + TOK(kReturn) \ + + +// ---------------------------------------------------------------------- +static const char *tokenNames[] = +{ + #define TOK(x) #x, + TOKENS + #undef TOK +}; + +// 
---------------------------------------------------------------------- +class PushScanner +{ +public: + + enum Token + { + #define TOK(x) x, + TOKENS + #undef TOK + }; + +private: + + bool eof; + int32_t state; + + uint8_t *limit; + uint8_t *start; + uint8_t *cursor; + uint8_t *marker; + + uint8_t *buffer; + uint8_t *bufferEnd; + + uint8_t yych; + uint32_t yyaccept; + +public: + + // ---------------------------------------------------------------------- + PushScanner() + { + limit = 0; + start = 0; + state = -1; + cursor = 0; + marker = 0; + buffer = 0; + eof = false; + bufferEnd = 0; + } + + // ---------------------------------------------------------------------- + ~PushScanner() + { + } + + // ---------------------------------------------------------------------- + void send( + Token token + ) + { + size_t tokenSize = cursor-start; + const char *tokenName = tokenNames[token]; + printf( + "scanner is pushing out a token of type %d (%s)", + token, + tokenName + ); + + if(token==kEOF) putchar('\n'); + else + { + size_t tokenNameSize = strlen(tokenNames[token]); + size_t padSize = 20-(20"); + + fwrite( + start, + tokenSize, + 1, + stdout + ); + + printf("<----\n"); + } + } + + // ---------------------------------------------------------------------- + uint32_t push( + const void *input, + ssize_t inputSize + ) + { + printf( + "scanner is receiving a new data batch of length %d\n" + "scanner continues with saved state = %d\n", + inputSize, + state + ); + + /* + * Data source is signaling end of file when batch size + * is less than maxFill. This is slightly annoying because + * maxFill is a value that can only be known after re2c does + * its thing. Practically though, maxFill is never bigger than + * the longest keyword, so given our grammar, 32 is a safe bet. 
+ */ + uint8_t null[64]; + const ssize_t maxFill = 32; + if(inputSize // min +#include // memset + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" + +namespace re2c +{ + +BitMap *BitMap::first = NULL; + +BitMap::BitMap(const Go *g, const State *x) + : go(g) + , on(x) + , next(first) + , i(0) + , m(0) +{ + first = this; +} + +BitMap::~BitMap() +{ + delete next; +} + +const BitMap *BitMap::find(const Go *g, const State *x) +{ + for (const BitMap *b = first; b; b = b->next) + { + if (matches(b->go->span, b->go->nSpans, b->on, g->span, g->nSpans, x)) + { + return b; + } + } + + return new BitMap(g, x); +} + +const BitMap *BitMap::find(const State *x) +{ + for (const BitMap *b = first; b; b = b->next) + { + if (b->on == x) + { + return b; + } + } + + return NULL; +} + +static void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m) +{ + Span *b = g->span, *e = &b[g->nSpans]; + uint32_t lb = 0; + + for (; b < e; ++b) + { + if (b->to == s) + { + for (; lb < b->ub && lb < 256; ++lb) + { + bm[lb-f] |= m; + } + } + + lb = b->ub; + } +} + +void BitMap::gen(OutputFile & o, uint32_t ind, uint32_t lb, uint32_t ub) +{ + if (first && bUsedYYBitmap) + { + o.wind(ind).ws("static const unsigned char ").wstring(opts->yybm).ws("[] = {"); + + uint32_t c = 1, n = ub - lb; + const BitMap *cb = first; + + while((cb = cb->next) != NULL) { + ++c; + } + BitMap *b = first; + + uint32_t *bm = new uint32_t[n]; + + for (uint32_t i = 0, t = 1; b; i += n, t += 8) + { + memset(bm, 0, n * sizeof(uint32_t)); + + for (uint32_t m = 0x80; b && m; m >>= 1) + { + b->i = i; + b->m = m; + doGen(b->go, b->on, bm, lb, m); + b = const_cast(b->next); + } + + if (c > 8) + { + o.ws("\n").wind(ind+1).ws("/* table ").wu32(t).ws(" .. 
").wu32(std::min(c, t+7)).ws(": ").wu32(i).ws(" */"); + } + + for (uint32_t j = 0; j < n; ++j) + { + if (j % 8 == 0) + { + o.ws("\n").wind(ind+1); + } + + if (opts->yybmHexTable) + { + o.wu32_hex(bm[j]); + } + else + { + o.wu32_width(bm[j], 3); + } + o.ws(", "); + } + } + + o.ws("\n").wind(ind).ws("};\n"); + + delete[] bm; + } +} + +// All spans in b1 that lead to s1 are pairwise equal to that in b2 leading to s2 +bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2) +{ + const Span * e1 = &b1[n1]; + uint32_t lb1 = 0; + const Span * e2 = &b2[n2]; + uint32_t lb2 = 0; + + for (;;) + { + for (; b1 < e1 && b1->to != s1; ++b1) + { + lb1 = b1->ub; + } + for (; b2 < e2 && b2->to != s2; ++b2) + { + lb2 = b2->ub; + } + if (b1 == e1) + { + return b2 == e2; + } + if (b2 == e2) + { + return false; + } + if (lb1 != lb2 || b1->ub != b2->ub) + { + return false; + } + ++b1; + ++b2; + } +} + +} // end namespace re2c diff --git a/tools/re2c/src/codegen/bitmap.h b/tools/re2c/src/codegen/bitmap.h new file mode 100644 index 000000000..17d7a852b --- /dev/null +++ b/tools/re2c/src/codegen/bitmap.h @@ -0,0 +1,45 @@ +#ifndef _RE2C_CODEGEN_BITMAP_ +#define _RE2C_CODEGEN_BITMAP_ + +#include "src/util/c99_stdint.h" + +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct Go; +struct Span; +struct State; +struct OutputFile; + +class BitMap +{ +public: + static BitMap *first; + + const Go *go; + const State *on; + const BitMap *next; + uint32_t i; + uint32_t m; + +public: + static const BitMap *find(const Go*, const State*); + static const BitMap *find(const State*); + static void gen(OutputFile &, uint32_t ind, uint32_t, uint32_t); + BitMap(const Go*, const State*); + ~BitMap(); + + FORBID_COPY (BitMap); +}; + +bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2); + +#ifdef _MSC_VER +# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */ +#endif 
+ +} // end namespace re2c + +#endif // _RE2C_CODEGEN_BITMAP_ diff --git a/tools/re2c/src/codegen/emit.h b/tools/re2c/src/codegen/emit.h new file mode 100644 index 000000000..5d91b14e9 --- /dev/null +++ b/tools/re2c/src/codegen/emit.h @@ -0,0 +1,43 @@ +#ifndef _RE2C_CODEGEN_EMIT_ +#define _RE2C_CODEGEN_EMIT_ + +#include "src/codegen/output.h" +#include "src/ir/adfa/adfa.h" + +namespace re2c { + +typedef std::vector RegExpIndices; + +void emit_action + ( const Action & action + , OutputFile & o + , uint32_t ind + , bool & readCh + , const State * const s + , const std::string & condName + , const Skeleton * skeleton + , const std::set & used_labels + , bool save_yyaccept + ); + +// helpers +void genGoTo (OutputFile & o, uint32_t ind, const State * from, const State * to, bool & readCh); + +template std::string replaceParam (std::string str, const std::string & param, const _Ty & value) +{ + if (!param.empty ()) + { + std::ostringstream strValue; + strValue << value; + std::string::size_type pos; + while((pos = str.find(param)) != std::string::npos) + { + str.replace(pos, param.length(), strValue.str()); + } + } + return str; +} + +} // namespace re2c + +#endif // _RE2C_CODEGEN_EMIT_ diff --git a/tools/re2c/src/codegen/emit_action.cc b/tools/re2c/src/codegen/emit_action.cc new file mode 100644 index 000000000..d3146daef --- /dev/null +++ b/tools/re2c/src/codegen/emit_action.cc @@ -0,0 +1,388 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/emit.h" +#include "src/codegen/input_api.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/adfa/action.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/code.h" +#include "src/parse/loc.h" + +namespace re2c +{ + +class label_t; + +static void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool 
bSetMarker); +static void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s); +static void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & init, const std::set & used_labels); +static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept); +static void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept, size_t l, size_t r); +static void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept); +static void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton); +static void genYYFill (OutputFile & o, size_t need); +static void genSetCondition (OutputFile & o, uint32_t ind, const std::string & newcond); +static void genSetState (OutputFile & o, uint32_t ind, uint32_t fillIndex); + +void emit_action + ( const Action & action + , OutputFile & o + , uint32_t ind + , bool & readCh + , const State * const s + , const std::string & condName + , const Skeleton * skeleton + , const std::set & used_labels + , bool save_yyaccept + ) +{ + switch (action.type) + { + case Action::MATCH: + emit_match (o, ind, readCh, s); + break; + case Action::INITIAL: + emit_initial (o, ind, readCh, s, * action.info.initial, used_labels); + break; + case Action::SAVE: + emit_save (o, ind, readCh, s, action.info.save, save_yyaccept); + break; + case Action::MOVE: + break; + case Action::ACCEPT: + emit_accept (o, ind, readCh, s, * action.info.accepts); + break; + case Action::RULE: + emit_rule (o, ind, s, action.info.rule, condName, skeleton); + break; + } + if (s->isPreCtxt && opts->target != opt_t::DOT) + { + o.wstring(opts->input_api.stmt_backupctx (ind)); + } +} + +void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s) +{ + 
if (opts->target == opt_t::DOT) + { + return; + } + + const bool read_ahead = s + && s->next + && s->next->action.type != Action::RULE; + if (s->fill != 0) + { + o.wstring(opts->input_api.stmt_skip (ind)); + } + else if (!read_ahead) + { + /* do not read next char if match */ + o.wstring(opts->input_api.stmt_skip (ind)); + readCh = true; + } + else + { + o.wstring(opts->input_api.stmt_skip_peek (ind)); + readCh = false; + } + + if (s->fill != 0) + { + need(o, ind, readCh, s->fill, false); + } +} + +void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & initial, const std::set & used_labels) +{ + if (opts->target == opt_t::DOT) + { + return; + } + + if (used_labels.count(s->label)) + { + if (s->fill != 0) + { + o.wstring(opts->input_api.stmt_skip (ind)); + } + else + { + o.wstring(opts->input_api.stmt_skip_peek (ind)); + } + } + + if (used_labels.count(initial.label)) + { + o.wstring(opts->labelPrefix).wlabel(initial.label).ws(":\n"); + } + + if (opts->dFlag) + { + o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(initial.label).ws(", *").wstring(opts->yycursor).ws(");\n"); + } + + if (s->fill != 0) + { + need(o, ind, readCh, s->fill, initial.setMarker); + } + else + { + if (initial.setMarker) + { + o.wstring(opts->input_api.stmt_backup (ind)); + } + readCh = false; + } +} + +void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept) +{ + if (opts->target == opt_t::DOT) + { + return; + } + + if (save_yyaccept) + { + o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu32(save).ws(";\n"); + } + + if (s->fill != 0) + { + o.wstring(opts->input_api.stmt_skip_backup (ind)); + need(o, ind, readCh, s->fill, false); + } + else + { + o.wstring(opts->input_api.stmt_skip_backup_peek (ind)); + readCh = false; + } +} + +void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts, size_t l, size_t r) +{ + if (l < r) + { + 
const size_t m = (l + r) >> 1; + o.wind(ind).ws("if (").wstring(opts->yyaccept).ws(r == l+1 ? " == " : " <= ").wu64(m).ws(") {\n"); + emit_accept_binary (o, ++ind, readCh, s, accepts, l, m); + o.wind(--ind).ws("} else {\n"); + emit_accept_binary (o, ++ind, readCh, s, accepts, m + 1, r); + o.wind(--ind).ws("}\n"); + } + else + { + genGoTo(o, ind, s, accepts[l], readCh); + } +} + +void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts) +{ + const size_t accepts_size = accepts.size (); + if (accepts_size > 0) + { + if (opts->target != opt_t::DOT) + { + o.wstring(opts->input_api.stmt_restore (ind)); + } + + if (readCh) // shouldn't be necessary, but might become at some point + { + o.wstring(opts->input_api.stmt_peek (ind)); + readCh = false; + } + + if (accepts_size > 1) + { + if (opts->gFlag && accepts_size >= opts->cGotoThreshold) + { + o.wind(ind++).ws("{\n"); + o.wind(ind++).ws("static void *").wstring(opts->yytarget).ws("[").wu64(accepts_size).ws("] = {\n"); + for (uint32_t i = 0; i < accepts_size; ++i) + { + o.wind(ind).ws("&&").wstring(opts->labelPrefix).wlabel(accepts[i]->label).ws(",\n"); + } + o.wind(--ind).ws("};\n"); + o.wind(ind).ws("goto *").wstring(opts->yytarget).ws("[").wstring(opts->yyaccept).ws("];\n"); + o.wind(--ind).ws("}\n"); + } + else if (opts->sFlag || (accepts_size == 2 && opts->target != opt_t::DOT)) + { + emit_accept_binary (o, ind, readCh, s, accepts, 0, accepts_size - 1); + } + else if (opts->target == opt_t::DOT) + { + for (uint32_t i = 0; i < accepts_size; ++i) + { + o.wlabel(s->label).ws(" -> ").wlabel(accepts[i]->label); + o.ws(" [label=\"yyaccept=").wu32(i).ws("\"]\n"); + } + } + else + { + o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n"); + for (uint32_t i = 0; i < accepts_size - 1; ++i) + { + o.wind(ind).ws("case ").wu32(i).ws(": \t"); + genGoTo(o, 0, s, accepts[i], readCh); + } + o.wind(ind).ws("default:\t"); + genGoTo(o, 0, s, accepts[accepts_size - 1], 
readCh); + o.wind(ind).ws("}\n"); + } + } + else + { + // no need to write if statement here since there is only case 0. + genGoTo(o, ind, s, accepts[0], readCh); + } + } +} + +void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton) +{ + if (opts->target == opt_t::DOT) + { + o.wlabel(s->label); + if (rule->code) + { + o.ws(" [label=\"").wstring(rule->code->loc.filename).ws(":").wu32(rule->code->loc.line).ws("\"]"); + } + o.ws("\n"); + return; + } + + uint32_t back = rule->ctx->fixedLength(); + if (back != 0u && opts->target != opt_t::DOT) + { + o.wstring(opts->input_api.stmt_restorectx (ind)); + } + + if (opts->target == opt_t::SKELETON) + { + skeleton->emit_action (o, ind, rule->rank); + } + else + { + if (!rule->newcond.empty () && condName != rule->newcond) + { + genSetCondition(o, ind, rule->newcond); + } + + if (rule->code) + { + if (!yySetupRule.empty ()) + { + o.wind(ind).wstring(yySetupRule).ws("\n"); + } + o.wline_info(rule->code->loc.line, rule->code->loc.filename.c_str ()) + .wind(ind).wstring(rule->code->text).ws("\n") + .wdelay_line_info (); + } + else if (!rule->newcond.empty ()) + { + o.wind(ind).wstring(replaceParam(opts->condGoto, opts->condGotoParam, opts->condPrefix + rule->newcond)).ws("\n"); + } + } +} + +void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker) +{ + if (opts->target == opt_t::DOT) + { + return; + } + + uint32_t fillIndex = last_fill_index; + + if (opts->fFlag) + { + last_fill_index++; + genSetState (o, ind, fillIndex); + } + + if (opts->fill_use && n > 0) + { + o.wind(ind); + if (n == 1) + { + if (opts->fill_check) + { + o.ws("if (").wstring(opts->input_api.expr_lessthan_one ()).ws(") "); + } + genYYFill(o, n); + } + else + { + if (opts->fill_check) + { + o.ws("if (").wstring(opts->input_api.expr_lessthan (n)).ws(") "); + } + genYYFill(o, n); + } + } + + if (opts->fFlag) + { + 
o.wstring(opts->yyfilllabel).wu32(fillIndex).ws(":\n"); + } + + if (n > 0) + { + if (bSetMarker) + { + o.wstring(opts->input_api.stmt_backup_peek (ind)); + } + else + { + o.wstring(opts->input_api.stmt_peek (ind)); + } + readCh = false; + } +} + +void genYYFill (OutputFile & o, size_t need) +{ + o.wstring(replaceParam (opts->fill, opts->fill_arg, need)); + if (!opts->fill_naked) + { + if (opts->fill_arg_use) + { + o.ws("(").wu64(need).ws(")"); + } + o.ws(";"); + } + o.ws("\n"); +} + +void genSetCondition(OutputFile & o, uint32_t ind, const std::string& newcond) +{ + o.wind(ind).wstring(replaceParam (opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + newcond)); + if (!opts->cond_set_naked) + { + o.ws("(").wstring(opts->condEnumPrefix).wstring(newcond).ws(");"); + } + o.ws("\n"); +} + +void genSetState(OutputFile & o, uint32_t ind, uint32_t fillIndex) +{ + o.wind(ind).wstring(replaceParam (opts->state_set, opts->state_set_arg, fillIndex)); + if (!opts->state_set_naked) + { + o.ws("(").wu32(fillIndex).ws(");"); + } + o.ws("\n"); +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/emit_dfa.cc b/tools/re2c/src/codegen/emit_dfa.cc new file mode 100644 index 000000000..ce826d310 --- /dev/null +++ b/tools/re2c/src/codegen/emit_dfa.cc @@ -0,0 +1,348 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/emit.h" +#include "src/codegen/go.h" +#include "src/codegen/input_api.h" +#include "src/codegen/label.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/adfa/action.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/util/counter.h" + +namespace re2c +{ + +static std::string genGetCondition (); +static void genCondGotoSub (OutputFile & o, uint32_t ind, const std::vector & condnames, uint32_t cMin, uint32_t cMax); +static void genCondTable (OutputFile & o, uint32_t ind, const 
std::vector & condnames); +static void genCondGoto (OutputFile & o, uint32_t ind, const std::vector & condnames); +static void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label); + +std::string genGetCondition() +{ + return opts->cond_get_naked + ? opts->cond_get + : opts->cond_get + "()"; +} + +void genGoTo(OutputFile & o, uint32_t ind, const State *from, const State *to, bool & readCh) +{ + if (opts->target == opt_t::DOT) + { + o.wlabel(from->label).ws(" -> ").wlabel(to->label).ws("\n"); + return; + } + + if (readCh && from->next != to) + { + o.wstring(opts->input_api.stmt_peek (ind)); + readCh = false; + } + + o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(to->label).ws(";\n"); +} + +void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label) +{ + if (opts->target != opt_t::DOT) + { + if (used_label) + { + o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n"); + } + if (opts->dFlag && (s->action.type != Action::INITIAL)) + { + o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(opts->input_api.expr_peek ()).ws(");\n"); + } + } +} + +void DFA::count_used_labels (std::set & used, label_t start, label_t initial, bool force_start) const +{ + // In '-f' mode, default state is always state 0 + if (opts->fFlag) + { + used.insert (label_t::first ()); + } + if (force_start) + { + used.insert (start); + } + for (State * s = head; s; s = s->next) + { + s->go.used_labels (used); + } + for (uint32_t i = 0; i < accepts.size (); ++i) + { + used.insert (accepts[i]->label); + } + // must go last: it needs the set of used labels + if (used.count (head->label)) + { + used.insert (initial); + } +} + +void DFA::emit_body (OutputFile & o, uint32_t& ind, const std::set & used_labels, label_t initial) const +{ + // If DFA has transitions to initial state, then initial state + // has a piece of code that advances input position. We must + // skip it when entering DFA.
+ if (used_labels.count(head->label)) + { + o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(initial).ws(";\n"); + } + + const bool save_yyaccept = accepts.size () > 1; + for (State * s = head; s; s = s->next) + { + bool readCh = false; + emit_state (o, ind, s, used_labels.count (s->label) != 0); + emit_action (s->action, o, ind, readCh, s, cond, skeleton, used_labels, save_yyaccept); + s->go.emit(o, ind, readCh); + } +} + +void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBrace) +{ + OutputFile & o = output.source; + + bool bProlog = (!opts->cFlag || !bWroteCondCheck); + + // start_label points to the beginning of current re2c block + // (prior to condition dispatch in '-c' mode) + // it can be forced by configuration 're2c:startlabel = ;' + label_t start_label = o.label_counter.next (); + // initial_label points to the beginning of DFA + // in '-c' mode this is NOT equal to start_label + label_t initial_label = bProlog && opts->cFlag + ? o.label_counter.next () + : start_label; + for (State * s = head; s; s = s->next) + { + s->label = o.label_counter.next (); + } + std::set used_labels; + count_used_labels (used_labels, start_label, initial_label, o.get_force_start_label ()); + + head->action.set_initial (initial_label, head->action.type == Action::SAVE); + + skeleton->warn_undefined_control_flow (); + skeleton->warn_unreachable_rules (); + skeleton->warn_match_empty (); + + if (opts->target == opt_t::SKELETON) + { + if (output.skeletons.insert (name).second) + { + skeleton->emit_data (o.file_name); + skeleton->emit_start (o, max_fill, need_backup, need_backupctx, need_accept); + uint32_t i = 2; + emit_body (o, i, used_labels, initial_label); + skeleton->emit_end (o, need_backup, need_backupctx); + } + } + else + { + // Generate prolog + if (bProlog) + { + o.ws("\n").wdelay_line_info (); + if (opts->target == opt_t::DOT) + { + bPrologBrace = true; + o.ws("digraph re2c {\n"); + } + else if ((!opts->fFlag && o.get_used_yyaccept ())
+ || (!opts->fFlag && opts->bEmitYYCh) + || (opts->bFlag && !opts->cFlag && BitMap::first) + || (opts->cFlag && !bWroteCondCheck && opts->gFlag) + || (opts->fFlag && !bWroteGetState && opts->gFlag) + ) + { + bPrologBrace = true; + o.wind(ind++).ws("{\n"); + } + else if (ind == 0) + { + ind = 1; + } + if (!opts->fFlag && opts->target != opt_t::DOT) + { + if (opts->bEmitYYCh) + { + o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n"); + } + o.wdelay_yyaccept_init (ind); + } + else + { + o.ws("\n"); + } + } + if (opts->bFlag && !opts->cFlag && BitMap::first) + { + BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256); + } + if (bProlog) + { + if (opts->cFlag && !bWroteCondCheck && opts->gFlag) + { + genCondTable(o, ind, output.types); + } + o.wdelay_state_goto (ind); + if (opts->cFlag && opts->target != opt_t::DOT) + { + if (used_labels.count(start_label)) + { + o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n"); + } + } + o.wuser_start_label (); + if (opts->cFlag && !bWroteCondCheck) + { + genCondGoto(o, ind, output.types); + } + } + if (opts->cFlag && !cond.empty()) + { + if (opts->condDivider.length()) + { + o.wstring(replaceParam(opts->condDivider, opts->condDividerParam, cond)).ws("\n"); + } + if (opts->target == opt_t::DOT) + { + o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n"); + } + else + { + o.wstring(opts->condPrefix).wstring(cond).ws(":\n"); + } + } + if (opts->cFlag && opts->bFlag && BitMap::first) + { + o.wind(ind++).ws("{\n"); + BitMap::gen(o, ind, lbChar, ubChar <= 256 ? 
ubChar : 256); + } + // Generate code + emit_body (o, ind, used_labels, initial_label); + if (opts->cFlag && opts->bFlag && BitMap::first) + { + o.wind(--ind).ws("}\n"); + } + // Generate epilog + if ((!opts->cFlag || isLastCond) && bPrologBrace) + { + o.wind(--ind).ws("}\n"); + } + } + + // Cleanup + if (BitMap::first) + { + delete BitMap::first; + BitMap::first = NULL; + } +} + +void genCondTable(OutputFile & o, uint32_t ind, const std::vector & condnames) +{ + const size_t conds = condnames.size (); + o.wind(ind++).ws("static void *").wstring(opts->yyctable).ws("[").wu64(conds).ws("] = {\n"); + for (size_t i = 0; i < conds; ++i) + { + o.wind(ind).ws("&&").wstring(opts->condPrefix).wstring(condnames[i]).ws(",\n"); + } + o.wind(--ind).ws("};\n"); +} + +void genCondGotoSub(OutputFile & o, uint32_t ind, const std::vector & condnames, uint32_t cMin, uint32_t cMax) +{ + if (cMin == cMax) + { + o.wind(ind).ws("goto ").wstring(opts->condPrefix).wstring(condnames[cMin]).ws(";\n"); + } + else + { + uint32_t cMid = cMin + ((cMax - cMin + 1) / 2); + + o.wind(ind).ws("if (").wstring(genGetCondition()).ws(" < ").wu32(cMid).ws(") {\n"); + genCondGotoSub(o, ind + 1, condnames, cMin, cMid - 1); + o.wind(ind).ws("} else {\n"); + genCondGotoSub(o, ind + 1, condnames, cMid, cMax); + o.wind(ind).ws("}\n"); + } +} + +/* + * note [condition order] + * + * In theory re2c makes no guarantee about the order of conditions in + * the generated lexer. Users should define condition type 'YYCONDTYPE' + * and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'. + * This way code is independent of internal re2c condition numbering. + * + * However, it is possible to manually hardcode condition numbers and make + * re2c generate condition dispatch without explicit use of condition names + * (nested 'if' statements with '-b' or computed 'goto' table with '-g'). 
+ * This code is syntactically valid (compiles), but unsafe: + * - change of re2c options may break compilation + * - change of internal re2c condition numbering may break runtime + * + * re2c has to preserve the existing numbering scheme. + * + * re2c warns about implicit assumptions about condition order, unless: + * - condition type is defined with 'types:re2c' or '-t, --type-header' + * - dispatch is independent of condition order: either it uses + * explicit condition names or there's only one condition and + * dispatch shrinks to unconditional jump + */ +void genCondGoto(OutputFile & o, uint32_t ind, const std::vector & condnames) +{ + const size_t conds = condnames.size (); + if (opts->target == opt_t::DOT) + { + o.warn_condition_order = false; // see note [condition order] + for (size_t i = 0; i < conds; ++i) + { + const std::string cond = condnames[i]; + o.ws("0 -> ").wstring(cond).ws(" [label=\"state=").wstring(cond).ws("\"]\n"); + } + } + else if (opts->gFlag) + { + o.wind(ind).ws("goto *").wstring(opts->yyctable).ws("[").wstring(genGetCondition()).ws("];\n"); + } + else if (opts->sFlag) + { + if (conds == 1) + { + o.warn_condition_order = false; // see note [condition order] + } + genCondGotoSub(o, ind, condnames, 0, static_cast (conds) - 1); + } + else + { + o.warn_condition_order = false; // see note [condition order] + o.wind(ind).ws("switch (").wstring(genGetCondition()).ws(") {\n"); + for (size_t i = 0; i < conds; ++i) + { + const std::string & cond = condnames[i]; + o.wind(ind).ws("case ").wstring(opts->condEnumPrefix).wstring(cond).ws(": goto ").wstring(opts->condPrefix).wstring(cond).ws(";\n"); + } + o.wind(ind).ws("}\n"); + } + o.wdelay_warn_condition_order (); + bWroteCondCheck = true; +} + +} // end namespace re2c diff --git a/tools/re2c/src/codegen/go.h b/tools/re2c/src/codegen/go.h new file mode 100644 index 000000000..21f1be979 --- /dev/null +++ b/tools/re2c/src/codegen/go.h @@ -0,0 +1,216 @@ +#ifndef _RE2C_CODEGEN_GO_ +#define 
_RE2C_CODEGEN_GO_ + +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/util/c99_stdint.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +class BitMap; +struct State; +struct If; + +struct Span +{ + uint32_t ub; + State * to; + + FORBID_COPY (Span); +}; + +struct Case +{ + std::vector > ranges; + const State * to; + void emit (OutputFile & o, uint32_t ind); + + inline Case () + : ranges () + , to (NULL) + {} + + FORBID_COPY (Case); +}; + +struct Cases +{ + const State * def; + Case * cases; + uint32_t cases_size; + void add (uint32_t lb, uint32_t ub, State * to); + Cases (const Span * s, uint32_t n); + ~Cases (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (Cases); +}; + +struct Cond +{ + std::string compare; + uint32_t value; + Cond (const std::string & cmp, uint32_t val); +}; + +struct Binary +{ + Cond * cond; + If * thn; + If * els; + Binary (const Span * s, uint32_t n, const State * next); + ~Binary (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (Binary); +}; + +struct Linear +{ + std::vector > branches; + Linear (const Span * s, uint32_t n, const State * next); + ~Linear (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); +}; + +struct If +{ + enum type_t + { + BINARY, + LINEAR + } type; + union + { + Binary * binary; + Linear * linear; + } info; + If (type_t t, const Span * sp, uint32_t nsp, const State * next); + ~If (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); +}; + +struct SwitchIf +{ + enum + { + SWITCH, + IF + } type; + union + { + Cases * cases; + If * ifs; + } info; + SwitchIf (const Span * sp, uint32_t nsp, const State * next); + ~SwitchIf (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); +}; + +struct GoBitmap +{ + const 
BitMap * bitmap; + const State * bitmap_state; + SwitchIf * hgo; + SwitchIf * lgo; + GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next); + ~GoBitmap (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (GoBitmap); +}; + +struct CpgotoTable +{ + static const uint32_t TABLE_SIZE; + const State ** table; + CpgotoTable (const Span * span, uint32_t nSpans); + ~CpgotoTable (); + void emit (OutputFile & o, uint32_t ind); + void used_labels (std::set & used); + +private: + label_t max_label () const; + + FORBID_COPY (CpgotoTable); +}; + +struct Cpgoto +{ + SwitchIf * hgo; + CpgotoTable * table; + Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next); + ~Cpgoto (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (Cpgoto); +}; + +struct Dot +{ + const State * from; + Cases * cases; + Dot (const Span * sp, uint32_t nsp, const State * from); + ~Dot (); + void emit (OutputFile & o); + + FORBID_COPY (Dot); +}; + +struct Go +{ + uint32_t nSpans; // number of spans + Span * span; + enum + { + EMPTY, + SWITCH_IF, + BITMAP, + CPGOTO, + DOT + } type; + union + { + SwitchIf * switchif; + GoBitmap * bitmap; + Cpgoto * cpgoto; + Dot * dot; + } info; + + Go (); + ~Go (); + void init (const State * from); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + Go (const Go & g) + : nSpans (g.nSpans) + , span (g.span) + , type (g.type) + , info (g.info) + {} + Go & operator = (const Go & g) + { + nSpans = g.nSpans; + span = g.span; + type = g.type; + info = g.info; + return * this; + } +}; + +} // namespace re2c + +#endif // _RE2C_CODEGEN_GO_ diff --git a/tools/re2c/src/codegen/go_construct.cc b/tools/re2c/src/codegen/go_construct.cc new file mode 100644 index 
000000000..e680475f0 --- /dev/null +++ b/tools/re2c/src/codegen/go_construct.cc @@ -0,0 +1,284 @@ +#include +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/adfa/adfa.h" +#include "src/util/allocate.h" + +namespace re2c +{ + +static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x); + +Cases::Cases (const Span * span, uint32_t span_size) + : def (span_size == 0 ? NULL : span[span_size - 1].to) + , cases (new Case[span_size]) + , cases_size (0) +{ + for (uint32_t i = 0, lb = 0; i < span_size; ++ i) + { + add (lb, span[i].ub, span[i].to); + lb = span[i].ub; + } +} + +void Cases::add (uint32_t lb, uint32_t ub, State * to) +{ + for (uint32_t i = 0; i < cases_size; ++i) + { + if (cases[i].to == to) + { + cases[i].ranges.push_back (std::make_pair (lb, ub)); + return; + } + } + cases[cases_size].ranges.push_back (std::make_pair (lb, ub)); + cases[cases_size].to = to; + ++cases_size; +} + +Cond::Cond (const std::string & cmp, uint32_t val) + : compare (cmp) + , value (val) +{} + +Binary::Binary (const Span * s, uint32_t n, const State * next) + : cond (NULL) + , thn (NULL) + , els (NULL) +{ + const uint32_t l = n / 2; + const uint32_t h = n - l; + cond = new Cond ("<=", s[l - 1].ub - 1); + thn = new If (l > 4 ? If::BINARY : If::LINEAR, &s[0], l, next); + els = new If (h > 4 ? 
If::BINARY : If::LINEAR, &s[l], h, next); +} + +Linear::Linear (const Span * s, uint32_t n, const State * next) + : branches () +{ + for (;;) + { + const State *bg = s[0].to; + while (n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1) + { + if (s[1].to == next && n == 3) + { + branches.push_back (std::make_pair (new Cond ("!=", s[0].ub), bg)); + return ; + } + else + { + branches.push_back (std::make_pair (new Cond ("==", s[0].ub), s[1].to)); + } + n -= 2; + s += 2; + } + if (n == 1) + { + if (next == NULL || s[0].to != next) + { + branches.push_back (std::make_pair (static_cast (NULL), s[0].to)); + } + return; + } + else if (n == 2 && bg == next) + { + branches.push_back (std::make_pair (new Cond (">=", s[0].ub), s[1].to)); + return; + } + else + { + branches.push_back (std::make_pair (new Cond ("<=", s[0].ub - 1), bg)); + n -= 1; + s += 1; + } + } +} + +If::If (type_t t, const Span * sp, uint32_t nsp, const State * next) + : type (t) + , info () +{ + switch (type) + { + case BINARY: + info.binary = new Binary (sp, nsp, next); + break; + case LINEAR: + info.linear = new Linear (sp, nsp, next); + break; + } +} + +SwitchIf::SwitchIf (const Span * sp, uint32_t nsp, const State * next) + : type (IF) + , info () +{ + if ((!opts->sFlag && nsp > 2) || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2)))) + { + type = SWITCH; + info.cases = new Cases (sp, nsp); + } + else if (nsp > 5) + { + info.ifs = new If (If::BINARY, sp, nsp, next); + } + else + { + info.ifs = new If (If::LINEAR, sp, nsp, next); + } +} + +GoBitmap::GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next) + : bitmap (bm) + , bitmap_state (bm_state) + , hgo (NULL) + , lgo (NULL) +{ + Span * bspan = allocate (nSpans); + uint32_t bSpans = unmap (bspan, span, nSpans, bm_state); + lgo = bSpans == 0 + ? 
NULL + : new SwitchIf (bspan, bSpans, next); + // if there are any low spans, then next state for high spans + // must be NULL to trigger explicit goto generation in linear 'if' + hgo = hSpans == 0 + ? NULL + : new SwitchIf (hspan, hSpans, lgo ? NULL : next); + operator delete (bspan); +} + +const uint32_t CpgotoTable::TABLE_SIZE = 0x100; + +CpgotoTable::CpgotoTable (const Span * span, uint32_t nSpans) + : table (new const State * [TABLE_SIZE]) +{ + uint32_t c = 0; + for (uint32_t i = 0; i < nSpans; ++i) + { + for(; c < span[i].ub && c < TABLE_SIZE; ++c) + { + table[c] = span[i].to; + } + } +} + +Cpgoto::Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next) + : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next)) + , table (new CpgotoTable (span, nSpans)) +{} + +Dot::Dot (const Span * sp, uint32_t nsp, const State * s) + : from (s) + , cases (new Cases (sp, nsp)) +{} + +Go::Go () + : nSpans (0) + , span (NULL) + , type (EMPTY) + , info () +{} + +void Go::init (const State * from) +{ + if (nSpans == 0) + { + return; + } + + // initialize high (wide) spans + uint32_t hSpans = 0; + const Span * hspan = NULL; + for (uint32_t i = 0; i < nSpans; ++i) + { + if (span[i].ub > 0x100) + { + hspan = &span[i]; + hSpans = nSpans - i; + break; + } + } + + // initialize bitmaps + uint32_t nBitmaps = 0; + const BitMap * bitmap = NULL; + const State * bitmap_state = NULL; + for (uint32_t i = 0; i < nSpans; ++i) + { + if (span[i].to->isBase) + { + const BitMap *b = BitMap::find (span[i].to); + if (b && matches(b->go->span, b->go->nSpans, b->on, span, nSpans, span[i].to)) + { + if (bitmap == NULL) + { + bitmap = b; + bitmap_state = span[i].to; + } + nBitmaps++; + } + } + } + + const uint32_t dSpans = nSpans - hSpans - nBitmaps; + if (opts->target == opt_t::DOT) + { + type = DOT; + info.dot = new Dot (span, nSpans, from); + } + else if (opts->gFlag && (dSpans >= opts->cGotoThreshold)) + { + type = CPGOTO; + info.cpgoto = new 
Cpgoto (span, nSpans, hspan, hSpans, from->next); + } + else if (opts->bFlag && (nBitmaps > 0)) + { + type = BITMAP; + info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bitmap, bitmap_state, from->next); + bUsedYYBitmap = true; + } + else + { + type = SWITCH_IF; + info.switchif = new SwitchIf (span, nSpans, from->next); + } +} + +/* + * Find all spans, that map to the given state. For each of them, + * find upper adjacent span, that maps to another state (if such + * span exists, otherwise try lower one). + * If input contains single span that maps to the given state, + * then output contains 0 spans. + */ +uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x) +{ + uint32_t new_nspans = 0; + for (uint32_t i = 0; i < old_nspans; ++i) + { + if (old_span[i].to != x) + { + if (new_nspans > 0 && new_span[new_nspans - 1].to == old_span[i].to) + new_span[new_nspans - 1].ub = old_span[i].ub; + else + { + new_span[new_nspans].to = old_span[i].to; + new_span[new_nspans].ub = old_span[i].ub; + ++new_nspans; + } + } + } + if (new_nspans > 0) + new_span[new_nspans - 1].ub = old_span[old_nspans - 1].ub; + return new_nspans; +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/go_destruct.cc b/tools/re2c/src/codegen/go_destruct.cc new file mode 100644 index 000000000..0160d48f7 --- /dev/null +++ b/tools/re2c/src/codegen/go_destruct.cc @@ -0,0 +1,99 @@ +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/codegen/go.h" + +namespace re2c +{ + +Cases::~Cases () +{ + delete [] cases; +} + +Binary::~Binary () +{ + delete cond; + delete thn; + delete els; +} + +Linear::~Linear () +{ + for (uint32_t i = 0; i < branches.size (); ++i) + { + delete branches[i].first; + } +} + +If::~If () +{ + switch (type) + { + case BINARY: + delete info.binary; + break; + case LINEAR: + delete info.linear; + break; + } +} + +SwitchIf::~SwitchIf () +{ + switch (type) + { + case SWITCH: + delete info.cases; + break; + case IF: +
delete info.ifs; + break; + } +} + +GoBitmap::~GoBitmap () +{ + delete hgo; + delete lgo; +} + +CpgotoTable::~CpgotoTable () +{ + delete [] table; +} + +Cpgoto::~Cpgoto () +{ + delete hgo; + delete table; +} + +Dot::~Dot () +{ + delete cases; +} + +Go::~Go () +{ + switch (type) + { + case EMPTY: + break; + case SWITCH_IF: + delete info.switchif; + break; + case BITMAP: + delete info.bitmap; + break; + case CPGOTO: + delete info.cpgoto; + break; + case DOT: + delete info.dot; + break; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/go_emit.cc b/tools/re2c/src/codegen/go_emit.cc new file mode 100644 index 000000000..e970eee56 --- /dev/null +++ b/tools/re2c/src/codegen/go_emit.cc @@ -0,0 +1,271 @@ +#include +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/codegen/input_api.h" +#include "src/codegen/label.h" +#include "src/codegen/output.h" +#include "src/codegen/print.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +static void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value); +static void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to); +static std::string output_yych (bool & readCh); +static std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo); + +std::string output_yych (bool & readCh) +{ + if (readCh) + { + readCh = false; + return "(" + opts->input_api.expr_peek_save () + ")"; + } + else + { + return opts->yych; + } +} + +void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value) +{ + o.wind(ind).ws("if (").wstring(output_yych (readCh)).ws(" ").wstring(compare).ws(" ").wc_hex (value).ws(") "); +} + +void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to) +{ + if (readCh) + { + 
o.wstring(opts->input_api.stmt_peek (ind)); + readCh = false; + } + o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(to).ws(";\n"); +} + +std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo) +{ + std::string yych = output_yych (readCh); + if (hgo != NULL) + { + o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n"); + hgo->emit (o, ind + 1, readCh); + o.wind(ind).ws("} else "); + yych = opts->yych; + } + else + { + o.wind(ind); + } + return yych; +} + +void Case::emit (OutputFile & o, uint32_t ind) +{ + for (uint32_t i = 0; i < ranges.size (); ++i) + { + for (uint32_t b = ranges[i].first; b < ranges[i].second; ++b) + { + o.wind(ind).ws("case ").wc_hex (b).ws(":"); + if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC) + { + const uint32_t c = opts->encoding.decodeUnsafe (b); + if (is_print (c)) + o.ws(" /* ").wc(static_cast (c)).ws(" */"); + } + bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1; + if (!last_case) + { + o.ws("\n"); + } + } + } +} + +void Cases::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + o.wind(ind).ws("switch (").wstring(output_yych (readCh)).ws(") {\n"); + for (uint32_t i = 0; i < cases_size; ++i) + { + if (cases[i].to != def) + { + cases[i].emit (o, ind); + output_goto (o, 1, readCh, cases[i].to->label); + } + } + o.wind(ind).ws("default:"); + output_goto (o, 1, readCh, def->label); + o.wind(ind).ws("}\n"); +} + +void Binary::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + output_if (o, ind, readCh, cond->compare, cond->value); + o.ws("{\n"); + thn->emit (o, ind + 1, readCh); + o.wind(ind).ws("} else {\n"); + els->emit (o, ind + 1, readCh); + o.wind(ind).ws("}\n"); +} + +void Linear::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + for (uint32_t i = 0; i < branches.size (); ++i) + { + if (branches[i].first != NULL) + { + output_if (o, ind, readCh, branches[i].first->compare, branches[i].first->value); + output_goto (o, 0, readCh, 
branches[i].second->label); + } + else + { + output_goto (o, ind, readCh, branches[i].second->label); + } + } +} + +void If::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + switch (type) + { + case BINARY: + info.binary->emit (o, ind, readCh); + break; + case LINEAR: + info.linear->emit (o, ind, readCh); + break; + } +} + +void SwitchIf::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + switch (type) + { + case SWITCH: + info.cases->emit (o, ind, readCh); + break; + case IF: + info.ifs->emit (o, ind, readCh); + break; + } +} + +void GoBitmap::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + std::string yych = output_hgo (o, ind, readCh, hgo); + o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(yych).ws("] & "); + if (opts->yybmHexTable) + { + o.wu32_hex(bitmap->m); + } + else + { + o.wu32(bitmap->m); + } + o.ws(") {\n"); + output_goto (o, ind + 1, readCh, bitmap_state->label); + o.wind(ind).ws("}\n"); + if (lgo != NULL) + { + lgo->emit (o, ind, readCh); + } +} + +label_t CpgotoTable::max_label () const +{ + label_t max = label_t::first (); + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + if (max < table[i]->label) + { + max = table[i]->label; + }; + } + return max; +} + +void CpgotoTable::emit (OutputFile & o, uint32_t ind) +{ + o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n"); + o.wind(++ind); + const uint32_t max_digits = max_label ().width (); + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + o.ws("&&").wstring(opts->labelPrefix).wlabel(table[i]->label); + if (i == TABLE_SIZE - 1) + { + o.ws("\n"); + } + else if (i % 8 == 7) + { + o.ws(",\n").wind(ind); + } + else + { + const uint32_t padding = max_digits - table[i]->label.width () + 1; + o.ws(",").wstring(std::string (padding, ' ')); + } + } + o.wind(--ind).ws("};\n"); +} + +void Cpgoto::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + std::string yych = output_hgo (o, ind, readCh, hgo); + o.ws("{\n"); + table->emit (o, ++ind); + 
o.wind(ind).ws("goto *").wstring(opts->yytarget).ws("[").wstring(yych).ws("];\n"); + o.wind(--ind).ws("}\n"); +} + +void Dot::emit (OutputFile & o) +{ + const uint32_t n = cases->cases_size; + if (n == 1) + { + o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n"); + } + else + { + for (uint32_t i = 0; i < n; ++i) + { + o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[i].to->label).ws(" [label=\""); + for (uint32_t j = 0; j < cases->cases[i].ranges.size (); ++j) + { + o.wrange(cases->cases[i].ranges[j].first, cases->cases[i].ranges[j].second); + } + o.ws("\"]\n"); + } + } +} + +void Go::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + switch (type) + { + case EMPTY: + break; + case SWITCH_IF: + info.switchif->emit (o, ind, readCh); + break; + case BITMAP: + info.bitmap->emit (o, ind, readCh); + break; + case CPGOTO: + info.cpgoto->emit (o, ind, readCh); + break; + case DOT: + info.dot->emit (o); + break; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/go_used_labels.cc b/tools/re2c/src/codegen/go_used_labels.cc new file mode 100644 index 000000000..09cf98ea8 --- /dev/null +++ b/tools/re2c/src/codegen/go_used_labels.cc @@ -0,0 +1,111 @@ +#include +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/go.h" +#include "src/codegen/label.h" +#include "src/ir/adfa/adfa.h" + +namespace re2c +{ + +void Cases::used_labels (std::set & used) +{ + for (uint32_t i = 0; i < cases_size; ++i) + { + used.insert (cases[i].to->label); + } +} + +void Binary::used_labels (std::set & used) +{ + thn->used_labels (used); + els->used_labels (used); +} + +void Linear::used_labels (std::set & used) +{ + for (uint32_t i = 0; i < branches.size (); ++i) + { + used.insert (branches[i].second->label); + } +} + +void If::used_labels (std::set & used) +{ + switch (type) + { + case BINARY: + info.binary->used_labels (used); + break; + case LINEAR: + info.linear->used_labels (used); + break; + } +} + +void 
SwitchIf::used_labels (std::set & used) +{ + switch (type) + { + case SWITCH: + info.cases->used_labels (used); + break; + case IF: + info.ifs->used_labels (used); + break; + } +} + +void GoBitmap::used_labels (std::set & used) +{ + if (hgo != NULL) + { + hgo->used_labels (used); + } + used.insert (bitmap_state->label); + if (lgo != NULL) + { + lgo->used_labels (used); + } +} + +void CpgotoTable::used_labels (std::set & used) +{ + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + used.insert (table[i]->label); + } +} + +void Cpgoto::used_labels (std::set & used) +{ + if (hgo != NULL) + { + hgo->used_labels (used); + } + table->used_labels (used); +} + +void Go::used_labels (std::set & used) +{ + switch (type) + { + case EMPTY: + case DOT: + break; + case SWITCH_IF: + info.switchif->used_labels (used); + break; + case BITMAP: + info.bitmap->used_labels (used); + break; + case CPGOTO: + info.cpgoto->used_labels (used); + break; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/indent.h b/tools/re2c/src/codegen/indent.h new file mode 100644 index 000000000..d2d8f369a --- /dev/null +++ b/tools/re2c/src/codegen/indent.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_CODEGEN_INDENT_ +#define _RE2C_CODEGEN_INDENT_ + +#include + +#include "src/globals.h" + +namespace re2c +{ + +inline std::string indent (uint32_t ind) +{ + std::string str; + + while (opts->target != opt_t::DOT && ind-- > 0) + { + str += opts->indString; + } + return str; +} + +} // end namespace re2c + +#endif // _RE2C_CODEGEN_INDENT_ diff --git a/tools/re2c/src/codegen/input_api.cc b/tools/re2c/src/codegen/input_api.cc new file mode 100644 index 000000000..f2e32c387 --- /dev/null +++ b/tools/re2c/src/codegen/input_api.cc @@ -0,0 +1,175 @@ +#include + +#include "src/codegen/input_api.h" +#include "src/codegen/indent.h" +#include "src/conf/opt.h" +#include "src/globals.h" + +namespace re2c +{ + +InputAPI::InputAPI () + : type_ (DEFAULT) +{} + +InputAPI::type_t InputAPI::type () const +{ + return type_; +} + 
+void InputAPI::set (type_t t) +{ + type_ = t; +} + +std::string InputAPI::expr_peek () const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = "*" + opts->yycursor; + break; + case CUSTOM: + s = opts->yypeek + " ()"; + break; + } + return s; +} + +std::string InputAPI::expr_peek_save () const +{ + return opts->yych + " = " + opts.yychConversion () + expr_peek (); +} + +std::string InputAPI::stmt_peek (uint32_t ind) const +{ + return indent (ind) + expr_peek_save () + ";\n"; +} + +std::string InputAPI::stmt_skip (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = "++" + opts->yycursor; + break; + case CUSTOM: + s = opts->yyskip + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_backup (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = opts->yymarker + " = " + opts->yycursor; + break; + case CUSTOM: + s = opts->yybackup + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_backupctx (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = opts->yyctxmarker + " = " + opts->yycursor; + break; + case CUSTOM: + s = opts->yybackupctx + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_restore (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = opts->yycursor + " = " + opts->yymarker; + break; + case CUSTOM: + s = opts->yyrestore + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_restorectx (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = indent (ind) + opts->yycursor + " = " + opts->yyctxmarker + ";\n"; + break; + case CUSTOM: + s = indent (ind) + opts->yyrestorectx + " ();\n"; + break; + } + return s; +} + +std::string InputAPI::stmt_skip_peek (uint32_t ind) const +{ + return type_ == DEFAULT + ? 
indent (ind) + opts->yych + " = " + opts.yychConversion () + "*++" + opts->yycursor + ";\n" + : stmt_skip (ind) + stmt_peek (ind); +} + +std::string InputAPI::stmt_skip_backup (uint32_t ind) const +{ + return type_ == DEFAULT + ? indent (ind) + opts->yymarker + " = ++" + opts->yycursor + ";\n" + : stmt_skip (ind) + stmt_backup (ind); +} + +std::string InputAPI::stmt_backup_peek (uint32_t ind) const +{ + return type_ == DEFAULT + ? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = " + opts->yycursor + ");\n" + : stmt_backup (ind) + stmt_peek (ind); +} + +std::string InputAPI::stmt_skip_backup_peek (uint32_t ind) const +{ + return type_ == DEFAULT + ? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = ++" + opts->yycursor + ");\n" + : stmt_skip (ind) + stmt_backup (ind) + stmt_peek (ind); +} + +std::string InputAPI::expr_lessthan_one () const +{ + return type_ == DEFAULT + ? opts->yylimit + " <= " + opts->yycursor + : expr_lessthan (1); +} + +std::string InputAPI::expr_lessthan (size_t n) const +{ + std::ostringstream s; + switch (type_) + { + case DEFAULT: + s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n; + break; + case CUSTOM: + s << opts->yylessthan << " (" << n << ")"; + break; + } + return s.str (); +} + +} // end namespace re2c diff --git a/tools/re2c/src/codegen/input_api.h b/tools/re2c/src/codegen/input_api.h new file mode 100644 index 000000000..423475e6b --- /dev/null +++ b/tools/re2c/src/codegen/input_api.h @@ -0,0 +1,43 @@ +#ifndef _RE2C_CODEGEN_INPUT_API_ +#define _RE2C_CODEGEN_INPUT_API_ + +#include "src/util/c99_stdint.h" +#include + +namespace re2c +{ + +class InputAPI +{ +public: + enum type_t + { DEFAULT + , CUSTOM + }; + +private: + type_t type_; + +public: + InputAPI (); + type_t type () const; + void set (type_t t); + std::string expr_peek () const; + std::string expr_peek_save () const; + std::string stmt_peek (uint32_t ind) const; + std::string 
stmt_skip (uint32_t ind) const; + std::string stmt_backup (uint32_t ind) const; + std::string stmt_backupctx (uint32_t ind) const; + std::string stmt_restore (uint32_t ind) const; + std::string stmt_restorectx (uint32_t ind) const; + std::string stmt_skip_peek (uint32_t ind) const; + std::string stmt_skip_backup (uint32_t ind) const; + std::string stmt_backup_peek (uint32_t ind) const; + std::string stmt_skip_backup_peek (uint32_t ind) const; + std::string expr_lessthan_one () const; + std::string expr_lessthan (size_t n) const; +}; + +} // end namespace re2c + +#endif // _RE2C_CODEGEN_INPUT_API_ diff --git a/tools/re2c/src/codegen/label.cc b/tools/re2c/src/codegen/label.cc new file mode 100644 index 000000000..c2e384fbf --- /dev/null +++ b/tools/re2c/src/codegen/label.cc @@ -0,0 +1,42 @@ +#include + +#include "src/codegen/label.h" + +namespace re2c { + +const uint32_t label_t::FIRST = 0; + +label_t::label_t () + : value (FIRST) +{} + +void label_t::inc () +{ + ++value; +} + +label_t label_t::first () +{ + return label_t (); +} + +bool label_t::operator < (const label_t & l) const +{ + return value < l.value; +} + +uint32_t label_t::width () const +{ + uint32_t v = value; + uint32_t n = 0; + while (v /= 10) ++n; + return n; +} + +std::ostream & operator << (std::ostream & o, label_t l) +{ + o << l.value; + return o; +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/label.h b/tools/re2c/src/codegen/label.h new file mode 100644 index 000000000..cb2179d3c --- /dev/null +++ b/tools/re2c/src/codegen/label.h @@ -0,0 +1,39 @@ +#ifndef _RE2C_CODEGEN_LABEL_ +#define _RE2C_CODEGEN_LABEL_ + +#include // ostream + +#include "src/util/c99_stdint.h" + +namespace re2c { + +template class counter_t; + +// label public API: +// - get first label +// - compare labels +// - get label width +// - output label to std::ostream +// +// label private API (for label counter): +// - get initial label +// - get next label +class label_t +{ + static const uint32_t FIRST; + 
uint32_t value; + label_t (); + void inc (); + +public: + static label_t first (); + bool operator < (const label_t & l) const; + uint32_t width () const; + friend std::ostream & operator << (std::ostream & o, label_t l); + + friend class counter_t; +}; + +} // namespace re2c + +#endif // _RE2C_CODEGEN_LABEL_ diff --git a/tools/re2c/src/codegen/output.cc b/tools/re2c/src/codegen/output.cc new file mode 100644 index 000000000..5276ef77e --- /dev/null +++ b/tools/re2c/src/codegen/output.cc @@ -0,0 +1,465 @@ +#include +#include +#include + +#include "src/codegen/indent.h" +#include "src/codegen/output.h" +#include "src/codegen/print.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/rule_rank.h" + +namespace re2c +{ + +OutputFragment::OutputFragment (type_t t, uint32_t i) + : type (t) + , stream () + , indent (i) +{} + +uint32_t OutputFragment::count_lines () +{ + uint32_t lines = 0; + const std::string content = stream.str (); + const char * p = content.c_str (); + for (uint32_t i = 0; i < content.size (); ++i) + { + if (p[i] == '\n') + { + ++lines; + } + } + return lines; +} + +OutputBlock::OutputBlock () + : fragments () + , used_yyaccept (false) + , force_start_label (false) + , user_start_label () + , line (0) +{ + fragments.push_back (new OutputFragment (OutputFragment::CODE, 0)); +} + +OutputBlock::~OutputBlock () +{ + for (unsigned int i = 0; i < fragments.size (); ++i) + { + delete fragments[i]; + } +} + +OutputFile::OutputFile (const char * fn) + : file_name (fn) + , file (NULL) + , blocks () + , label_counter () + , warn_condition_order (!opts->tFlag) // see note [condition order] +{ + new_block (); +} + +bool OutputFile::open () +{ + if (file_name == NULL) + { + file_name = ""; + file = stdout; + } + else + { + file = fopen (file_name, "wb"); + } + return file != NULL; +} + +OutputFile::~OutputFile () +{ + if (file != NULL && file != stdout) + { + fclose (file); + } + for (unsigned int i = 0; i < 
blocks.size (); ++i) + { + delete blocks[i]; + } +} + +std::ostream & OutputFile::stream () +{ + return blocks.back ()->fragments.back ()->stream; +} + +OutputFile & OutputFile::wraw (const char * s, size_t n) +{ + stream ().write (s, static_cast (n)); + return *this; +} + +OutputFile & OutputFile::wu32_hex (uint32_t n) +{ + prtHex (stream (), n); + return *this; +} + +OutputFile & OutputFile::wc_hex (uint32_t n) +{ + prtChOrHex (stream (), n); + return *this; +} + +OutputFile & OutputFile::wrange (uint32_t l, uint32_t u) +{ + printSpan (stream (), l, u); + return *this; +} + +OutputFile & OutputFile::wu32_width (uint32_t n, int w) +{ + stream () << std::setw (w); + stream () << n; + return *this; +} + +OutputFile & OutputFile::wline_info (uint32_t l, const char * fn) +{ + output_line_info (stream (), l, fn); + return *this; +} + +OutputFile & OutputFile::wversion_time () +{ + output_version_time (stream ()); + return *this; +} + +OutputFile & OutputFile::wuser_start_label () +{ + const std::string label = blocks.back ()->user_start_label; + if (!label.empty ()) + { + wstring(label).ws(":\n"); + } + return *this; +} + +OutputFile & OutputFile::wc (char c) +{ + stream () << c; + return *this; +} + +OutputFile & OutputFile::wu32 (uint32_t n) +{ + stream () << n; + return *this; +} + +OutputFile & OutputFile::wu64 (uint64_t n) +{ + stream () << n; + return *this; +} + +OutputFile & OutputFile::wstring (const std::string & s) +{ + stream () << s; + return *this; +} + +OutputFile & OutputFile::ws (const char * s) +{ + stream () << s; + return *this; +} + +OutputFile & OutputFile::wlabel (label_t l) +{ + stream () << l; + return *this; +} + +OutputFile & OutputFile::wrank (rule_rank_t r) +{ + stream () << r; + return *this; +} + +OutputFile & OutputFile::wind (uint32_t ind) +{ + stream () << indent(ind); + return *this; +} + +void OutputFile::insert_code () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::CODE, 0)); +} + +OutputFile & 
OutputFile::wdelay_line_info () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::LINE_INFO, 0)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_state_goto (uint32_t ind) +{ + if (opts->fFlag && !bWroteGetState) + { + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind)); + insert_code (); + bWroteGetState = true; + } + return *this; +} + +OutputFile & OutputFile::wdelay_types () +{ + warn_condition_order = false; // see note [condition order] + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::TYPES, 0)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_warn_condition_order () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::WARN_CONDITION_ORDER, 0)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind) +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_yymaxfill () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0)); + insert_code (); + return *this; +} + +void OutputFile::set_used_yyaccept () +{ + blocks.back ()->used_yyaccept = true; +} + +bool OutputFile::get_used_yyaccept () const +{ + return blocks.back ()->used_yyaccept; +} + +void OutputFile::set_force_start_label (bool force) +{ + blocks.back ()->force_start_label = force; +} + +void OutputFile::set_user_start_label (const std::string & label) +{ + blocks.back ()->user_start_label = label; +} + +bool OutputFile::get_force_start_label () const +{ + return blocks.back ()->force_start_label; +} + +void OutputFile::set_block_line (uint32_t l) +{ + blocks.back ()->line = l; +} + +uint32_t OutputFile::get_block_line () const +{ + return blocks.back ()->line; +} + +void OutputFile::new_block () +{ + blocks.push_back (new OutputBlock ()); + 
insert_code (); +} + +void OutputFile::emit + ( const std::vector & types + , size_t max_fill + ) +{ + if (file != NULL) + { + unsigned int line_count = 1; + for (unsigned int j = 0; j < blocks.size (); ++j) + { + OutputBlock & b = * blocks[j]; + for (unsigned int i = 0; i < b.fragments.size (); ++i) + { + OutputFragment & f = * b.fragments[i]; + switch (f.type) + { + case OutputFragment::CODE: + break; + case OutputFragment::LINE_INFO: + output_line_info (f.stream, line_count + 1, file_name); + break; + case OutputFragment::STATE_GOTO: + output_state_goto (f.stream, f.indent, 0); + break; + case OutputFragment::TYPES: + output_types (f.stream, f.indent, types); + break; + case OutputFragment::WARN_CONDITION_ORDER: + if (warn_condition_order) // see note [condition order] + { + warn.condition_order (b.line); + } + break; + case OutputFragment::YYACCEPT_INIT: + output_yyaccept_init (f.stream, f.indent, b.used_yyaccept); + break; + case OutputFragment::YYMAXFILL: + output_yymaxfill (f.stream, max_fill); + break; + } + std::string content = f.stream.str (); + fwrite (content.c_str (), 1, content.size (), file); + line_count += f.count_lines (); + } + } + } +} + +HeaderFile::HeaderFile (const char * fn) + : stream () + // header is always generated, but not always dumped to file + // NULL filename crashes 'operator <<' on some platforms + // TODO: generate header only if necessary + , file_name (fn ? 
fn : ".h") + , file (NULL) +{} + +bool HeaderFile::open () +{ + file = fopen (file_name, "wb"); + return file != NULL; +} + +void HeaderFile::emit (const std::vector & types) +{ + output_version_time (stream); + output_line_info (stream, 3, file_name); + stream << "\n"; + output_types (stream, 0, types); +} + +HeaderFile::~HeaderFile () +{ + if (file != NULL) + { + std::string content = stream.str (); + fwrite (content.c_str (), 1, content.size (), file); + fclose (file); + } +} + +Output::Output (const char * source_name, const char * header_name) + : source (source_name) + , header (header_name) + , types () + , skeletons () + , max_fill (1) +{} + +Output::~Output () +{ + if (!warn.error ()) + { + source.emit (types, max_fill); + header.emit (types); + } +} + +void output_state_goto (std::ostream & o, uint32_t ind, uint32_t start_label) +{ + o << indent(ind) << "switch (" << output_get_state() << ") {\n"; + if (opts->bUseStateAbort) + { + o << indent(ind) << "default: abort();\n"; + o << indent(ind) << "case -1: goto " << opts->labelPrefix << start_label << ";\n"; + } + else + { + o << indent(ind) << "default: goto " << opts->labelPrefix << start_label << ";\n"; + } + for (uint32_t i = 0; i < last_fill_index; ++i) + { + o << indent(ind) << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n"; + } + o << indent(ind) << "}\n"; + if (opts->bUseStateNext) + { + o << opts->yynext << ":\n"; + } +} + +void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept) +{ + if (used_yyaccept) + { + o << indent (ind) << "unsigned int " << opts->yyaccept << " = 0;\n"; + } +} + +void output_yymaxfill (std::ostream & o, size_t max_fill) +{ + o << "#define YYMAXFILL " << max_fill << "\n"; +} + +void output_line_info (std::ostream & o, uint32_t line_number, const char * file_name) +{ + if (!opts->iFlag) + { + o << "#line " << line_number << " \"" << file_name << "\"\n"; + } +} + +void output_types (std::ostream & o, uint32_t ind, const std::vector & types) 
+{ + o << indent (ind++) << "enum " << opts->yycondtype << " {\n"; + for (unsigned int i = 0; i < types.size (); ++i) + { + o << indent (ind) << opts->condEnumPrefix << types[i] << ",\n"; + } + o << indent (--ind) << "};\n"; +} + +void output_version_time (std::ostream & o) +{ + o << "/* Generated by re2c"; + if (opts->version) + { + o << " " << PACKAGE_VERSION; + } + if (!opts->bNoGenerationDate) + { + o << " on "; + time_t now = time (NULL); + o.write (ctime (&now), 24); + } + o << " */" << "\n"; +} + +std::string output_get_state () +{ + return opts->state_get_naked + ? opts->state_get + : opts->state_get + "()"; +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/output.h b/tools/re2c/src/codegen/output.h new file mode 100644 index 000000000..774fea352 --- /dev/null +++ b/tools/re2c/src/codegen/output.h @@ -0,0 +1,158 @@ +#ifndef _RE2C_CODEGEN_OUTPUT_ +#define _RE2C_CODEGEN_OUTPUT_ + +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include + +#include "src/codegen/label.h" +#include "src/util/counter.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +class rule_rank_t; + +struct OutputFragment +{ + enum type_t + { CODE +// , CONFIG + , LINE_INFO + , STATE_GOTO + , TYPES + , WARN_CONDITION_ORDER + , YYACCEPT_INIT + , YYMAXFILL + }; + + type_t type; + std::ostringstream stream; + uint32_t indent; + + OutputFragment (type_t t, uint32_t i); + uint32_t count_lines (); +}; + +struct OutputBlock +{ + std::vector fragments; + bool used_yyaccept; + bool force_start_label; + std::string user_start_label; + uint32_t line; + + OutputBlock (); + ~OutputBlock (); +}; + +struct OutputFile +{ +public: + const char * file_name; + +private: + FILE * file; + std::vector blocks; + +public: + counter_t label_counter; + bool warn_condition_order; + +private: + std::ostream & stream (); + void insert_code (); + +public: + OutputFile (const char * fn); + ~OutputFile (); + + bool open (); + + void new_block (); + + // 
immediate output + OutputFile & wraw (const char * s, size_t n); + OutputFile & wc (char c); + OutputFile & wc_hex (uint32_t n); + OutputFile & wu32 (uint32_t n); + OutputFile & wu32_hex (uint32_t n); + OutputFile & wu32_width (uint32_t n, int w); + OutputFile & wu64 (uint64_t n); + OutputFile & wstring (const std::string & s); + OutputFile & ws (const char * s); + OutputFile & wlabel (label_t l); + OutputFile & wrank (rule_rank_t l); + OutputFile & wrange (uint32_t u, uint32_t l); + OutputFile & wline_info (uint32_t l, const char * fn); + OutputFile & wversion_time (); + OutputFile & wuser_start_label (); + OutputFile & wind (uint32_t ind); + + // delayed output + OutputFile & wdelay_line_info (); + OutputFile & wdelay_state_goto (uint32_t ind); + OutputFile & wdelay_types (); + OutputFile & wdelay_warn_condition_order (); + OutputFile & wdelay_yyaccept_init (uint32_t ind); + OutputFile & wdelay_yymaxfill (); + + void set_used_yyaccept (); + bool get_used_yyaccept () const; + void set_force_start_label (bool force); + void set_user_start_label (const std::string & label); + bool get_force_start_label () const; + void set_block_line (uint32_t l); + uint32_t get_block_line () const; + + void emit (const std::vector & types, size_t max_fill); + + FORBID_COPY (OutputFile); +}; + +struct HeaderFile +{ + HeaderFile (const char * fn); + ~HeaderFile (); + bool open (); + void emit (const std::vector & types); + +private: + std::ostringstream stream; + const char * file_name; + FILE * file; + + FORBID_COPY (HeaderFile); +}; + +struct Output +{ + OutputFile source; + HeaderFile header; + std::vector types; + std::set skeletons; + size_t max_fill; + + Output (const char * source_name, const char * header_name); + ~Output (); +}; + +void output_line_info (std::ostream &, uint32_t, const char *); +void output_state_goto (std::ostream &, uint32_t, uint32_t); +void output_types (std::ostream &, uint32_t, const std::vector &); +void output_version_time (std::ostream &); +void 
output_yyaccept_init (std::ostream &, uint32_t, bool); +void output_yymaxfill (std::ostream &, size_t); + +// helpers +std::string output_get_state (); + +} // namespace re2c + +#endif // _RE2C_CODEGEN_OUTPUT_ diff --git a/tools/re2c/src/codegen/print.cc b/tools/re2c/src/codegen/print.cc new file mode 100644 index 000000000..2303e847f --- /dev/null +++ b/tools/re2c/src/codegen/print.cc @@ -0,0 +1,156 @@ +#include + +#include "src/codegen/print.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +bool is_print (uint32_t c) +{ + return c >= 0x20 && c < 0x7F; +} + +bool is_space (uint32_t c) +{ + switch (c) + { + case '\t': + case '\f': + case '\v': + case '\n': + case '\r': + case ' ': + return true; + default: + return false; + } +} + +char hexCh(uint32_t c) +{ + static const char * sHex = "0123456789ABCDEF"; + return sHex[c & 0x0F]; +} + +void prtChOrHex(std::ostream& o, uint32_t c) +{ + if (opts->encoding.type () != Enc::EBCDIC + && (is_print (c) || is_space (c))) + { + o << '\''; + prtCh(o, c); + o << '\''; + } + else + { + prtHex(o, c); + } +} + +void prtHex(std::ostream& o, uint32_t c) +{ + o << "0x"; + const uint32_t cunit_size = opts->encoding.szCodeUnit (); + if (cunit_size >= 4) + { + o << hexCh (c >> 28u) + << hexCh (c >> 24u) + << hexCh (c >> 20u) + << hexCh (c >> 16u); + } + if (cunit_size >= 2) + { + o << hexCh (c >> 12u) + << hexCh (c >> 8u); + } + o << hexCh (c >> 4u) + << hexCh (c); +} + +void prtCh(std::ostream& o, uint32_t c) +{ + const bool dot = opts->target == opt_t::DOT; + + switch (c) + { + case '\'': + o << (dot ? "'" : "\\'"); + break; + + case '"': + o << (dot ? "\\\"" : "\""); + break; + + case '\n': + o << (dot ? "\\\\n" : "\\n"); + break; + + case '\t': + o << (dot ? "\\\\t" : "\\t"); + break; + + case '\v': + o << (dot ? "\\\\v" : "\\v"); + break; + + case '\b': + o << (dot ? "\\\\b" : "\\b"); + break; + + case '\r': + o << (dot ? 
"\\\\r" : "\\r"); + break; + + case '\f': + o << (dot ? "\\\\f" : "\\f"); + break; + + case '\a': + o << (dot ? "\\\\a" :"\\a"); + break; + + case '\\': + o << "\\\\"; // both .dot and C/C++ code expect "\\" + break; + + default: + o << static_cast (c); + break; + } +} + +void prtChOrHexForSpan(std::ostream& o, uint32_t c) +{ + if (opts->encoding.type () != Enc::EBCDIC + && is_print (c) + && (c != ']')) + { + prtCh(o, c); + } + else + { + prtHex(o, c); + } +} + +void printSpan(std::ostream& o, uint32_t lb, uint32_t ub) +{ + o << "["; + if ((ub - lb) == 1) + { + prtChOrHexForSpan(o, lb); + } + else + { + prtChOrHexForSpan(o, lb); + o << "-"; + prtChOrHexForSpan(o, ub - 1); + } + o << "]"; +} + +} // end namespace re2c + diff --git a/tools/re2c/src/codegen/print.h b/tools/re2c/src/codegen/print.h new file mode 100644 index 000000000..978d13e5c --- /dev/null +++ b/tools/re2c/src/codegen/print.h @@ -0,0 +1,20 @@ +#ifndef _RE2C_CODEGEN_PRINT_ +#define _RE2C_CODEGEN_PRINT_ + +#include "src/util/c99_stdint.h" +#include + +namespace re2c +{ + +bool is_print (uint32_t c); +bool is_space (uint32_t c); +char hexCh(uint32_t c); +void prtCh(std::ostream&, uint32_t); +void prtHex(std::ostream&, uint32_t); +void prtChOrHex(std::ostream&, uint32_t); +void printSpan(std::ostream&, uint32_t, uint32_t); + +} // end namespace re2c + +#endif // _RE2C_CODEGEN_PRINT_ diff --git a/tools/re2c/src/conf/msg.cc b/tools/re2c/src/conf/msg.cc new file mode 100644 index 000000000..87962f0a9 --- /dev/null +++ b/tools/re2c/src/conf/msg.cc @@ -0,0 +1,258 @@ +#include +#include +#include + +#if defined(_MSC_VER) && _MSC_VER < 1500 +#include "config.msc.h" +#else +#include "config.h" +#endif +#include "src/conf/msg.h" + +namespace re2c { + +void error (const char * fmt, ...) 
+{ + fprintf (stderr, "re2c: error: "); + + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); + + fprintf (stderr, "\n"); +} + +void error_encoding () +{ + error ("only one of switches -e, -w, -x, -u and -8 must be set"); +} + +void error_arg (const char * option) +{ + error ("expected argument to option %s", option); +} + +void warning_start (uint32_t line, bool error) +{ + static const char * msg = error ? "error" : "warning"; + fprintf (stderr, "re2c: %s: line %u: ", msg, line); +} + +void warning_end (const char * type, bool error) +{ + if (type != NULL) + { + const char * prefix = error ? "error-" : ""; + fprintf (stderr, " [-W%s%s]", prefix, type); + } + fprintf (stderr, "\n"); +} + +void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) +{ + warning_start (line, error); + + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); + + warning_end (type, error); +} + +void usage () +{ + fprintf (stderr, + "usage: re2c [-bcdDefFghirsuvVwx18] [-o of] [-t th] file\n" + "\n" + "-? -h --help Display this info.\n" + "\n" + "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n" + " coax better code out of the compiler. Most useful for\n" + " specifications with more than a few keywords (e.g. for\n" + " most programming languages).\n" + "\n" + "-c --conditions Require start conditions.\n" + "\n" + "-d --debug-output Creates a parser that dumps information during\n" + " about the current position and in which state the\n" + " parser is.\n" + "\n" + "-D --emit-dot Emit a Graphviz dot view of the DFA graph\n" + "\n" + "-e --ecb Generate a parser that supports EBCDIC. The generated code\n" + " can deal with any character up to 0xFF. In this mode re2c\n" + " assumes that input character size is 1 byte. 
This switch is\n" + " incompatible with -w, -u, -x and -8\n" + "\n" + "-f --storable-state Generate a scanner that supports storable states.\n" + "\n" + "-F --flex-syntax Partial support for flex syntax.\n" + "\n" + "-g --computed-gotos Implies -b. Generate computed goto code (only useable\n" + " with gcc).\n" + "\n" + "-i --no-debug-info Do not generate '#line' info (useful for versioning).\n" + "\n" + "-o of --output=of Specify the output file (of) instead of stdout\n" + "\n" + "-r --reusable Allow reuse of scanner definitions.\n" + "\n" + "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" + " need this assist to generate better code.\n" + "\n" + "-t th --type-header=th Generate a type header file (th) with type definitions.\n" + "\n" + "-u --unicode Generate a parser that supports UTF-32. The generated code\n" + " can deal with any valid Unicode character up to 0x10FFFF.\n" + " In this mode re2c assumes that input character size is 4 bytes.\n" + " This switch is incompatible with -e, -w, -x and -8. It implies -s.\n" + "\n" + "-v --version Show version information.\n" + "\n" + "-V --vernum Show version as one number.\n" + "\n" + "-w --wide-chars Generate a parser that supports UCS-2. The generated code can\n" + " deal with any valid Unicode character up to 0xFFFF. In this mode\n" + " re2c assumes that input character size is 2 bytes. This switch is\n" + " incompatible with -e, -x, -u and -8. It implies -s.\n" + "\n" + "-x --utf-16 Generate a parser that supports UTF-16. The generated code can\n" + " deal with any valid Unicode character up to 0x10FFFF. In this mode\n" + " re2c assumes that input character size is 2 bytes. This switch is\n" + " incompatible with -e, -w, -u and -8. It implies -s.\n" + "\n" + "-8 --utf-8 Generate a parser that supports UTF-8. The generated code can\n" + " deal with any valid Unicode character up to 0x10FFFF. In this mode\n" + " re2c assumes that input character size is 1 byte. 
This switch is\n" + " incompatible with -e, -w, -x and -u." + "\n" + "--no-generation-date Suppress date output in the generated file.\n" + "\n" + "--no-version Suppress version output in the generated file.\n" + "\n" + "--case-insensitive All strings are case insensitive, so all \"-expressions\n" + " are treated in the same way '-expressions are.\n" + "\n" + "--case-inverted Invert the meaning of single and double quoted strings.\n" + " With this switch single quotes are case sensitive and\n" + " double quotes are case insensitive.\n" + "\n" + "--encoding-policy ep Specify what re2c should do when given bad code unit.\n" + " ep can be one of the following: fail, substitute, ignore.\n" + "\n" + "--input i Specify re2c input API.\n" + " i can be one of the following: default, custom.\n" + "\n" + "--skeleton Instead of embedding re2c-generated code into C/C++ source,\n" + " generate a self-contained program for the same DFA.\n" + " Most useful for correctness and performance testing.\n" + "\n" + "--empty-class policy What to do if user inputs empty character class. policy can be\n" + " one of the following: 'match-empty' (match empty input, default),\n" + " 'match-none' (fail to match on any input), 'error' (compilation\n" + " error). Note that there are various ways to construct empty class,\n" + " e.g: [], [^\\x00-\\xFF], [\\x00-\\xFF]\\[\\x00-\\xFF].\n" + "\n" + "--dfa-minimization
\n" + " Internal algorithm used by re2c to minimize DFA (defaults to\n" + " 'moore'). Both table filling and Moore's algorithms should\n" + " produce identical DFA (up to states relabelling). Table filling\n" + " algorithm is much simpler and slower; it serves as a reference\n" + " implementation.\n" + "\n" + "-1 --single-pass Deprecated and does nothing (single pass is by default now).\n" + "\n" + "-W Turn on all warnings.\n" + "\n" + "-Werror Turn warnings into errors. Note that this option along doesn't\n" + " turn on any warnings, it only affects those warnings that have\n" + " been turned on so far or will be turned on later.\n" + "\n" + "-W Turn on individual warning.\n" + "\n" + "-Wno- Turn off individual warning.\n" + "\n" + "-Werror- Turn on individual warning and treat it as error (this implies\n" + " '-W').\n" + "\n" + "-Wno-error- Don't treat this particular warning as error. This doesn't turn\n" + " off the warning itself.\n" + "\n" + "Warnings:\n" + "\n" + "-Wcondition-order Warn if the generated program makes implicit assumptions about\n" + " condition numbering. One should use either '-t, --type-header'\n" + " option or '/*!types:re2c*/' directive to generate mapping of\n" + " condition names to numbers and use autogenerated condition names.\n" + "\n" + "-Wempty-character-class Warn if regular expression contains empty character class. From\n" + " the rational point of view trying to match empty character class\n" + " makes no sense: it should always fail. However, for backwards\n" + " compatibility reasons re2c allows empty character class and treats\n" + " it as empty string. Use '--empty-class' option to change default\n" + " behaviour.\n" + "\n" + "-Wmatch-empty-string Warn if regular expression in a rule is nullable (matches empty\n" + " string). 
If DFA runs in a loop and empty match is unintentional\n" + " (input position in not advanced manually), lexer may get stuck\n" + " in eternal loop.\n" + "\n" + "-Wswapped-range Warn if range lower bound is greater that upper bound. Default\n" + " re2c behaviour is to silently swap range bounds.\n" + "\n" + "-Wundefined-control-flow\n" + " Warn if some input strings cause undefined control flow in lexer\n" + " (the faulty patterns are reported). This is the most dangerous\n" + " and common mistake. It can be easily fixed by adding default rule\n" + " '*' (this rule has the lowest priority, matches any code unit\n" + " and consumes exactly one code unit).\n" + "\n" + "-Wuseless-escape Warn if a symbol is escaped when it shouldn't be. By default re2c\n" + " silently ignores escape, but this may as well indicate a typo\n" + " or an error in escape sequence.\n" + "\n" + ); +} + +void vernum () +{ + std::string vernum (PACKAGE_VERSION); + if (vernum[1] == '.') + { + vernum.insert(0, "0"); + } + vernum.erase(2, 1); + if (vernum[3] == '.') + { + vernum.insert(2, "0"); + } + vernum.erase(4, 1); + if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9') + { + vernum.insert(4, "0"); + } + vernum.resize(6, '0'); + + printf ("%s\n", vernum.c_str ()); +} + +void version () +{ + printf ("re2c %s\n", PACKAGE_VERSION); +} + +std::string incond (const std::string & cond) +{ + std::string s; + if (!cond.empty ()) + { + s += "in condition '"; + s += cond; + s += "' "; + } + return s; +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/msg.h b/tools/re2c/src/conf/msg.h new file mode 100644 index 000000000..b70555239 --- /dev/null +++ b/tools/re2c/src/conf/msg.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_CONF_MSG_ +#define _RE2C_CONF_MSG_ + +#include + +#include "src/util/attribute.h" +#include "src/util/c99_stdint.h" + +namespace re2c { + +void error (const char * fmt, ...) 
RE2C_GXX_ATTRIBUTE ((format (printf, 1, 2))); +void error_encoding (); +void error_arg (const char * option); +void warning_start (uint32_t line, bool error); +void warning_end (const char * type, bool error); +void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 4, 5))); +void usage (); +void vernum (); +void version (); +std::string incond (const std::string & cond); + +} // namespace re2c + +#endif // _RE2C_CONF_MSG_ diff --git a/tools/re2c/src/conf/opt.cc b/tools/re2c/src/conf/opt.cc new file mode 100644 index 000000000..fa65ceaa8 --- /dev/null +++ b/tools/re2c/src/conf/opt.cc @@ -0,0 +1,331 @@ +#include "src/conf/msg.h" +#include "src/conf/opt.h" + +namespace re2c +{ + +Opt opts; + +opt_t::opt_t () +#define OPT1(type, name, value) : name (value) +#define OPT(type, name, value) , name (value) + RE2C_OPTS +#undef OPT1 +#undef OPT +{} + +opt_t::opt_t (const opt_t & opt) +#define OPT1(type, name, value) : name (opt.name) +#define OPT(type, name, value) , name (opt.name) + RE2C_OPTS +#undef OPT1 +#undef OPT +{} + +opt_t & opt_t::operator = (const opt_t & opt) +{ +#define OPT1 OPT +#define OPT(type, name, value) name = opt.name; + RE2C_OPTS +#undef OPT1 +#undef OPT + return *this; +} + +void opt_t::fix () +{ + // some options either make no sense or must have fixed value + // with current target: reset them to default + switch (target) + { + case DOT: + // default code generation options + sFlag = Opt::baseopt.sFlag; + bFlag = Opt::baseopt.bFlag; + gFlag = Opt::baseopt.gFlag; + cGotoThreshold = Opt::baseopt.cGotoThreshold; + // default environment-insensitive formatting + yybmHexTable = Opt::baseopt.yybmHexTable; + // fallthrough + case SKELETON: + // default line information + iFlag = Opt::baseopt.iFlag; + // default environment-sensitive formatting + topIndent = Opt::baseopt.topIndent; + indString = Opt::baseopt.indString; + condDivider = Opt::baseopt.condDivider; + condDividerParam = 
Opt::baseopt.condDividerParam; + // default environment bindings + tFlag = Opt::baseopt.tFlag; + header_file = Opt::baseopt.header_file; + yycondtype = Opt::baseopt.yycondtype; + cond_get = Opt::baseopt.cond_get; + cond_get_naked = Opt::baseopt.cond_get_naked; + cond_set = Opt::baseopt.cond_set; + cond_set_arg = Opt::baseopt.cond_set_arg; + cond_set_naked = Opt::baseopt.cond_set_naked; + yyctable = Opt::baseopt.yyctable; + condPrefix = Opt::baseopt.condPrefix; + condEnumPrefix = Opt::baseopt.condEnumPrefix; + condGoto = Opt::baseopt.condGoto; + condGotoParam = Opt::baseopt.condGotoParam; + fFlag = Opt::baseopt.fFlag; + state_get = Opt::baseopt.state_get; + state_get_naked = Opt::baseopt.state_get_naked; + state_set = Opt::baseopt.state_set; + state_set_arg = Opt::baseopt.state_set_arg; + state_set_naked = Opt::baseopt.state_set_naked; + yyfilllabel = Opt::baseopt.yyfilllabel; + yynext = Opt::baseopt.yynext; + yyaccept = Opt::baseopt.yyaccept; + bUseStateAbort = Opt::baseopt.bUseStateAbort; + bUseStateNext = Opt::baseopt.bUseStateNext; + yybm = Opt::baseopt.yybm; + yytarget = Opt::baseopt.yytarget; + input_api = Opt::baseopt.input_api; + yycursor = Opt::baseopt.yycursor; + yymarker = Opt::baseopt.yymarker; + yyctxmarker = Opt::baseopt.yyctxmarker; + yylimit = Opt::baseopt.yylimit; + yypeek = Opt::baseopt.yypeek; + yyskip = Opt::baseopt.yyskip; + yybackup = Opt::baseopt.yybackup; + yybackupctx = Opt::baseopt.yybackupctx; + yyrestore = Opt::baseopt.yyrestore; + yyrestorectx = Opt::baseopt.yyrestorectx; + yylessthan = Opt::baseopt.yylessthan; + dFlag = Opt::baseopt.dFlag; + yydebug = Opt::baseopt.yydebug; + yyctype = Opt::baseopt.yyctype; + yych = Opt::baseopt.yych; + bEmitYYCh = Opt::baseopt.bEmitYYCh; + yychConversion = Opt::baseopt.yychConversion; + fill = Opt::baseopt.fill; + fill_use = Opt::baseopt.fill_use; + fill_check = Opt::baseopt.fill_check; + fill_arg = Opt::baseopt.fill_arg; + fill_arg_use = Opt::baseopt.fill_arg_use; + fill_naked = 
Opt::baseopt.fill_naked; + labelPrefix = Opt::baseopt.labelPrefix; + break; + default: + break; + } + + if (bCaseInsensitive) + { + bCaseInverted = Opt::baseopt.bCaseInverted; + } + + // respect hierarchy + if (!cFlag) + { + tFlag = Opt::baseopt.tFlag; + header_file = Opt::baseopt.header_file; + yycondtype = Opt::baseopt.yycondtype; + cond_get = Opt::baseopt.cond_get; + cond_get_naked = Opt::baseopt.cond_get_naked; + cond_set = Opt::baseopt.cond_set; + cond_set_arg = Opt::baseopt.cond_set_arg; + cond_set_naked = Opt::baseopt.cond_set_naked; + yyctable = Opt::baseopt.yyctable; + condPrefix = Opt::baseopt.condPrefix; + condEnumPrefix = Opt::baseopt.condEnumPrefix; + condDivider = Opt::baseopt.condDivider; + condDividerParam = Opt::baseopt.condDividerParam; + condGoto = Opt::baseopt.condGoto; + condGotoParam = Opt::baseopt.condGotoParam; + } + if (!fFlag) + { + state_get = Opt::baseopt.state_get; + state_get_naked = Opt::baseopt.state_get_naked; + state_set = Opt::baseopt.state_set; + state_set_arg = Opt::baseopt.state_set_arg; + state_set_naked = Opt::baseopt.state_set_naked; + yyfilllabel = Opt::baseopt.yyfilllabel; + yynext = Opt::baseopt.yynext; + yyaccept = Opt::baseopt.yyaccept; + bUseStateAbort = Opt::baseopt.bUseStateAbort; + bUseStateNext = Opt::baseopt.bUseStateNext; + } + if (!bFlag) + { + yybmHexTable = Opt::baseopt.yybmHexTable; + yybm = Opt::baseopt.yybm; + } + if (!gFlag) + { + cGotoThreshold = Opt::baseopt.cGotoThreshold; + yytarget = Opt::baseopt.yytarget; + } + if (input_api.type () != InputAPI::DEFAULT) + { + yycursor = Opt::baseopt.yycursor; + yymarker = Opt::baseopt.yymarker; + yyctxmarker = Opt::baseopt.yyctxmarker; + yylimit = Opt::baseopt.yylimit; + } + if (input_api.type () != InputAPI::CUSTOM) + { + yypeek = Opt::baseopt.yypeek; + yyskip = Opt::baseopt.yyskip; + yybackup = Opt::baseopt.yybackup; + yybackupctx = Opt::baseopt.yybackupctx; + yyrestore = Opt::baseopt.yyrestore; + yyrestorectx = Opt::baseopt.yyrestorectx; + yylessthan = 
Opt::baseopt.yylessthan; + } + if (!dFlag) + { + yydebug = Opt::baseopt.yydebug; + } + if (!fill_use) + { + fill = Opt::baseopt.fill; + fill_check = Opt::baseopt.fill_check; + fill_arg = Opt::baseopt.fill_arg; + fill_arg_use = Opt::baseopt.fill_arg_use; + fill_naked = Opt::baseopt.fill_naked; + } + + // force individual options + switch (target) + { + case DOT: + iFlag = true; + break; + case SKELETON: + iFlag = true; + input_api.set (InputAPI::CUSTOM); + indString = " "; + topIndent = 2; + break; + default: + break; + } + switch (encoding.type ()) + { + case Enc::UCS2: + case Enc::UTF16: + case Enc::UTF32: + sFlag = true; + break; + default: + break; + } + if (bFlag) + { + sFlag = true; + } + if (gFlag) + { + bFlag = true; + sFlag = true; + } + if (header_file != NULL) + { + tFlag = true; + } +} + +realopt_t::realopt_t (useropt_t & opt) + : real () + , user (opt) +{} + +const opt_t * realopt_t::operator -> () +{ + sync (); + return ℜ +} + +void realopt_t::sync () +{ + if (user.diverge) + { + real = user.opt; + real.fix (); + user.diverge = false; + } +} + +useropt_t::useropt_t () + : opt () + , diverge (true) +{} + +opt_t * useropt_t::operator -> () +{ + diverge = true; + return &opt; +} + +const opt_t Opt::baseopt; + +bool Opt::source (const char * s) +{ + if (source_file) + { + error ("multiple source files: %s, %s", source_file, s); + return false; + } + else + { + source_file = s; + return true; + } +} + +bool Opt::output (const char * s) +{ + if (output_file) + { + error ("multiple output files: %s, %s", output_file, s); + return false; + } + else + { + output_file = s; + return true; + } +} + +void Opt::reset_encoding (const Enc & enc) +{ + useropt->encoding = enc; +} + +void Opt::reset_mapCodeName () +{ + // historically arranged set of names + // no actual reason why these particular options should be reset + useropt->cond_get = Opt::baseopt.cond_get; + useropt->cond_set = Opt::baseopt.cond_set; + useropt->fill = Opt::baseopt.fill; + useropt->state_get = 
Opt::baseopt.state_get; + useropt->state_set = Opt::baseopt.state_set; + useropt->yybackup = Opt::baseopt.yybackup; + useropt->yybackupctx = Opt::baseopt.yybackupctx; + useropt->yycondtype = Opt::baseopt.yycondtype; + useropt->yyctxmarker = Opt::baseopt.yyctxmarker; + useropt->yyctype = Opt::baseopt.yyctype; + useropt->yycursor = Opt::baseopt.yycursor; + useropt->yydebug = Opt::baseopt.yydebug; + useropt->yylessthan = Opt::baseopt.yylessthan; + useropt->yylimit = Opt::baseopt.yylimit; + useropt->yymarker = Opt::baseopt.yymarker; + useropt->yypeek = Opt::baseopt.yypeek; + useropt->yyrestore = Opt::baseopt.yyrestore; + useropt->yyrestorectx = Opt::baseopt.yyrestorectx; + useropt->yyskip = Opt::baseopt.yyskip; + useropt->yyfilllabel = Opt::baseopt.yyfilllabel; + useropt->yynext = Opt::baseopt.yynext; + useropt->yyaccept = Opt::baseopt.yyaccept; + useropt->yybm = Opt::baseopt.yybm; + useropt->yych = Opt::baseopt.yych; + useropt->yyctable = Opt::baseopt.yyctable; + useropt->yytarget = Opt::baseopt.yytarget; +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/opt.h b/tools/re2c/src/conf/opt.h new file mode 100644 index 000000000..30ab21e55 --- /dev/null +++ b/tools/re2c/src/conf/opt.h @@ -0,0 +1,218 @@ +#ifndef _RE2C_CONF_OPT_ +#define _RE2C_CONF_OPT_ + +#include "src/util/c99_stdint.h" +#include <stddef.h> +#include <string> + +#include "src/codegen/input_api.h" +#include "src/ir/dfa/dfa.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ +/* List of every option as OPT (type, name, default value). The first entry uses OPT1 so that an expansion can treat it differently from the rest (the constructors emit a leading colon for it and a comma for the others). */ +#define RE2C_OPTS \ + /* target */ \ + OPT1 (opt_t::target_t, target, CODE) \ + /* fingerprint */ \ + OPT (bool, bNoGenerationDate, false) \ + OPT (bool, version, true) \ + /* regular expressions */ \ + OPT (Enc, encoding, Enc ()) \ + OPT (bool, bCaseInsensitive, false) \ + OPT (bool, bCaseInverted, false) \ + OPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \ + /* conditions */ \ + OPT (bool, cFlag, false) \ + OPT (bool, tFlag, 
false) \ + OPT (const char *, header_file, NULL) \ + OPT (std::string, yycondtype, "YYCONDTYPE") \ + OPT (std::string, cond_get, "YYGETCONDITION") \ + OPT (bool, cond_get_naked, false) \ + OPT (std::string, cond_set, "YYSETCONDITION" ) \ + OPT (std::string, cond_set_arg, "@@" ) \ + OPT (bool, cond_set_naked, false ) \ + OPT (std::string, yyctable, "yyctable") \ + OPT (std::string, condPrefix, "yyc_") \ + OPT (std::string, condEnumPrefix, "yyc") \ + OPT (std::string, condDivider, "/* *********************************** */") \ + OPT (std::string, condDividerParam, "@@") \ + OPT (std::string, condGoto, "goto @@;") \ + OPT (std::string, condGotoParam, "@@") \ + /* states */ \ + OPT (bool, fFlag, false) \ + OPT (std::string, state_get, "YYGETSTATE") \ + OPT (bool, state_get_naked, false) \ + OPT (std::string, state_set, "YYSETSTATE") \ + OPT (std::string, state_set_arg, "@@") \ + OPT (bool, state_set_naked, false) \ + OPT (std::string, yyfilllabel, "yyFillLabel") \ + OPT (std::string, yynext, "yyNext") \ + OPT (std::string, yyaccept, "yyaccept") \ + OPT (bool, bUseStateAbort, false) \ + OPT (bool, bUseStateNext, false) \ + /* reuse */ \ + OPT (bool, rFlag, false) \ + /* partial flex syntax support */ \ + OPT (bool, FFlag, false) \ + /* code generation */ \ + OPT (bool, sFlag, false) \ + OPT (bool, bFlag, false) \ + OPT (std::string, yybm, "yybm") \ + OPT (bool, yybmHexTable, false) \ + OPT (bool, gFlag, false) \ + OPT (std::string, yytarget, "yytarget") \ + OPT (uint32_t, cGotoThreshold, 9) \ + /* formatting */ \ + OPT (uint32_t, topIndent, 0) \ + OPT (std::string, indString, "\t") \ + /* input API */ \ + OPT (InputAPI, input_api, InputAPI ()) \ + OPT (std::string, yycursor, "YYCURSOR") \ + OPT (std::string, yymarker, "YYMARKER") \ + OPT (std::string, yyctxmarker, "YYCTXMARKER") \ + OPT (std::string, yylimit, "YYLIMIT") \ + OPT (std::string, yypeek, "YYPEEK") \ + OPT (std::string, yyskip, "YYSKIP") \ + OPT (std::string, yybackup, "YYBACKUP") \ + OPT (std::string, 
yybackupctx, "YYBACKUPCTX") \ + OPT (std::string, yyrestore, "YYRESTORE") \ + OPT (std::string, yyrestorectx, "YYRESTORECTX") \ + OPT (std::string, yylessthan, "YYLESSTHAN") \ + /* #line directives */ \ + OPT (bool, iFlag, false) \ + /* debug */ \ + OPT (bool, dFlag, false) \ + OPT (std::string, yydebug, "YYDEBUG") \ + /* yych */ \ + OPT (std::string, yyctype, "YYCTYPE") \ + OPT (std::string, yych, "yych") \ + OPT (bool, bEmitYYCh, true) \ + OPT (bool, yychConversion, false) \ + /* YYFILL */ \ + OPT (std::string, fill, "YYFILL") \ + OPT (bool, fill_use, true) \ + OPT (bool, fill_check, true) \ + OPT (std::string, fill_arg, "@@") \ + OPT (bool, fill_arg_use, true) \ + OPT (bool, fill_naked, false) \ + /* labels */ \ + OPT (std::string, labelPrefix, "yy") \ + /* internals */ \ + OPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE) + +struct opt_t +{ + enum target_t + { + CODE, + DOT, + SKELETON + }; + +#define OPT1 OPT +#define OPT(type, name, value) type name; + RE2C_OPTS +#undef OPT1 +#undef OPT + + opt_t (); + opt_t (const opt_t & opt); + opt_t & operator = (const opt_t & opt); + void fix (); +}; + +class useropt_t; +class realopt_t +{ + opt_t real; + useropt_t & user; +public: + realopt_t (useropt_t & opt); + const opt_t * operator -> (); + void sync (); +}; + +class useropt_t +{ + opt_t opt; + bool diverge; +public: + useropt_t (); + opt_t * operator -> (); + friend void realopt_t::sync (); +}; + +struct Opt +{ + static const opt_t baseopt; + + const char * source_file; + const char * output_file; + +private: + useropt_t useropt; + realopt_t realopt; + +public: + Opt () + : source_file (NULL) + , output_file (NULL) + , useropt () + , realopt (useropt) + {} + + // read-only access, forces options syncronization + const opt_t * operator -> () + { + return realopt.operator -> (); + } + + bool source (const char * s); + bool output (const char * s); + + // Inplace configurations are applied immediately when parsed. 
+ // This is very bad: first, re2c behaviour is changed in the middle + // of the block; second, config is resynced too often (every + // attempt to read config that has been updated results in + // automatic resync). It is much better to set all options at once. + bool set_encoding (Enc::type_t t) { return useropt->encoding.set (t); } + void unset_encoding (Enc::type_t t) { useropt->encoding.unset (t); } + void set_encoding_policy (Enc::policy_t p) { useropt->encoding.setPolicy (p); } + void set_input_api (InputAPI::type_t t) { useropt->input_api.set (t); } +#define OPT1 OPT +#define OPT(type, name, value) void set_##name (type arg) { useropt->name = arg; } + RE2C_OPTS +#undef OPT1 +#undef OPT + + // helpers + std::string yychConversion () + { + return realopt->yychConversion + ? "(" + realopt->yyctype + ")" + : ""; + } + + // bad temporary hacks, should be fixed by proper scoping of config (parts). + void reset_encoding (const Enc & enc); + void reset_mapCodeName (); + + FORBID_COPY (Opt); +}; + +enum parse_opts_t +{ + OK, + EXIT_OK, + EXIT_FAIL +}; + +parse_opts_t parse_opts (char ** argv, Opt & opts); + +} // namespace re2c + +#endif // _RE2C_CONF_OPT_ diff --git a/tools/re2c/src/conf/parse_opts.cc b/tools/re2c/src/conf/parse_opts.cc new file mode 100644 index 000000000..9eb07989e --- /dev/null +++ b/tools/re2c/src/conf/parse_opts.cc @@ -0,0 +1,2846 @@ +/* Generated by re2c 0.16 on Thu Jan 21 10:47:47 2016 */ +#line 1 "../src/conf/parse_opts.re" +#include "src/codegen/input_api.h" +#include "src/conf/msg.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +static inline bool next (char * & arg, char ** & argv) +{ + arg = *++argv; + return arg != NULL; +} + +parse_opts_t parse_opts (char ** argv, Opt & opts) +{ +#define YYCTYPE unsigned char + char * YYCURSOR; + char * YYMARKER; + Warn::option_t option; + +#line 31 
"../src/conf/parse_opts.re" + + +opt: + if (!next (YYCURSOR, argv)) + { + goto end; + } + +#line 37 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = {}; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x00) goto yy2; + if (yych == '-') goto yy5; + goto yy4; +yy2: + ++YYCURSOR; +yy3: +#line 40 "../src/conf/parse_opts.re" + { + error ("bad option: %s", *argv); + return EXIT_FAIL; + } +#line 87 "src/conf/parse_opts.cc" +yy4: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + goto yy10; +yy5: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '-') { + if (yych <= 0x00) goto yy11; + if (yych >= '-') goto yy13; + } else { + if (yych == 'W') goto yy15; + } +#line 63 "../src/conf/parse_opts.re" + { goto opt_short; } +#line 102 "src/conf/parse_opts.cc" +yy7: + ++YYCURSOR; +#line 61 "../src/conf/parse_opts.re" + { if (!opts.source (*argv)) return EXIT_FAIL; goto opt; } +#line 107 "src/conf/parse_opts.cc" +yy9: + ++YYCURSOR; + yych = (YYCTYPE)*YYCURSOR; +yy10: + if (yybm[0+yych] & 128) { + goto yy9; + } + goto yy7; +yy11: + ++YYCURSOR; +#line 60 "../src/conf/parse_opts.re" + { if (!opts.source ("")) return EXIT_FAIL; goto opt; } +#line 120 "src/conf/parse_opts.cc" +yy13: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 0x00) goto yy17; +#line 64 "../src/conf/parse_opts.re" + { goto opt_long; } +#line 126 "src/conf/parse_opts.cc" +yy15: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'e') { + if (yych <= 0x00) goto yy19; + if (yych >= 'e') goto yy21; + } else { + if (yych == 'n') goto yy23; + } +yy16: +#line 68 "../src/conf/parse_opts.re" + { option = Warn::W; goto opt_warn; } +#line 139 "src/conf/parse_opts.cc" +yy17: + ++YYCURSOR; +#line 46 "../src/conf/parse_opts.re" + { + // all remaining arguments are non-options + // so they must be input files + // re2c expects exactly one input file + for (char * f; next (f, argv);) + { + if (!opts.source (f)) + { + return EXIT_FAIL; + } + } 
+ goto end; + } +#line 156 "src/conf/parse_opts.cc" +yy19: + ++YYCURSOR; +#line 66 "../src/conf/parse_opts.re" + { warn.set_all (); goto opt; } +#line 161 "src/conf/parse_opts.cc" +yy21: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy24; +yy22: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy3; + } else { + goto yy16; + } + } else { + goto yy28; + } +yy23: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy25; + goto yy22; +yy24: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy26; + goto yy22; +yy25: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy27; + goto yy22; +yy26: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy29; + goto yy22; +yy27: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy30; +yy28: +#line 69 "../src/conf/parse_opts.re" + { option = Warn::WNO; goto opt_warn; } +#line 199 "src/conf/parse_opts.cc" +yy29: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy31; + goto yy22; +yy30: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy32; + goto yy22; +yy31: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy33; + if (yych == '-') goto yy35; + goto yy22; +yy32: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy37; + goto yy22; +yy33: + ++YYCURSOR; +#line 67 "../src/conf/parse_opts.re" + { warn.set_all_error (); goto opt; } +#line 221 "src/conf/parse_opts.cc" +yy35: + ++YYCURSOR; +#line 70 "../src/conf/parse_opts.re" + { option = Warn::WERROR; goto opt_warn; } +#line 226 "src/conf/parse_opts.cc" +yy37: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'o') goto yy22; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'r') goto yy22; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != '-') goto yy22; + ++YYCURSOR; +#line 71 "../src/conf/parse_opts.re" + { option = Warn::WNOERROR; goto opt_warn; } +#line 237 "src/conf/parse_opts.cc" +} +#line 72 "../src/conf/parse_opts.re" + + +opt_warn: + +#line 244 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; 
+ yych = (YYCTYPE)*YYCURSOR; + switch (yych) { + case 'c': goto yy46; + case 'e': goto yy47; + case 'm': goto yy48; + case 's': goto yy49; + case 'u': goto yy50; + default: goto yy44; + } +yy44: + ++YYCURSOR; +yy45: +#line 77 "../src/conf/parse_opts.re" + { + error ("bad warning: %s", *argv); + return EXIT_FAIL; + } +#line 264 "src/conf/parse_opts.cc" +yy46: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'o') goto yy51; + goto yy45; +yy47: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'm') goto yy53; + goto yy45; +yy48: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy54; + goto yy45; +yy49: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'w') goto yy55; + goto yy45; +yy50: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'n') goto yy56; + if (yych == 's') goto yy57; + goto yy45; +yy51: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy58; +yy52: + YYCURSOR = YYMARKER; + goto yy45; +yy53: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy59; + goto yy52; +yy54: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy60; + goto yy52; +yy55: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy61; + goto yy52; +yy56: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy62; + if (yych == 'r') goto yy63; + goto yy52; +yy57: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy64; + goto yy52; +yy58: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy65; + goto yy52; +yy59: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy66; + goto yy52; +yy60: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy67; + goto yy52; +yy61: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy68; + goto yy52; +yy62: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy69; + goto yy52; +yy63: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy70; + goto yy52; +yy64: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy71; + goto yy52; +yy65: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy72; + goto yy52; +yy66: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy73; + goto yy52; +yy67: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy74; + goto yy52; +yy68: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy75; + goto yy52; +yy69: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy76; + goto yy52; +yy70: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy77; + goto yy52; +yy71: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy78; + goto yy52; +yy72: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy79; + goto yy52; +yy73: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy80; + goto yy52; +yy74: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy81; + goto yy52; +yy75: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy82; + goto yy52; +yy76: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy83; + goto yy52; +yy77: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy84; + goto yy52; +yy78: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy85; + goto yy52; +yy79: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy86; + goto yy52; +yy80: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy87; + goto yy52; +yy81: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy88; + goto yy52; +yy82: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy89; + goto yy52; +yy83: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy90; + goto yy52; +yy84: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy91; + goto yy52; +yy85: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy92; + goto yy52; +yy86: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy93; + goto yy52; +yy87: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy94; + goto yy52; +yy88: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy95; + goto yy52; +yy89: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy96; + 
goto yy52; +yy90: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy97; + goto yy52; +yy91: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy98; + goto yy52; +yy92: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy99; + goto yy52; +yy93: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy100; + goto yy52; +yy94: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy101; + goto yy52; +yy95: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy102; + goto yy52; +yy96: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy103; + goto yy52; +yy97: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy104; + goto yy52; +yy98: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy105; + goto yy52; +yy99: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy106; + goto yy52; +yy100: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy107; + goto yy52; +yy101: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy108; + goto yy52; +yy102: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy109; + goto yy52; +yy103: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy110; + goto yy52; +yy104: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy111; + goto yy52; +yy105: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy112; + goto yy52; +yy106: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy113; + goto yy52; +yy107: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy114; + goto yy52; +yy108: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy115; + goto yy52; +yy109: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy116; + goto yy52; +yy110: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy117; + goto yy52; +yy111: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy118; + goto yy52; +yy112: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy119; + goto yy52; +yy113: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy120; + goto yy52; +yy114: + 
yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy121; + goto yy52; +yy115: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy122; + goto yy52; +yy116: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy123; + goto yy52; +yy117: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy124; + goto yy52; +yy118: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy125; + goto yy52; +yy119: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy126; + goto yy52; +yy120: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy127; + goto yy52; +yy121: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy128; + goto yy52; +yy122: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy129; + goto yy52; +yy123: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy130; + goto yy52; +yy124: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy131; + goto yy52; +yy125: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy132; + goto yy52; +yy126: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy133; + goto yy52; +yy127: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy134; + goto yy52; +yy128: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy135; + goto yy52; +yy129: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy136; + goto yy52; +yy130: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy137; + goto yy52; +yy131: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy138; + goto yy52; +yy132: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy140; + goto yy52; +yy133: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy141; + goto yy52; +yy134: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy142; + goto yy52; +yy135: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy143; + goto yy52; +yy136: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy144; + goto yy52; +yy137: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy145; + goto yy52; +yy138: + 
++YYCURSOR; +#line 84 "../src/conf/parse_opts.re" + { warn.set (Warn::SWAPPED_RANGE, option); goto opt; } +#line 637 "src/conf/parse_opts.cc" +yy140: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy146; + goto yy52; +yy141: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy147; + goto yy52; +yy142: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy148; + goto yy52; +yy143: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy150; + goto yy52; +yy144: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy152; + goto yy52; +yy145: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy153; + goto yy52; +yy146: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy154; + goto yy52; +yy147: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy155; + goto yy52; +yy148: + ++YYCURSOR; +#line 87 "../src/conf/parse_opts.re" + { warn.set (Warn::USELESS_ESCAPE, option); goto opt; } +#line 674 "src/conf/parse_opts.cc" +yy150: + ++YYCURSOR; +#line 81 "../src/conf/parse_opts.re" + { warn.set (Warn::CONDITION_ORDER, option); goto opt; } +#line 679 "src/conf/parse_opts.cc" +yy152: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy156; + goto yy52; +yy153: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy157; + goto yy52; +yy154: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy158; + goto yy52; +yy155: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy159; + goto yy52; +yy156: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy160; + goto yy52; +yy157: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy161; + goto yy52; +yy158: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy162; + goto yy52; +yy159: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy163; + goto yy52; +yy160: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy165; + goto yy52; +yy161: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy166; + goto yy52; +yy162: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 
'f') goto yy168; + goto yy52; +yy163: + ++YYCURSOR; +#line 86 "../src/conf/parse_opts.re" + { warn.set (Warn::UNREACHABLE_RULES, option); goto opt; } +#line 728 "src/conf/parse_opts.cc" +yy165: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy169; + goto yy52; +yy166: + ++YYCURSOR; +#line 83 "../src/conf/parse_opts.re" + { warn.set (Warn::MATCH_EMPTY_STRING, option); goto opt; } +#line 737 "src/conf/parse_opts.cc" +yy168: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy170; + goto yy52; +yy169: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy171; + goto yy52; +yy170: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy172; + goto yy52; +yy171: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy173; + goto yy52; +yy172: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'w') goto yy175; + goto yy52; +yy173: + ++YYCURSOR; +#line 82 "../src/conf/parse_opts.re" + { warn.set (Warn::EMPTY_CHARACTER_CLASS, option); goto opt; } +#line 762 "src/conf/parse_opts.cc" +yy175: + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy52; + ++YYCURSOR; +#line 85 "../src/conf/parse_opts.re" + { warn.set (Warn::UNDEFINED_CONTROL_FLOW, option); goto opt; } +#line 769 "src/conf/parse_opts.cc" +} +#line 88 "../src/conf/parse_opts.re" + + +opt_short: + +#line 776 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 'b') { + if (yych <= 'C') { + if (yych <= '7') { + if (yych <= 0x00) goto yy180; + if (yych == '1') goto yy184; + goto yy182; + } else { + if (yych <= '8') goto yy186; + if (yych == '?') goto yy188; + goto yy182; + } + } else { + if (yych <= 'R') { + if (yych <= 'D') goto yy190; + if (yych == 'F') goto yy192; + goto yy182; + } else { + if (yych <= 'U') { + if (yych <= 'S') goto yy194; + goto yy182; + } else { + if (yych <= 'V') goto yy196; + if (yych <= 'a') goto yy182; + goto yy198; + } + } + } + } else { + if (yych <= 'o') { + if (yych <= 'f') { + if (yych <= 'c') goto yy200; + if (yych <= 'd') goto yy202; + 
if (yych <= 'e') goto yy204; + goto yy206; + } else { + if (yych <= 'h') { + if (yych <= 'g') goto yy208; + goto yy188; + } else { + if (yych <= 'i') goto yy210; + if (yych <= 'n') goto yy182; + goto yy212; + } + } + } else { + if (yych <= 't') { + if (yych <= 'q') goto yy182; + if (yych <= 'r') goto yy214; + if (yych <= 's') goto yy216; + goto yy218; + } else { + if (yych <= 'v') { + if (yych <= 'u') goto yy220; + goto yy222; + } else { + if (yych <= 'w') goto yy224; + if (yych <= 'x') goto yy226; + goto yy182; + } + } + } + } +yy180: + ++YYCURSOR; +#line 97 "../src/conf/parse_opts.re" + { goto opt; } +#line 846 "src/conf/parse_opts.cc" +yy182: + ++YYCURSOR; +#line 93 "../src/conf/parse_opts.re" + { + error ("bad short option: %s", *argv); + return EXIT_FAIL; + } +#line 854 "src/conf/parse_opts.cc" +yy184: + ++YYCURSOR; +#line 121 "../src/conf/parse_opts.re" + { goto opt_short; } +#line 859 "src/conf/parse_opts.cc" +yy186: + ++YYCURSOR; +#line 116 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 864 "src/conf/parse_opts.cc" +yy188: + ++YYCURSOR; +#line 98 "../src/conf/parse_opts.re" + { usage (); return EXIT_OK; } +#line 869 "src/conf/parse_opts.cc" +yy190: + ++YYCURSOR; +#line 104 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::DOT); goto opt_short; } +#line 874 "src/conf/parse_opts.cc" +yy192: + ++YYCURSOR; +#line 106 "../src/conf/parse_opts.re" + { opts.set_FFlag (true); goto opt_short; } +#line 879 "src/conf/parse_opts.cc" +yy194: + ++YYCURSOR; +#line 111 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::SKELETON); goto opt_short; } +#line 884 "src/conf/parse_opts.cc" +yy196: + ++YYCURSOR; +#line 100 "../src/conf/parse_opts.re" + { vernum (); return EXIT_OK; } +#line 889 "src/conf/parse_opts.cc" +yy198: + ++YYCURSOR; +#line 101 "../src/conf/parse_opts.re" + { opts.set_bFlag (true); goto opt_short; } +#line 894 "src/conf/parse_opts.cc" +yy200: + ++YYCURSOR; +#line 
102 "../src/conf/parse_opts.re" + { opts.set_cFlag (true); goto opt_short; } +#line 899 "src/conf/parse_opts.cc" +yy202: + ++YYCURSOR; +#line 103 "../src/conf/parse_opts.re" + { opts.set_dFlag (true); goto opt_short; } +#line 904 "src/conf/parse_opts.cc" +yy204: + ++YYCURSOR; +#line 112 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 909 "src/conf/parse_opts.cc" +yy206: + ++YYCURSOR; +#line 105 "../src/conf/parse_opts.re" + { opts.set_fFlag (true); goto opt_short; } +#line 914 "src/conf/parse_opts.cc" +yy208: + ++YYCURSOR; +#line 107 "../src/conf/parse_opts.re" + { opts.set_gFlag (true); goto opt_short; } +#line 919 "src/conf/parse_opts.cc" +yy210: + ++YYCURSOR; +#line 108 "../src/conf/parse_opts.re" + { opts.set_iFlag (true); goto opt_short; } +#line 924 "src/conf/parse_opts.cc" +yy212: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 0x00) goto yy228; +#line 118 "../src/conf/parse_opts.re" + { *argv = YYCURSOR; goto opt_output; } +#line 930 "src/conf/parse_opts.cc" +yy214: + ++YYCURSOR; +#line 109 "../src/conf/parse_opts.re" + { opts.set_rFlag (true); goto opt_short; } +#line 935 "src/conf/parse_opts.cc" +yy216: + ++YYCURSOR; +#line 110 "../src/conf/parse_opts.re" + { opts.set_sFlag (true); goto opt_short; } +#line 940 "src/conf/parse_opts.cc" +yy218: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 0x00) goto yy230; +#line 120 "../src/conf/parse_opts.re" + { *argv = YYCURSOR; goto opt_header; } +#line 946 "src/conf/parse_opts.cc" +yy220: + ++YYCURSOR; +#line 113 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 951 "src/conf/parse_opts.cc" +yy222: + ++YYCURSOR; +#line 99 "../src/conf/parse_opts.re" + { version (); return EXIT_OK; } +#line 956 "src/conf/parse_opts.cc" +yy224: + ++YYCURSOR; +#line 114 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); 
return EXIT_FAIL; } goto opt_short; } +#line 961 "src/conf/parse_opts.cc" +yy226: + ++YYCURSOR; +#line 115 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 966 "src/conf/parse_opts.cc" +yy228: + ++YYCURSOR; +#line 117 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } +#line 971 "src/conf/parse_opts.cc" +yy230: + ++YYCURSOR; +#line 119 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; } +#line 976 "src/conf/parse_opts.cc" +} +#line 122 "../src/conf/parse_opts.re" + + +opt_long: + +#line 983 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + switch (yych) { + case 'b': goto yy236; + case 'c': goto yy237; + case 'd': goto yy238; + case 'e': goto yy239; + case 'f': goto yy240; + case 'h': goto yy241; + case 'i': goto yy242; + case 'n': goto yy243; + case 'o': goto yy244; + case 'r': goto yy245; + case 's': goto yy246; + case 't': goto yy247; + case 'u': goto yy248; + case 'v': goto yy249; + case 'w': goto yy250; + default: goto yy234; + } +yy234: + ++YYCURSOR; +yy235: +#line 127 "../src/conf/parse_opts.re" + { + error ("bad long option: %s", *argv); + return EXIT_FAIL; + } +#line 1013 "src/conf/parse_opts.cc" +yy236: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'i') goto yy251; + goto yy235; +yy237: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy253; + if (yych == 'o') goto yy254; + goto yy235; +yy238: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'd') goto yy235; + if (yych <= 'e') goto yy255; + if (yych <= 'f') goto yy256; + goto yy235; +yy239: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'l') { + if (yych == 'c') goto yy257; + goto yy235; + } else { + if (yych <= 'm') goto yy258; + if (yych <= 'n') goto yy259; + goto yy235; + } +yy240: + 
yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'l') goto yy260; + goto yy235; +yy241: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy261; + goto yy235; +yy242: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'n') goto yy262; + goto yy235; +yy243: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy263; + if (yych == 'o') goto yy264; + goto yy235; +yy244: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy265; + goto yy235; +yy245: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy266; + goto yy235; +yy246: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'j') { + if (yych == 'i') goto yy267; + goto yy235; + } else { + if (yych <= 'k') goto yy268; + if (yych == 't') goto yy269; + goto yy235; + } +yy247: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'y') goto yy270; + goto yy235; +yy248: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'n') goto yy271; + if (yych == 't') goto yy272; + goto yy235; +yy249: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy273; + goto yy235; +yy250: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'i') goto yy274; + goto yy235; +yy251: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy275; +yy252: + YYCURSOR = YYMARKER; + goto yy235; +yy253: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy276; + goto yy252; +yy254: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy277; + goto yy252; +yy255: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy278; + goto yy252; +yy256: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy279; + goto yy252; +yy257: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy280; + goto yy252; +yy258: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy281; + if (yych == 'p') goto yy282; + goto yy252; +yy259: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy283; + goto yy252; +yy260: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy284; + goto yy252; +yy261: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy285; + goto yy252; +yy262: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy286; + goto yy252; +yy263: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy287; + goto yy252; +yy264: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy288; + goto yy252; +yy265: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy289; + goto yy252; +yy266: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy290; + goto yy252; +yy267: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy291; + goto yy252; +yy268: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy292; + goto yy252; +yy269: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy293; + if (yych == 'o') goto yy294; + goto yy252; +yy270: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy295; + goto yy252; +yy271: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy296; + goto yy252; +yy272: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy297; + goto yy252; +yy273: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy298; + goto yy252; +yy274: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy299; + goto yy252; +yy275: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy300; + goto yy252; +yy276: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy301; + goto yy252; +yy277: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy302; + goto yy252; +yy278: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy303; + goto yy252; +yy279: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy304; + goto yy252; +yy280: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy305; + goto yy252; +yy281: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy307; + goto yy252; +yy282: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy308; + goto yy252; +yy283: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 
'o') goto yy309; + goto yy252; +yy284: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'x') goto yy310; + goto yy252; +yy285: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy311; + goto yy252; +yy286: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy312; + goto yy252; +yy287: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy313; + goto yy252; +yy288: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 'f') { + if (yych == 'd') goto yy314; + goto yy252; + } else { + if (yych <= 'g') goto yy315; + if (yych == 'v') goto yy316; + goto yy252; + } +yy289: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy317; + goto yy252; +yy290: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy318; + goto yy252; +yy291: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy319; + goto yy252; +yy292: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy320; + goto yy252; +yy293: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy321; + goto yy252; +yy294: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy322; + goto yy252; +yy295: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy323; + goto yy252; +yy296: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy324; + goto yy252; +yy297: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy325; + goto yy252; +yy298: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy326; + if (yych == 's') goto yy327; + goto yy252; +yy299: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy328; + goto yy252; +yy300: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'v') goto yy329; + goto yy252; +yy301: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy330; + goto yy252; +yy302: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy331; + goto yy252; +yy303: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy332; + goto yy252; +yy304: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy333; + goto yy252; +yy305: + ++YYCURSOR; +#line 149 "../src/conf/parse_opts.re" 
+ { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1318 "src/conf/parse_opts.cc" +yy307: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy334; + goto yy252; +yy308: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy335; + goto yy252; +yy309: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy336; + goto yy252; +yy310: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy337; + goto yy252; +yy311: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy338; + goto yy252; +yy312: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy340; + goto yy252; +yy313: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy341; + goto yy252; +yy314: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy342; + goto yy252; +yy315: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy343; + goto yy252; +yy316: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy344; + goto yy252; +yy317: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy345; + goto yy252; +yy318: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy346; + goto yy252; +yy319: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy347; + goto yy252; +yy320: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy348; + goto yy252; +yy321: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy349; + goto yy252; +yy322: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy350; + goto yy252; +yy323: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy351; + goto yy252; +yy324: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy352; + goto yy252; +yy325: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '1') goto yy353; + if (yych == '8') goto yy354; + goto yy252; +yy326: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy355; + goto yy252; +yy327: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy356; + goto yy252; +yy328: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy357; + 
goto yy252; +yy329: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy358; + goto yy252; +yy330: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy359; + goto yy252; +yy331: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy360; + goto yy252; +yy332: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy361; + goto yy252; +yy333: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy362; + goto yy252; +yy334: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy363; + goto yy252; +yy335: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy364; + goto yy252; +yy336: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy365; + goto yy252; +yy337: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy366; + goto yy252; +yy338: + ++YYCURSOR; +#line 131 "../src/conf/parse_opts.re" + { usage (); return EXIT_OK; } +#line 1448 "src/conf/parse_opts.cc" +yy340: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy367; + goto yy252; +yy341: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy369; + goto yy252; +yy342: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy370; + goto yy252; +yy343: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy371; + goto yy252; +yy344: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy372; + goto yy252; +yy345: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy373; + goto yy252; +yy346: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy374; + goto yy252; +yy347: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy375; + goto yy252; +yy348: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy376; + goto yy252; +yy349: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy377; + goto yy252; +yy350: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy378; + goto yy252; +yy351: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy379; + goto yy252; +yy352: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy380; + goto yy252; +yy353: 
+ yych = (YYCTYPE)*++YYCURSOR; + if (yych == '6') goto yy381; + goto yy252; +yy354: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy382; + goto yy252; +yy355: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy384; + goto yy252; +yy356: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy385; + goto yy252; +yy357: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy386; + goto yy252; +yy358: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy387; + goto yy252; +yy359: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy388; + goto yy252; +yy360: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy389; + goto yy252; +yy361: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy390; + goto yy252; +yy362: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy391; + goto yy252; +yy363: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy392; + goto yy252; +yy364: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy393; + goto yy252; +yy365: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy394; + goto yy252; +yy366: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy395; + goto yy252; +yy367: + ++YYCURSOR; +#line 157 "../src/conf/parse_opts.re" + { goto opt_input; } +#line 1561 "src/conf/parse_opts.cc" +yy369: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy396; + goto yy252; +yy370: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy397; + goto yy252; +yy371: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy398; + goto yy252; +yy372: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy399; + goto yy252; +yy373: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy400; + goto yy252; +yy374: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy402; + goto yy252; +yy375: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy403; + goto yy252; +yy376: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy404; + goto yy252; +yy377: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy405; + goto yy252; +yy378: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy406; + goto yy252; +yy379: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy407; + goto yy252; +yy380: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy408; + goto yy252; +yy381: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy409; + goto yy252; +yy382: + ++YYCURSOR; +#line 153 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1618 "src/conf/parse_opts.cc" +yy384: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy411; + goto yy252; +yy385: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy413; + goto yy252; +yy386: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy414; + goto yy252; +yy387: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy415; + goto yy252; +yy388: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy416; + if (yych == 'v') goto yy417; + goto yy252; +yy389: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy418; + goto yy252; +yy390: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy419; + goto yy252; +yy391: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy420; + goto yy252; +yy392: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy421; + goto yy252; +yy393: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy422; + goto yy252; +yy394: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy423; + goto yy252; +yy395: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy424; + goto yy252; +yy396: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy425; + goto yy252; +yy397: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy426; + goto yy252; +yy398: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy427; + goto yy252; +yy399: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy428; + goto yy252; +yy400: + ++YYCURSOR; +#line 
154 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } +#line 1688 "src/conf/parse_opts.cc" +yy402: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy429; + goto yy252; +yy403: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy430; + goto yy252; +yy404: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy431; + goto yy252; +yy405: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy432; + goto yy252; +yy406: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy433; + goto yy252; +yy407: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy434; + goto yy252; +yy408: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy435; + goto yy252; +yy409: + ++YYCURSOR; +#line 152 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1721 "src/conf/parse_opts.cc" +yy411: + ++YYCURSOR; +#line 133 "../src/conf/parse_opts.re" + { vernum (); return EXIT_OK; } +#line 1726 "src/conf/parse_opts.cc" +yy413: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy437; + goto yy252; +yy414: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy439; + goto yy252; +yy415: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy440; + goto yy252; +yy416: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy441; + goto yy252; +yy417: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy442; + goto yy252; +yy418: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy443; + goto yy252; +yy419: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy444; + goto yy252; +yy420: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy445; + goto yy252; +yy421: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy446; + goto yy252; +yy422: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy448; + goto yy252; +yy423: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy449; + 
goto yy252; +yy424: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy450; + goto yy252; +yy425: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy451; + goto yy252; +yy426: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy452; + goto yy252; +yy427: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy453; + goto yy252; +yy428: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy454; + goto yy252; +yy429: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy455; + goto yy252; +yy430: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy457; + goto yy252; +yy431: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy458; + goto yy252; +yy432: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy460; + goto yy252; +yy433: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy461; + goto yy252; +yy434: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy462; + goto yy252; +yy435: + ++YYCURSOR; +#line 150 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1819 "src/conf/parse_opts.cc" +yy437: + ++YYCURSOR; +#line 132 "../src/conf/parse_opts.re" + { version (); return EXIT_OK; } +#line 1824 "src/conf/parse_opts.cc" +yy439: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy463; + goto yy252; +yy440: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy464; + goto yy252; +yy441: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy465; + goto yy252; +yy442: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy466; + goto yy252; +yy443: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy467; + goto yy252; +yy444: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy468; + goto yy252; +yy445: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy469; + goto yy252; +yy446: + ++YYCURSOR; +#line 137 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::DOT); goto opt; } +#line 1857 "src/conf/parse_opts.cc" 
+yy448: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy470; + goto yy252; +yy449: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy471; + goto yy252; +yy450: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy472; + goto yy252; +yy451: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy473; + goto yy252; +yy452: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy474; + goto yy252; +yy453: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy475; + goto yy252; +yy454: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy476; + goto yy252; +yy455: + ++YYCURSOR; +#line 142 "../src/conf/parse_opts.re" + { opts.set_rFlag (true); goto opt; } +#line 1890 "src/conf/parse_opts.cc" +yy457: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy477; + goto yy252; +yy458: + ++YYCURSOR; +#line 148 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::SKELETON); goto opt; } +#line 1899 "src/conf/parse_opts.cc" +yy460: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy478; + goto yy252; +yy461: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy479; + goto yy252; +yy462: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy480; + goto yy252; +yy463: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy481; + goto yy252; +yy464: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy482; + goto yy252; +yy465: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy483; + goto yy252; +yy466: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy484; + goto yy252; +yy467: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy485; + goto yy252; +yy468: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy486; + goto yy252; +yy469: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'z') goto yy487; + goto yy252; +yy470: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy488; + goto yy252; +yy471: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy489; + goto yy252; +yy472: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'x') goto yy490; + goto yy252; +yy473: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy491; + goto yy252; +yy474: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy493; + goto yy252; +yy475: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy494; + goto yy252; +yy476: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy495; + goto yy252; +yy477: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy497; + goto yy252; +yy478: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy498; + goto yy252; +yy479: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy499; + goto yy252; +yy480: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy500; + goto yy252; +yy481: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy501; + goto yy252; +yy482: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy503; + goto yy252; +yy483: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy505; + goto yy252; +yy484: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy506; + goto yy252; +yy485: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy507; + goto yy252; +yy486: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy508; + goto yy252; +yy487: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy509; + goto yy252; +yy488: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy510; + goto yy252; +yy489: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy512; + goto yy252; +yy490: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy513; + goto yy252; +yy491: + ++YYCURSOR; +#line 143 "../src/conf/parse_opts.re" + { opts.set_sFlag (true); goto opt; } +#line 2028 "src/conf/parse_opts.cc" +yy493: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy515; + goto yy252; +yy494: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy516; + goto yy252; +yy495: + ++YYCURSOR; +#line 145 "../src/conf/parse_opts.re" + { opts.set_version (false); goto opt; } +#line 
2041 "src/conf/parse_opts.cc" +yy497: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy517; + goto yy252; +yy498: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy519; + goto yy252; +yy499: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy520; + goto yy252; +yy500: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy521; + goto yy252; +yy501: + ++YYCURSOR; +#line 151 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 2062 "src/conf/parse_opts.cc" +yy503: + ++YYCURSOR; +#line 134 "../src/conf/parse_opts.re" + { opts.set_bFlag (true); goto opt; } +#line 2067 "src/conf/parse_opts.cc" +yy505: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy523; + goto yy252; +yy506: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy524; + goto yy252; +yy507: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy525; + goto yy252; +yy508: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy526; + goto yy252; +yy509: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy528; + goto yy252; +yy510: + ++YYCURSOR; +#line 158 "../src/conf/parse_opts.re" + { goto opt_empty_class; } +#line 2092 "src/conf/parse_opts.cc" +yy512: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy529; + goto yy252; +yy513: + ++YYCURSOR; +#line 139 "../src/conf/parse_opts.re" + { opts.set_FFlag (true); goto opt; } +#line 2101 "src/conf/parse_opts.cc" +yy515: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy530; + goto yy252; +yy516: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy531; + goto yy252; +yy517: + ++YYCURSOR; +#line 160 "../src/conf/parse_opts.re" + { goto opt; } +#line 2114 "src/conf/parse_opts.cc" +yy519: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy532; + goto yy252; +yy520: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy533; + goto yy252; +yy521: + ++YYCURSOR; +#line 155 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, 
argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; } +#line 2127 "src/conf/parse_opts.cc" +yy523: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy534; + goto yy252; +yy524: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy535; + goto yy252; +yy525: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy537; + goto yy252; +yy526: + ++YYCURSOR; +#line 136 "../src/conf/parse_opts.re" + { opts.set_dFlag (true); goto opt; } +#line 2144 "src/conf/parse_opts.cc" +yy528: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy538; + goto yy252; +yy529: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy539; + goto yy252; +yy530: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy540; + goto yy252; +yy531: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy542; + goto yy252; +yy532: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy543; + goto yy252; +yy533: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy544; + goto yy252; +yy534: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'v') goto yy545; + goto yy252; +yy535: + ++YYCURSOR; +#line 147 "../src/conf/parse_opts.re" + { opts.set_bCaseInverted (true); goto opt; } +#line 2177 "src/conf/parse_opts.cc" +yy537: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy546; + goto yy252; +yy538: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy548; + goto yy252; +yy539: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy549; + goto yy252; +yy540: + ++YYCURSOR; +#line 141 "../src/conf/parse_opts.re" + { opts.set_iFlag (true); goto opt; } +#line 2194 "src/conf/parse_opts.cc" +yy542: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy550; + goto yy252; +yy543: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy551; + goto yy252; +yy544: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy552; + goto yy252; +yy545: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy554; + goto yy252; +yy546: + ++YYCURSOR; +#line 
140 "../src/conf/parse_opts.re" + { opts.set_gFlag (true); goto opt; } +#line 2215 "src/conf/parse_opts.cc" +yy548: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy555; + goto yy252; +yy549: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy556; + goto yy252; +yy550: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy558; + goto yy252; +yy551: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy559; + goto yy252; +yy552: + ++YYCURSOR; +#line 138 "../src/conf/parse_opts.re" + { opts.set_fFlag (true); goto opt; } +#line 2236 "src/conf/parse_opts.cc" +yy554: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy560; + goto yy252; +yy555: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy562; + goto yy252; +yy556: + ++YYCURSOR; +#line 156 "../src/conf/parse_opts.re" + { goto opt_encoding_policy; } +#line 2249 "src/conf/parse_opts.cc" +yy558: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy564; + goto yy252; +yy559: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy565; + goto yy252; +yy560: + ++YYCURSOR; +#line 146 "../src/conf/parse_opts.re" + { opts.set_bCaseInsensitive (true); goto opt; } +#line 2262 "src/conf/parse_opts.cc" +yy562: + ++YYCURSOR; +#line 159 "../src/conf/parse_opts.re" + { goto opt_dfa_minimization; } +#line 2267 "src/conf/parse_opts.cc" +yy564: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy567; + goto yy252; +yy565: + ++YYCURSOR; +#line 135 "../src/conf/parse_opts.re" + { opts.set_cFlag (true); goto opt; } +#line 2276 "src/conf/parse_opts.cc" +yy567: + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy252; + ++YYCURSOR; +#line 144 "../src/conf/parse_opts.re" + { opts.set_bNoGenerationDate (true); goto opt; } +#line 2283 "src/conf/parse_opts.cc" +} +#line 161 "../src/conf/parse_opts.re" + + +opt_output: + +#line 2290 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = {}; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x00) goto yy572; + if (yych != 
'-') goto yy574; +yy572: + ++YYCURSOR; +#line 166 "../src/conf/parse_opts.re" + { + error ("bad argument to option -o, --output: %s", *argv); + return EXIT_FAIL; + } +#line 2337 "src/conf/parse_opts.cc" +yy574: + yych = (YYCTYPE)*++YYCURSOR; + goto yy578; +yy575: + ++YYCURSOR; +#line 170 "../src/conf/parse_opts.re" + { if (!opts.output (*argv)) return EXIT_FAIL; goto opt; } +#line 2345 "src/conf/parse_opts.cc" +yy577: + ++YYCURSOR; + yych = (YYCTYPE)*YYCURSOR; +yy578: + if (yybm[0+yych] & 128) { + goto yy577; + } + goto yy575; +} +#line 171 "../src/conf/parse_opts.re" + + +opt_header: + +#line 2360 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = {}; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x00) goto yy581; + if (yych != '-') goto yy583; +yy581: + ++YYCURSOR; +#line 176 "../src/conf/parse_opts.re" + { + error ("bad argument to option -t, --type-header: %s", *argv); + return EXIT_FAIL; + } +#line 2407 "src/conf/parse_opts.cc" +yy583: + yych = (YYCTYPE)*++YYCURSOR; + goto yy587; +yy584: + ++YYCURSOR; +#line 180 "../src/conf/parse_opts.re" + { opts.set_header_file (*argv); goto opt; } +#line 2415 "src/conf/parse_opts.cc" +yy586: + ++YYCURSOR; + yych = (YYCTYPE)*YYCURSOR; +yy587: + if (yybm[0+yych] & 128) { + goto yy586; + } + goto yy584; +} +#line 181 "../src/conf/parse_opts.re" + + +opt_encoding_policy: + if (!next (YYCURSOR, argv)) + { + error_arg ("--encoding-policy"); + return EXIT_FAIL; + } + +#line 2435 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 'h') { + if (yych == 'f') goto yy592; + } else { + if (yych <= 'i') goto yy593; + if (yych == 's') goto yy594; + } + ++YYCURSOR; +yy591: +#line 191 "../src/conf/parse_opts.re" + { + error ("bad argument to option --encoding-policy (expected: ignore | substitute | fail): %s", *argv); + return EXIT_FAIL; + } +#line 2452 "src/conf/parse_opts.cc" +yy592: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy595; + goto yy591; 
+yy593: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'g') goto yy597; + goto yy591; +yy594: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy598; + goto yy591; +yy595: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy599; +yy596: + YYCURSOR = YYMARKER; + goto yy591; +yy597: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy600; + goto yy596; +yy598: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy601; + goto yy596; +yy599: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy602; + goto yy596; +yy600: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy603; + goto yy596; +yy601: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy604; + goto yy596; +yy602: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy605; + goto yy596; +yy603: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy607; + goto yy596; +yy604: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy608; + goto yy596; +yy605: + ++YYCURSOR; +#line 197 "../src/conf/parse_opts.re" + { opts.set_encoding_policy (Enc::POLICY_FAIL); goto opt; } +#line 2507 "src/conf/parse_opts.cc" +yy607: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy609; + goto yy596; +yy608: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy610; + goto yy596; +yy609: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy611; + goto yy596; +yy610: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy613; + goto yy596; +yy611: + ++YYCURSOR; +#line 195 "../src/conf/parse_opts.re" + { opts.set_encoding_policy (Enc::POLICY_IGNORE); goto opt; } +#line 2528 "src/conf/parse_opts.cc" +yy613: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'u') goto yy596; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 't') goto yy596; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'e') goto yy596; + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy596; + ++YYCURSOR; +#line 196 "../src/conf/parse_opts.re" + { opts.set_encoding_policy 
(Enc::POLICY_SUBSTITUTE); goto opt; } +#line 2541 "src/conf/parse_opts.cc" +} +#line 198 "../src/conf/parse_opts.re" + + +opt_input: + if (!next (YYCURSOR, argv)) + { + error_arg ("--input"); + return EXIT_FAIL; + } + +#line 2553 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 'b') goto yy621; + if (yych <= 'c') goto yy623; + if (yych <= 'd') goto yy624; +yy621: + ++YYCURSOR; +yy622: +#line 208 "../src/conf/parse_opts.re" + { + error ("bad argument to option --input (expected: default | custom): %s", *argv); + return EXIT_FAIL; + } +#line 2568 "src/conf/parse_opts.cc" +yy623: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy625; + goto yy622; +yy624: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy627; + goto yy622; +yy625: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy628; +yy626: + YYCURSOR = YYMARKER; + goto yy622; +yy627: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy629; + goto yy626; +yy628: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy630; + goto yy626; +yy629: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy631; + goto yy626; +yy630: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy632; + goto yy626; +yy631: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy633; + goto yy626; +yy632: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy634; + goto yy626; +yy633: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy635; + goto yy626; +yy634: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy636; + goto yy626; +yy635: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy638; + goto yy626; +yy636: + ++YYCURSOR; +#line 213 "../src/conf/parse_opts.re" + { opts.set_input_api (InputAPI::CUSTOM); goto opt; } +#line 2623 "src/conf/parse_opts.cc" +yy638: + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy626; + ++YYCURSOR; +#line 212 "../src/conf/parse_opts.re" + { opts.set_input_api 
(InputAPI::DEFAULT); goto opt; } +#line 2630 "src/conf/parse_opts.cc" +} +#line 214 "../src/conf/parse_opts.re" + + +opt_empty_class: + if (!next (YYCURSOR, argv)) + { + error_arg ("--empty-class"); + return EXIT_FAIL; + } + +#line 2642 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych == 'e') goto yy645; + if (yych == 'm') goto yy646; + ++YYCURSOR; +yy644: +#line 224 "../src/conf/parse_opts.re" + { + error ("bad argument to option --empty-class (expected: match-empty | match-none | error): %s", *argv); + return EXIT_FAIL; + } +#line 2655 "src/conf/parse_opts.cc" +yy645: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'r') goto yy647; + goto yy644; +yy646: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy649; + goto yy644; +yy647: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy650; +yy648: + YYCURSOR = YYMARKER; + goto yy644; +yy649: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy651; + goto yy648; +yy650: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy652; + goto yy648; +yy651: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy653; + goto yy648; +yy652: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy654; + goto yy648; +yy653: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy655; + goto yy648; +yy654: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy656; + goto yy648; +yy655: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy658; + goto yy648; +yy656: + ++YYCURSOR; +#line 230 "../src/conf/parse_opts.re" + { opts.set_empty_class_policy (EMPTY_CLASS_ERROR); goto opt; } +#line 2702 "src/conf/parse_opts.cc" +yy658: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy659; + if (yych == 'n') goto yy660; + goto yy648; +yy659: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy661; + goto yy648; +yy660: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy662; + goto yy648; +yy661: + yych = (YYCTYPE)*++YYCURSOR; + if (yych 
== 'p') goto yy663; + goto yy648; +yy662: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy664; + goto yy648; +yy663: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy665; + goto yy648; +yy664: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy666; + goto yy648; +yy665: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy667; + goto yy648; +yy666: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy668; + goto yy648; +yy667: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy670; + goto yy648; +yy668: + ++YYCURSOR; +#line 229 "../src/conf/parse_opts.re" + { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_NONE); goto opt; } +#line 2748 "src/conf/parse_opts.cc" +yy670: + ++YYCURSOR; +#line 228 "../src/conf/parse_opts.re" + { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_EMPTY); goto opt; } +#line 2753 "src/conf/parse_opts.cc" +} +#line 231 "../src/conf/parse_opts.re" + + +opt_dfa_minimization: + if (!next (YYCURSOR, argv)) + { + error_arg ("--minimization"); + return EXIT_FAIL; + } + +#line 2765 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych == 'm') goto yy676; + if (yych == 't') goto yy677; + ++YYCURSOR; +yy675: +#line 241 "../src/conf/parse_opts.re" + { + error ("bad argument to option --dfa-minimization (expected: table | moore): %s", *argv); + return EXIT_FAIL; + } +#line 2778 "src/conf/parse_opts.cc" +yy676: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'o') goto yy678; + goto yy675; +yy677: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy680; + goto yy675; +yy678: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy681; +yy679: + YYCURSOR = YYMARKER; + goto yy675; +yy680: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy682; + goto yy679; +yy681: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy683; + goto yy679; +yy682: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy684; + goto yy679; +yy683: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy685; + goto yy679; +yy684: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy686; + goto yy679; +yy685: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy687; + goto yy679; +yy686: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy689; + goto yy679; +yy687: + ++YYCURSOR; +#line 246 "../src/conf/parse_opts.re" + { opts.set_dfa_minimization (DFA_MINIMIZATION_MOORE); goto opt; } +#line 2825 "src/conf/parse_opts.cc" +yy689: + ++YYCURSOR; +#line 245 "../src/conf/parse_opts.re" + { opts.set_dfa_minimization (DFA_MINIMIZATION_TABLE); goto opt; } +#line 2830 "src/conf/parse_opts.cc" +} +#line 247 "../src/conf/parse_opts.re" + + +end: + if (!opts.source_file) + { + error ("no source file"); + return EXIT_FAIL; + } + + return OK; + +#undef YYCTYPE +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/parse_opts.re b/tools/re2c/src/conf/parse_opts.re new file mode 100644 index 000000000..5ff3a2175 --- /dev/null +++ b/tools/re2c/src/conf/parse_opts.re @@ -0,0 +1,261 @@ +#include "src/codegen/input_api.h" +#include "src/conf/msg.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +static inline bool next (char * & arg, char ** & argv) +{ + arg = *++argv; + return arg != NULL; +} + +parse_opts_t parse_opts (char ** argv, Opt & opts) +{ +#define YYCTYPE unsigned char + char * YYCURSOR; + char * YYMARKER; + Warn::option_t option; + +/*!re2c + re2c:yyfill:enable = 0; + re2c:yych:conversion = 1; + + end = "\x00"; + filename = [^\x00-] [^\x00]*; +*/ + +opt: + if (!next (YYCURSOR, argv)) + { + goto end; + } +/*!re2c + * + { + error ("bad option: %s", *argv); + return EXIT_FAIL; + } + + "--" end + { + // all remaining arguments are non-options + // so they must be input files + // re2c expects exactly one input file + for (char * f; next (f, argv);) + { + if 
(!opts.source (f)) + { + return EXIT_FAIL; + } + } + goto end; + } + + "-" end { if (!opts.source ("")) return EXIT_FAIL; goto opt; } + filename end { if (!opts.source (*argv)) return EXIT_FAIL; goto opt; } + + "-" { goto opt_short; } + "--" { goto opt_long; } + + "-W" end { warn.set_all (); goto opt; } + "-Werror" end { warn.set_all_error (); goto opt; } + "-W" { option = Warn::W; goto opt_warn; } + "-Wno-" { option = Warn::WNO; goto opt_warn; } + "-Werror-" { option = Warn::WERROR; goto opt_warn; } + "-Wno-error-" { option = Warn::WNOERROR; goto opt_warn; } +*/ + +opt_warn: +/*!re2c + * + { + error ("bad warning: %s", *argv); + return EXIT_FAIL; + } + "condition-order" end { warn.set (Warn::CONDITION_ORDER, option); goto opt; } + "empty-character-class" end { warn.set (Warn::EMPTY_CHARACTER_CLASS, option); goto opt; } + "match-empty-string" end { warn.set (Warn::MATCH_EMPTY_STRING, option); goto opt; } + "swapped-range" end { warn.set (Warn::SWAPPED_RANGE, option); goto opt; } + "undefined-control-flow" end { warn.set (Warn::UNDEFINED_CONTROL_FLOW, option); goto opt; } + "unreachable-rules" end { warn.set (Warn::UNREACHABLE_RULES, option); goto opt; } + "useless-escape" end { warn.set (Warn::USELESS_ESCAPE, option); goto opt; } +*/ + +opt_short: +/*!re2c + * + { + error ("bad short option: %s", *argv); + return EXIT_FAIL; + } + end { goto opt; } + [?h] { usage (); return EXIT_OK; } + "v" { version (); return EXIT_OK; } + "V" { vernum (); return EXIT_OK; } + "b" { opts.set_bFlag (true); goto opt_short; } + "c" { opts.set_cFlag (true); goto opt_short; } + "d" { opts.set_dFlag (true); goto opt_short; } + "D" { opts.set_target (opt_t::DOT); goto opt_short; } + "f" { opts.set_fFlag (true); goto opt_short; } + "F" { opts.set_FFlag (true); goto opt_short; } + "g" { opts.set_gFlag (true); goto opt_short; } + "i" { opts.set_iFlag (true); goto opt_short; } + "r" { opts.set_rFlag (true); goto opt_short; } + "s" { opts.set_sFlag (true); goto opt_short; } + "S" { 
opts.set_target (opt_t::SKELETON); goto opt_short; } + "e" { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "u" { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "w" { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "x" { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "8" { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "o" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } + "o" { *argv = YYCURSOR; goto opt_output; } + "t" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; } + "t" { *argv = YYCURSOR; goto opt_header; } + "1" { goto opt_short; } // deprecated +*/ + +opt_long: +/*!re2c + * + { + error ("bad long option: %s", *argv); + return EXIT_FAIL; + } + "help" end { usage (); return EXIT_OK; } + "version" end { version (); return EXIT_OK; } + "vernum" end { vernum (); return EXIT_OK; } + "bit-vectors" end { opts.set_bFlag (true); goto opt; } + "start-conditions" end { opts.set_cFlag (true); goto opt; } + "debug-output" end { opts.set_dFlag (true); goto opt; } + "emit-dot" end { opts.set_target (opt_t::DOT); goto opt; } + "storable-state" end { opts.set_fFlag (true); goto opt; } + "flex-syntax" end { opts.set_FFlag (true); goto opt; } + "computed-gotos" end { opts.set_gFlag (true); goto opt; } + "no-debug-info" end { opts.set_iFlag (true); goto opt; } + "reusable" end { opts.set_rFlag (true); goto opt; } + "nested-ifs" end { opts.set_sFlag (true); goto opt; } + "no-generation-date" end { opts.set_bNoGenerationDate (true); goto opt; } + "no-version" end { opts.set_version (false); goto opt; } + "case-insensitive" end { opts.set_bCaseInsensitive (true); goto opt; } + "case-inverted" end { opts.set_bCaseInverted 
(true); goto opt; } + "skeleton" end { opts.set_target (opt_t::SKELETON); goto opt; } + "ecb" end { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "unicode" end { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "wide-chars" end { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "utf-16" end { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "utf-8" end { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "output" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } + "type-header" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; } + "encoding-policy" end { goto opt_encoding_policy; } + "input" end { goto opt_input; } + "empty-class" end { goto opt_empty_class; } + "dfa-minimization" end { goto opt_dfa_minimization; } + "single-pass" end { goto opt; } // deprecated +*/ + +opt_output: +/*!re2c + * + { + error ("bad argument to option -o, --output: %s", *argv); + return EXIT_FAIL; + } + filename end { if (!opts.output (*argv)) return EXIT_FAIL; goto opt; } +*/ + +opt_header: +/*!re2c + * + { + error ("bad argument to option -t, --type-header: %s", *argv); + return EXIT_FAIL; + } + filename end { opts.set_header_file (*argv); goto opt; } +*/ + +opt_encoding_policy: + if (!next (YYCURSOR, argv)) + { + error_arg ("--encoding-policy"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --encoding-policy (expected: ignore | substitute | fail): %s", *argv); + return EXIT_FAIL; + } + "ignore" end { opts.set_encoding_policy (Enc::POLICY_IGNORE); goto opt; } + "substitute" end { opts.set_encoding_policy (Enc::POLICY_SUBSTITUTE); goto opt; } + "fail" end { opts.set_encoding_policy (Enc::POLICY_FAIL); goto opt; } +*/ + +opt_input: + if 
(!next (YYCURSOR, argv)) + { + error_arg ("--input"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --input (expected: default | custom): %s", *argv); + return EXIT_FAIL; + } + "default" end { opts.set_input_api (InputAPI::DEFAULT); goto opt; } + "custom" end { opts.set_input_api (InputAPI::CUSTOM); goto opt; } +*/ + +opt_empty_class: + if (!next (YYCURSOR, argv)) + { + error_arg ("--empty-class"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --empty-class (expected: match-empty | match-none | error): %s", *argv); + return EXIT_FAIL; + } + "match-empty" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_EMPTY); goto opt; } + "match-none" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_NONE); goto opt; } + "error" end { opts.set_empty_class_policy (EMPTY_CLASS_ERROR); goto opt; } +*/ + +opt_dfa_minimization: + if (!next (YYCURSOR, argv)) + { + error_arg ("--minimization"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --dfa-minimization (expected: table | moore): %s", *argv); + return EXIT_FAIL; + } + "table" end { opts.set_dfa_minimization (DFA_MINIMIZATION_TABLE); goto opt; } + "moore" end { opts.set_dfa_minimization (DFA_MINIMIZATION_MOORE); goto opt; } +*/ + +end: + if (!opts.source_file) + { + error ("no source file"); + return EXIT_FAIL; + } + + return OK; + +#undef YYCTYPE +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/warn.cc b/tools/re2c/src/conf/warn.cc new file mode 100644 index 000000000..e309f26d6 --- /dev/null +++ b/tools/re2c/src/conf/warn.cc @@ -0,0 +1,200 @@ +#include +#include +#include +#include + +#include "src/conf/msg.h" +#include "src/conf/warn.h" + +namespace re2c { + +Warn warn; + +const uint32_t Warn::SILENT = 0; +const uint32_t Warn::WARNING = 1u << 0; +const uint32_t Warn::ERROR = 1u << 1; + +const char * Warn::names [TYPES] = +{ +#define W(x, y) y + RE2C_WARNING_TYPES +#undef W +}; + +Warn::Warn () + : mask () + , error_accuml (false) 
+{ + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] = SILENT; + } +} + +bool Warn::error () const +{ + return error_accuml; +} + +void Warn::set (type_t t, option_t o) +{ + switch (o) + { + case W: + mask[t] |= WARNING; + break; + case WNO: + mask[t] &= ~WARNING; + break; + case WERROR: + // unlike -Werror, -Werror- implies -W + mask[t] |= (WARNING | ERROR); + break; + case WNOERROR: + mask[t] &= ~ERROR; + break; + } +} + +void Warn::set_all () +{ + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] |= WARNING; + } +} + +// -Werror doesn't set any warnings: it only guarantees that if a warning +// has been set by now or will be set later then it will result into error. +void Warn::set_all_error () +{ + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] |= ERROR; + } +} + +void Warn::fail (type_t t, uint32_t line, const char * s) +{ + if (mask[t] & WARNING) + { + // -Werror has no effect + warning (names[t], line, false, "%s", s); + } +} + +void Warn::condition_order (uint32_t line) +{ + if (mask[CONDITION_ORDER] & WARNING) + { + const bool e = mask[CONDITION_ORDER] & ERROR; + error_accuml |= e; + warning (names[CONDITION_ORDER], line, e, + "looks like you use hardcoded numbers instead of autogenerated condition names: " + "better add '/*!types:re2c*/' directive or '-t, --type-header' option " + "and don't rely on fixed condition order."); + } +} + +void Warn::empty_class (uint32_t line) +{ + if (mask[EMPTY_CHARACTER_CLASS] & WARNING) + { + const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR; + error_accuml |= e; + warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class"); + } +} + +void Warn::match_empty_string (uint32_t line) +{ + if (mask[MATCH_EMPTY_STRING] & WARNING) + { + const bool e = mask[MATCH_EMPTY_STRING] & ERROR; + error_accuml |= e; + warning (names[MATCH_EMPTY_STRING], line, e, "rule matches empty string"); + } +} + +void Warn::swapped_range (uint32_t line, uint32_t l, uint32_t u) +{ + if (mask[SWAPPED_RANGE] & WARNING) + { + const 
bool e = mask[SWAPPED_RANGE] & ERROR; + error_accuml |= e; + warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u); + } +} + +void Warn::undefined_control_flow (uint32_t line, const std::string & cond, std::vector & ways, bool overflow) +{ + if (mask[UNDEFINED_CONTROL_FLOW] & WARNING) + { + const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR; + error_accuml |= e; + + // report shorter patterns first + std::sort (ways.begin (), ways.end (), cmp_ways); + + warning_start (line, e); + fprintf (stderr, "control flow %sis undefined for strings that match ", incond (cond).c_str ()); + const size_t count = ways.size (); + if (count == 1) + { + fprint_way (stderr, ways[0]); + } + else + { + for (size_t i = 0; i < count; ++i) + { + fprintf (stderr, "\n\t"); + fprint_way (stderr, ways[i]); + } + fprintf (stderr, "\n"); + } + if (overflow) + { + fprintf (stderr, " ... and a few more"); + } + fprintf (stderr, ", use default rule '*'"); + warning_end (names[UNDEFINED_CONTROL_FLOW], e); + } +} + +void Warn::unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules) +{ + if (mask[UNREACHABLE_RULES] & WARNING) + { + const bool e = mask[UNREACHABLE_RULES] & ERROR; + error_accuml |= e; + warning_start (rule.line, e); + fprintf (stderr, "unreachable rule %s", incond (cond).c_str ()); + const size_t shadows = rule.shadow.size (); + if (shadows > 0) + { + const char * pl = shadows > 1 + ? 
"s" + : ""; + std::set::const_iterator i = rule.shadow.begin (); + fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, rules.find (*i)->second.line); + for (++i; i != rule.shadow.end (); ++i) + { + fprintf (stderr, ", %u", rules.find (*i)->second.line); + } + fprintf (stderr, ")"); + } + warning_end (names[UNREACHABLE_RULES], e); + } +} + +void Warn::useless_escape (uint32_t line, uint32_t col, char c) +{ + if (mask[USELESS_ESCAPE] & WARNING) + { + const bool e = mask[USELESS_ESCAPE] & ERROR; + error_accuml |= e; + warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/warn.h b/tools/re2c/src/conf/warn.h new file mode 100644 index 000000000..fb313b01d --- /dev/null +++ b/tools/re2c/src/conf/warn.h @@ -0,0 +1,67 @@ +#ifndef _RE2C_CONF_WARN_ +#define _RE2C_CONF_WARN_ + +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/ir/skeleton/way.h" +#include "src/parse/rules.h" + +namespace re2c { + +#define RE2C_WARNING_TYPES \ + W (CONDITION_ORDER, "condition-order"), \ + W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \ + W (MATCH_EMPTY_STRING, "match-empty-string"), \ + W (SWAPPED_RANGE, "swapped-range"), \ + W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \ + W (UNREACHABLE_RULES, "unreachable-rules"), \ + W (USELESS_ESCAPE, "useless-escape"), + +class Warn +{ +public: + enum type_t + { +#define W(x, y) x + RE2C_WARNING_TYPES +#undef W + TYPES // count + }; + enum option_t + { + W, + WNO, + WERROR, + WNOERROR + }; + +private: + static const uint32_t SILENT; + static const uint32_t WARNING; + static const uint32_t ERROR; + static const char * names [TYPES]; + uint32_t mask[TYPES]; + bool error_accuml; + +public: + Warn (); + bool error () const; + void set (type_t t, option_t o); + void set_all (); + void set_all_error (); + void fail (type_t t, uint32_t line, const char * s); + + void condition_order (uint32_t line); + void empty_class 
(uint32_t line); + void match_empty_string (uint32_t line); + void swapped_range (uint32_t line, uint32_t l, uint32_t u); + void undefined_control_flow (uint32_t line, const std::string & cond, std::vector & ways, bool overflow); + void unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules); + void useless_escape (uint32_t line, uint32_t col, char c); +}; + +} // namespace re2c + +#endif // _RE2C_CONF_WARN_ diff --git a/tools/re2c/src/globals.h b/tools/re2c/src/globals.h new file mode 100644 index 000000000..b2ea67ae0 --- /dev/null +++ b/tools/re2c/src/globals.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_GLOBALS_ +#define _RE2C_GLOBALS_ + +#include + +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/util/c99_stdint.h" + +namespace re2c +{ + +extern bool bUsedYYBitmap; +extern bool bWroteGetState; +extern bool bWroteCondCheck; +extern uint32_t last_fill_index; +extern std::string yySetupRule; + +extern Opt opts; +extern Warn warn; + +} // end namespace re2c + +#endif // _RE2C_GLOBALS_ diff --git a/tools/re2c/src/ir/adfa/action.h b/tools/re2c/src/ir/adfa/action.h new file mode 100644 index 000000000..3fe01ed78 --- /dev/null +++ b/tools/re2c/src/ir/adfa/action.h @@ -0,0 +1,109 @@ +#ifndef _RE2C_IR_ADFA_ACTION_ +#define _RE2C_IR_ADFA_ACTION_ + +#include + +#include "src/codegen/label.h" +#include "src/util/c99_stdint.h" +#include "src/util/uniq_vector.h" + +namespace re2c +{ + +struct OutputFile; +class RuleOp; +struct State; + +struct Initial +{ + label_t label; + bool setMarker; + + inline Initial (label_t l, bool b) + : label (l) + , setMarker (b) + {} +}; + +typedef uniq_vector_t accept_t; + +class Action +{ +public: + enum type_t + { + MATCH, + INITIAL, + SAVE, + MOVE, + ACCEPT, + RULE + } type; + union + { + Initial * initial; + uint32_t save; + const accept_t * accepts; + const RuleOp * rule; + } info; + +public: + inline Action () + : type (MATCH) + , info () + {} + ~Action () + { + clear (); + } + void set_initial 
(label_t label, bool used_marker) + { + clear (); + type = INITIAL; + info.initial = new Initial (label, used_marker); + } + void set_save (uint32_t save) + { + clear (); + type = SAVE; + info.save = save; + } + void set_move () + { + clear (); + type = MOVE; + } + void set_accept (const accept_t * accepts) + { + clear (); + type = ACCEPT; + info.accepts = accepts; + } + void set_rule (const RuleOp * const rule) + { + clear (); + type = RULE; + info.rule = rule; + } + +private: + void clear () + { + switch (type) + { + case INITIAL: + delete info.initial; + break; + case MATCH: + case SAVE: + case MOVE: + case ACCEPT: + case RULE: + break; + } + } +}; + +} // namespace re2c + +#endif // _RE2C_IR_ADFA_ACTION_ diff --git a/tools/re2c/src/ir/adfa/adfa.cc b/tools/re2c/src/ir/adfa/adfa.cc new file mode 100644 index 000000000..cb41d0351 --- /dev/null +++ b/tools/re2c/src/ir/adfa/adfa.cc @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include + +#include "src/codegen/go.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/dfa/dfa.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/util/allocate.h" + +namespace re2c +{ + +DFA::DFA + ( const dfa_t &dfa + , const std::vector &fill + , Skeleton *skel + , const charset_t &charset + , const std::string &n + , const std::string &c + , uint32_t l + ) + : accepts () + , skeleton (skel) + , name (n) + , cond (c) + , line (l) + , lbChar(0) + , ubChar(charset.back()) + , nStates(0) + , head(NULL) + + // statistics + , max_fill (0) + , need_backup (false) + , need_backupctx (false) + , need_accept (false) +{ + const size_t nstates = dfa.states.size(); + const size_t nchars = dfa.nchars; + + State **i2s = new State*[nstates]; + for (size_t i = 0; i < nstates; ++i) + { + i2s[i] = new State; + } + + State **p = &head; + for (size_t i = 0; i < nstates; ++i) + { + dfa_state_t *t = dfa.states[i]; + State *s = i2s[i]; + + ++nStates; + *p = s; + p = &s->next; + + s->isPreCtxt = t->ctx; + s->rule = t->rule; + s->fill = fill[i]; 
+ s->go.span = allocate(nchars); + uint32_t j = 0; + for (uint32_t c = 0; c < nchars; ++j) + { + const size_t to = t->arcs[c]; + for (;++c < nchars && t->arcs[c] == to;); + s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to]; + s->go.span[j].ub = charset[c]; + } + s->go.nSpans = j; + } + *p = NULL; + + delete[] i2s; +} + +DFA::~DFA() +{ + State *s; + + while ((s = head)) + { + head = s->next; + delete s; + } + + delete skeleton; +} + +void DFA::reorder() +{ + std::vector ord; + ord.reserve(nStates); + + std::queue todo; + todo.push(head); + + std::set done; + done.insert(head); + + for(;!todo.empty();) + { + State *s = todo.front(); + todo.pop(); + ord.push_back(s); + for(uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *q = s->go.span[i].to; + if(q && done.insert(q).second) + { + todo.push(q); + } + } + } + + assert(nStates == ord.size()); + + ord.push_back(NULL); + for(uint32_t i = 0; i < nStates; ++i) + { + ord[i]->next = ord[i + 1]; + } +} + +void DFA::addState(State *s, State *next) +{ + ++nStates; + s->next = next->next; + next->next = s; +} + +} // namespace re2c + diff --git a/tools/re2c/src/ir/adfa/adfa.h b/tools/re2c/src/ir/adfa/adfa.h new file mode 100644 index 000000000..95f512d63 --- /dev/null +++ b/tools/re2c/src/ir/adfa/adfa.h @@ -0,0 +1,101 @@ +#ifndef _RE2C_IR_ADFA_ADFA_ +#define _RE2C_IR_ADFA_ADFA_ + +#include +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/codegen/go.h" +#include "src/codegen/label.h" +#include "src/ir/adfa/action.h" +#include "src/ir/regexp/regexp.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct Skeleton; +struct Output; +struct OutputFile; +struct dfa_t; + +struct State +{ + label_t label; + RuleOp * rule; + State * next; + size_t fill; + + bool isPreCtxt; + bool isBase; + Go go; + Action action; + + State () + : label (label_t::first ()) + , rule (NULL) + , next (0) + , fill (0) + , isPreCtxt (false) + , isBase (false) + , go () + , action () + {} + ~State () + { + operator delete 
(go.span); + } + + FORBID_COPY (State); +}; + +class DFA +{ + accept_t accepts; + Skeleton * skeleton; + +public: + const std::string name; + const std::string cond; + const uint32_t line; + + uint32_t lbChar; + uint32_t ubChar; + uint32_t nStates; + State * head; + + // statistics + size_t max_fill; + bool need_backup; + bool need_backupctx; + bool need_accept; + +public: + DFA ( const dfa_t &dfa + , const std::vector &fill + , Skeleton *skel + , const charset_t &charset + , const std::string &n + , const std::string &c + , uint32_t l + ); + ~DFA (); + void reorder(); + void prepare(); + void calc_stats(); + void emit (Output &, uint32_t &, bool, bool &); + +private: + void addState(State*, State *); + void split (State *); + void findBaseState (); + void count_used_labels (std::set & used, label_t prolog, label_t start, bool force_start) const; + void emit_body (OutputFile &, uint32_t &, const std::set & used_labels, label_t initial) const; + + FORBID_COPY (DFA); +}; + +} // namespace re2c + +#endif // _RE2C_IR_ADFA_ADFA_ diff --git a/tools/re2c/src/ir/adfa/prepare.cc b/tools/re2c/src/ir/adfa/prepare.cc new file mode 100644 index 000000000..39cf65c1b --- /dev/null +++ b/tools/re2c/src/ir/adfa/prepare.cc @@ -0,0 +1,268 @@ +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/globals.h" +#include "src/ir/adfa/action.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/util/allocate.h" + +namespace re2c { + +void DFA::split(State *s) +{ + State *move = new State; + addState(move, s); + move->action.set_move (); + move->rule = s->rule; + move->fill = s->fill; + move->go = s->go; + s->rule = NULL; + s->go.nSpans = 1; + s->go.span = allocate (1); + s->go.span[0].ub = ubChar; + s->go.span[0].to = move; +} + +static uint32_t merge(Span *x0, State *fg, State *bg) +{ + Span *x = x0, *f = fg->go.span, *b = bg->go.span; + uint32_t 
nf = fg->go.nSpans, nb = bg->go.nSpans; + State *prev = NULL, *to; + // NB: we assume both spans are for same range + + for (;;) + { + if (f->ub == b->ub) + { + to = f->to == b->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = f->ub; + ++x; + ++f; + --nf; + ++b; + --nb; + + if (nf == 0 && nb == 0) + { + return static_cast (x - x0); + } + } + + while (f->ub < b->ub) + { + to = f->to == b->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = f->ub; + ++x; + ++f; + --nf; + } + + while (b->ub < f->ub) + { + to = b->to == f->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = b->ub; + ++x; + ++b; + --nb; + } + } +} + +void DFA::findBaseState() +{ + Span *span = allocate (ubChar - lbChar); + + for (State *s = head; s; s = s->next) + { + if (s->fill == 0) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *to = s->go.span[i].to; + + if (to->isBase) + { + to = to->go.span[0].to; + uint32_t nSpans = merge(span, s, to); + + if (nSpans < s->go.nSpans) + { + operator delete (s->go.span); + s->go.nSpans = nSpans; + s->go.span = allocate (nSpans); + memcpy(s->go.span, span, nSpans*sizeof(Span)); + } + + break; + } + } + } + } + + operator delete (span); +} + +void DFA::prepare () +{ + bUsedYYBitmap = false; + + // create rule states + std::map rules; + for (State * s = head; s; s = s->next) + { + if (s->rule) + { + if (rules.find (s->rule->rank) == rules.end ()) + { + State *n = new State; + n->action.set_rule (s->rule); + rules[s->rule->rank] = n; + addState(n, s); + } + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to) + { + s->go.span[i].to = rules[s->rule->rank]; + } + } + } + } + + // create default state (if needed) + State * default_state = NULL; + for (State * s = head; s; s = s->next) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to) + { + if (!default_state) + { + 
default_state = new State; + addState(default_state, s); + } + s->go.span[i].to = default_state; + } + } + } + + // find backup states and create accept state (if needed) + if (default_state) + { + for (State * s = head; s; s = s->next) + { + if (s->rule) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE) + { + const uint32_t accept = static_cast (accepts.find_or_add (rules[s->rule->rank])); + s->action.set_save (accept); + } + } + } + } + default_state->action.set_accept (&accepts); + } + + // split ``base'' states into two parts + for (State * s = head; s; s = s->next) + { + s->isBase = false; + + if (s->fill != 0) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (s->go.span[i].to == s) + { + s->isBase = true; + split(s); + + if (opts->bFlag) + { + BitMap::find(&s->next->go, s); + } + + s = s->next; + break; + } + } + } + } + + // find ``base'' state, if possible + findBaseState(); + + for (State * s = head; s; s = s->next) + { + s->go.init (s); + } +} + +void DFA::calc_stats () +{ + // calculate 'YYMAXFILL' + max_fill = 0; + for (State * s = head; s; s = s->next) + { + if (max_fill < s->fill) + { + max_fill = s->fill; + } + } + + // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used + need_backup = accepts.size () > 0; + + // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used + for (State * s = head; s; s = s->next) + { + if (s->isPreCtxt) + { + need_backupctx = true; + } + } + + // determine if 'yyaccept' variable is used + need_accept = accepts.size () > 1; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/compile.cc b/tools/re2c/src/ir/compile.cc new file mode 100644 index 000000000..b38b398a1 --- /dev/null +++ b/tools/re2c/src/ir/compile.cc @@ -0,0 +1,104 @@ +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/ir/compile.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/dfa/dfa.h" +#include
"src/ir/nfa/nfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/spec.h" + +namespace re2c { + +static std::string make_name(const std::string &cond, uint32_t line) +{ + std::ostringstream os; + os << "line" << line; + std::string name = os.str(); + if (!cond.empty ()) + { + name += "_"; + name += cond; + } + return name; +} + +smart_ptr compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits) +{ + const uint32_t line = output.source.get_block_line(); + const std::string name = make_name(cond, line); + + // The original set of code units (charset) might be very large. + // A common trick is to split charset into disjoint character ranges + // and choose a representative of each range (we choose lower bound). + // The set of all representatives is the new (compacted) charset. + // Don't forget to include zero and upper bound, even if they + // do not explicitly appear in ranges. + std::set bounds; + spec.re->split(bounds); + bounds.insert(0); + bounds.insert(cunits); + charset_t cs; + for (std::set::const_iterator i = bounds.begin(); i != bounds.end(); ++i) + { + cs.push_back(*i); + } + + nfa_t nfa(spec.re); + + dfa_t dfa(nfa, cs, spec.rules); + + // skeleton must be constructed after DFA construction + // but prior to any other DFA transformations + Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line); + + minimization(dfa); + + // find YYFILL states and calculate argument to YYFILL + std::vector fill; + fillpoints(dfa, fill); + + // ADFA stands for 'DFA with actions' + DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line); + + /* + * note [reordering DFA states] + * + * re2c-generated code depends on the order of states in DFA: simply + * flipping two states may change the output significantly.
+ * The order of states is affected by many factors, e.g.: + * - flipping left and right subtrees of alternative when constructing + * AST (also applies to iteration and counted repetition) + * - changing the order in which graph nodes are visited (applies to + * any intermediate representation: bytecode, NFA, DFA, etc.) + * + * To make the resulting code independent of such changes, we hereby + * reorder DFA states. The ordering scheme is very simple: + * + * Starting with DFA root, walk DFA nodes in breadth-first order. + * Child nodes are ordered according to the (alphabetically) first symbol + * leading to each node. Each node must be visited exactly once. + * Default state (NULL) is always the last state. + */ + adfa->reorder(); + + // skeleton is constructed, do further DFA transformations + adfa->prepare(); + + // finally gather overall DFA statistics + adfa->calc_stats(); + + // accumulate global statistics from this particular DFA + output.max_fill = std::max (output.max_fill, adfa->max_fill); + if (adfa->need_accept) + { + output.source.set_used_yyaccept (); + } + + return make_smart_ptr(adfa); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/compile.h b/tools/re2c/src/ir/compile.h new file mode 100644 index 000000000..6883c1c3f --- /dev/null +++ b/tools/re2c/src/ir/compile.h @@ -0,0 +1,20 @@ +#ifndef _RE2C_IR_COMPILE_ +#define _RE2C_IR_COMPILE_ + +#include "src/util/c99_stdint.h" +#include + +#include "src/util/smart_ptr.h" + +namespace re2c +{ + +class DFA; +struct Output; +struct Spec; + +smart_ptr compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits); + +} // namespace re2c + +#endif // _RE2C_IR_COMPILE_ diff --git a/tools/re2c/src/ir/dfa/determinization.cc b/tools/re2c/src/ir/dfa/determinization.cc new file mode 100644 index 000000000..01a04cc7e --- /dev/null +++ b/tools/re2c/src/ir/dfa/determinization.cc @@ -0,0 +1,197 @@ +#include +#include +#include +#include +#include + +#include "src/ir/dfa/dfa.h" +#include
"src/ir/nfa/nfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/parse/rules.h" +#include "src/util/ord_hash_set.h" +#include "src/util/range.h" + +namespace re2c +{ + +const size_t dfa_t::NIL = std::numeric_limits::max(); + +/* + * note [marking DFA states] + * + * DFA state is a set of NFA states. + * However, DFA state includes not all NFA states that are in + * epsilon-closure (NFA states that have only epsilon-transitions + * and are not context of final states are omitted). + * The included states are called 'kernel' states. + * + * We mark visited NFA states during closure construction. + * These marks serve two purposes: + * - avoid loops in NFA + * - avoid duplication of NFA states in kernel + * + * Note that after closure construction: + * - all non-kernel states must be unmarked (these states are + * not stored in kernel and it is impossible to unmark them + * afterwards) + * - all kernel states must be marked (because we may later + * extend this kernel with epsilon-closure of another NFA + * state). Kernel states are unmarked later (before finding + * or adding DFA state). 
+ */ +static nfa_state_t **closure(nfa_state_t **cP, nfa_state_t *n) +{ + if (!n->mark) + { + n->mark = true; + switch (n->type) + { + case nfa_state_t::ALT: + cP = closure(cP, n->value.alt.out2); + cP = closure(cP, n->value.alt.out1); + n->mark = false; + break; + case nfa_state_t::CTX: + *(cP++) = n; + cP = closure(cP, n->value.ctx.out); + break; + default: + *(cP++) = n; + break; + } + } + + return cP; +} + +static size_t find_state + ( nfa_state_t **kernel + , nfa_state_t **end + , ord_hash_set_t &kernels + ) +{ + // zero-sized kernel corresponds to default state + if (kernel == end) + { + return dfa_t::NIL; + } + + // see note [marking DFA states] + for (nfa_state_t **p = kernel; p != end; ++p) + { + (*p)->mark = false; + } + + // sort kernel states: we need this to get stable hash + // and to compare states with simple 'memcmp' + std::sort(kernel, end); + const size_t size = static_cast(end - kernel) * sizeof(nfa_state_t*); + return kernels.insert(kernel, size); +} + +dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules) + : states() + , nchars(charset.size() - 1) // (n + 1) bounds for n ranges +{ + std::map > s2rules; + ord_hash_set_t kernels; + nfa_state_t **const buffer = new nfa_state_t*[nfa.size]; + std::vector > arcs(nchars); + + find_state(buffer, closure(buffer, nfa.root), kernels); + for (size_t i = 0; i < kernels.size(); ++i) + { + dfa_state_t *s = new dfa_state_t; + states.push_back(s); + + nfa_state_t **kernel; + const size_t kernel_size = kernels.deref(i, kernel); + for (size_t j = 0; j < kernel_size; ++j) + { + nfa_state_t *n = kernel[j]; + switch (n->type) + { + case nfa_state_t::RAN: + { + nfa_state_t *m = n->value.ran.out; + size_t c = 0; + for (Range *r = n->value.ran.ran; r; r = r->next ()) + { + for (; charset[c] != r->lower(); ++c); + for (; charset[c] != r->upper(); ++c) + { + arcs[c].push_back(m); + } + } + break; + } + case nfa_state_t::CTX: + s->ctx = true; + break; + case nfa_state_t::FIN: + 
s2rules[i].insert(n->value.fin.rule); + break; + default: + break; + } + } + + s->arcs = new size_t[nchars]; + for(size_t c = 0; c < nchars; ++c) + { + nfa_state_t **end = buffer; + for (std::vector::const_iterator j = arcs[c].begin(); j != arcs[c].end(); ++j) + { + end = closure(end, *j); + } + s->arcs[c] = find_state(buffer, end, kernels); + } + + for(size_t c = 0; c < nchars; ++c) + { + arcs[c].clear(); + } + } + delete[] buffer; + + const size_t count = states.size(); + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = states[i]; + std::set &rs = s2rules[i]; + // for each final state: choose the rule with the smallest rank + for (std::set::const_iterator j = rs.begin(); j != rs.end(); ++j) + { + RuleOp *rule = *j; + if (!s->rule || rule->rank < s->rule->rank) + { + s->rule = rule; + } + } + // other rules are shadowed by the chosen rule + for (std::set::const_iterator j = rs.begin(); j != rs.end(); ++j) + { + RuleOp *rule = *j; + if (s->rule != rule) + { + rules[rule->rank].shadow.insert(s->rule->rank); + } + } + } +} + +dfa_t::~dfa_t() +{ + std::vector::iterator + i = states.begin(), + e = states.end(); + for (; i != e; ++i) + { + delete *i; + } +} + +} // namespace re2c + diff --git a/tools/re2c/src/ir/dfa/dfa.h b/tools/re2c/src/ir/dfa/dfa.h new file mode 100644 index 000000000..459ed4ab3 --- /dev/null +++ b/tools/re2c/src/ir/dfa/dfa.h @@ -0,0 +1,58 @@ +#ifndef _RE2C_IR_DFA_DFA_ +#define _RE2C_IR_DFA_DFA_ + +#include "src/util/c99_stdint.h" +#include + +#include "src/ir/regexp/regexp.h" +#include "src/parse/rules.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct nfa_t; +class RuleOp; + +struct dfa_state_t +{ + size_t *arcs; + RuleOp *rule; + bool ctx; + + dfa_state_t() + : arcs(NULL) + , rule(NULL) + , ctx(false) + {} + ~dfa_state_t() + { + delete[] arcs; + } + + FORBID_COPY(dfa_state_t); +}; + +struct dfa_t +{ + static const size_t NIL; + + std::vector states; + const size_t nchars; + + dfa_t(const nfa_t &nfa, const charset_t 
&charset, rules_t &rules); + ~dfa_t(); +}; + +enum dfa_minimization_t +{ + DFA_MINIMIZATION_TABLE, + DFA_MINIMIZATION_MOORE +}; + +void minimization(dfa_t &dfa); +void fillpoints(const dfa_t &dfa, std::vector &fill); + +} // namespace re2c + +#endif // _RE2C_IR_DFA_DFA_ diff --git a/tools/re2c/src/ir/dfa/fillpoints.cc b/tools/re2c/src/ir/dfa/fillpoints.cc new file mode 100644 index 000000000..a27402d4d --- /dev/null +++ b/tools/re2c/src/ir/dfa/fillpoints.cc @@ -0,0 +1,154 @@ +#include +#include +#include + +#include "src/ir/dfa/dfa.h" + +namespace re2c +{ + +static const size_t INFINITI = std::numeric_limits::max(); +static const size_t UNDEFINED = INFINITI - 1; + +static bool loopback(size_t node, size_t narcs, const size_t *arcs) +{ + for (size_t i = 0; i < narcs; ++i) + { + if (arcs[i] == node) + { + return true; + } + } + return false; +} + +/* + * note [finding strongly connected components of DFA] + * + * A slight modification of Tarjan's algorithm. + * + * The algorithm walks the graph in depth-first order. It maintains a stack + * of nodes that have already been visited but haven't been assigned to + * SCC yet. For each node the algorithm calculates 'lowlink': index of + * the highest ancestor node reachable in one step from a descendant of + * the node. Lowlink is used to determine when a set of nodes should be + * popped off the stack into a new SCC. + * + * We use lowlink to hold different kinds of information: + * - values in range [0 .. stack size] mean that this node is on stack + * (link to a node with the smallest index reachable from this one) + * - UNDEFINED means that this node has not been visited yet + * - INFINITI means that this node has already been popped off stack + * + * We use stack size (rather than topological sort index) as unique index + * of a node on stack. This is safe because indices of nodes on stack are + * still unique and less than indices of nodes that have been popped off + * stack (INFINITI).
+ * + */ +static void scc( + const dfa_t &dfa, + std::stack &stack, + std::vector &lowlink, + std::vector &trivial, + size_t i) +{ + const size_t link = stack.size(); + lowlink[i] = link; + stack.push(i); + + const size_t *arcs = dfa.states[i]->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + const size_t j = arcs[c]; + if (j != dfa_t::NIL) + { + if (lowlink[j] == UNDEFINED) + { + scc(dfa, stack, lowlink, trivial, j); + } + if (lowlink[j] < lowlink[i]) + { + lowlink[i] = lowlink[j]; + } + } + } + + if (lowlink[i] == link) + { + // SCC is non-trivial (has loops) iff it either: + // - consists of multiple nodes (they all must be interconnected) + // - consists of single node which loops back to itself + trivial[i] = i == stack.top() + && !loopback(i, dfa.nchars, arcs); + + size_t j; + do + { + j = stack.top(); + stack.pop(); + lowlink[j] = INFINITI; + } + while (j != i); + } +} + +static void calc_fill( + const dfa_t &dfa, + const std::vector &trivial, + std::vector &fill, + size_t i) +{ + if (fill[i] == UNDEFINED) + { + fill[i] = 0; + const size_t *arcs = dfa.states[i]->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + const size_t j = arcs[c]; + if (j != dfa_t::NIL) + { + calc_fill(dfa, trivial, fill, j); + size_t max = 1; + if (trivial[j]) + { + max += fill[j]; + } + if (max > fill[i]) + { + fill[i] = max; + } + } + } + } +} + +void fillpoints(const dfa_t &dfa, std::vector &fill) +{ + const size_t size = dfa.states.size(); + + // find DFA states that belong to non-trivial SCC + std::stack stack; + std::vector lowlink(size, UNDEFINED); + std::vector trivial(size, false); + scc(dfa, stack, lowlink, trivial, 0); + + // for each DFA state, calculate YYFILL argument: + // maximal path length to the next YYFILL state + fill.resize(size, UNDEFINED); + calc_fill(dfa, trivial, fill, 0); + + // The following states must trigger YYFILL: + // - initial state + // - all states in non-trivial SCCs + // for other states, reset YYFILL argument to zero + for (size_t i = 1;
i < size; ++i) + { + if (trivial[i]) + { + fill[i] = 0; + } + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/dfa/minimization.cc b/tools/re2c/src/ir/dfa/minimization.cc new file mode 100644 index 000000000..79c93e490 --- /dev/null +++ b/tools/re2c/src/ir/dfa/minimization.cc @@ -0,0 +1,252 @@ +#include +#include +#include + +#include "src/conf/opt.h" +#include "src/ir/dfa/dfa.h" +#include "src/globals.h" + +namespace re2c +{ + +class RuleOp; + +/* + * note [DFA minimization: table filling algorithm] + * + * This algorithm is simple and slow; it's a reference implementation. + * + * The algorithm constructs (strictly lower triangular) boolean matrix + * indexed by DFA states. Each matrix cell (S1,S2) indicates if states + * S1 and S2 are distinguishable. Initially states are distinguished + * according to their rule and context. One step of the algorithm + * updates the matrix as follows: each pair of states S1 and S2 is + * marked as distinguishable iff there exist transitions from S1 and S2 on + * the same symbol that go to distinguishable states. The algorithm + * loops until the matrix stops changing.
+ */ +static void minimization_table( + size_t *part, + const std::vector &states, + size_t nchars) +{ + const size_t count = states.size(); + + bool **tbl = new bool*[count]; + tbl[0] = new bool[count * (count - 1) / 2]; + for (size_t i = 0; i < count - 1; ++i) + { + tbl[i + 1] = tbl[i] + i; + } + + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s1 = states[i]; + for (size_t j = 0; j < i; ++j) + { + dfa_state_t *s2 = states[j]; + tbl[i][j] = s1->ctx != s2->ctx + || s1->rule != s2->rule; + } + } + + for (bool loop = true; loop;) + { + loop = false; + for (size_t i = 0; i < count; ++i) + { + for (size_t j = 0; j < i; ++j) + { + if (!tbl[i][j]) + { + for (size_t k = 0; k < nchars; ++k) + { + size_t oi = states[i]->arcs[k]; + size_t oj = states[j]->arcs[k]; + if (oi < oj) + { + std::swap(oi, oj); + } + if (oi != oj && + (oi == dfa_t::NIL || + oj == dfa_t::NIL || + tbl[oi][oj])) + { + tbl[i][j] = true; + loop = true; + break; + } + } + } + } + } + } + + for (size_t i = 0; i < count; ++i) + { + part[i] = i; + for (size_t j = 0; j < i; ++j) + { + if (!tbl[i][j]) + { + part[i] = j; + break; + } + } + } + + delete[] tbl[0]; + delete[] tbl; +} + +/* + * note [DFA minimization: Moore algorithm] + * + * The algorithm maintains partition of DFA states. + * Initial partition is coarse: states are distinguished according + * to their rule and context. Partition is gradually refined: each + * set of states is split into minimal number of subsets such that + * for all states in a subset transitions on the same symbol go to + * the same set of states. + * The algorithm loops until partition stops changing. 
+ */ +static void minimization_moore( + size_t *part, + const std::vector &states, + size_t nchars) +{ + const size_t count = states.size(); + + size_t *next = new size_t[count]; + + std::map, size_t> init; + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = states[i]; + std::pair key(s->rule, s->ctx); + if (init.insert(std::make_pair(key, i)).second) + { + part[i] = i; + next[i] = dfa_t::NIL; + } + else + { + const size_t j = init[key]; + part[i] = j; + next[i] = next[j]; + next[j] = i; + } + } + + size_t *out = new size_t[nchars * count]; + size_t *diff = new size_t[count]; + for (bool loop = true; loop;) + { + loop = false; + for (size_t i = 0; i < count; ++i) + { + if (i != part[i] || next[i] == dfa_t::NIL) + { + continue; + } + + for (size_t j = i; j != dfa_t::NIL; j = next[j]) + { + size_t *o = &out[j * nchars]; + size_t *a = states[j]->arcs; + for (size_t c = 0; c < nchars; ++c) + { + o[c] = a[c] == dfa_t::NIL + ? dfa_t::NIL + : part[a[c]]; + } + } + + size_t diff_count = 0; + for (size_t j = i; j != dfa_t::NIL;) + { + const size_t j_next = next[j]; + size_t n = 0; + for (; n < diff_count; ++n) + { + size_t k = diff[n]; + if (memcmp(&out[j * nchars], + &out[k * nchars], + nchars * sizeof(size_t)) == 0) + { + part[j] = k; + next[j] = next[k]; + next[k] = j; + break; + } + } + if (n == diff_count) + { + diff[diff_count++] = j; + part[j] = j; + next[j] = dfa_t::NIL; + } + j = j_next; + } + loop |= diff_count > 1; + } + } + delete[] out; + delete[] diff; + delete[] next; +} + +void minimization(dfa_t &dfa) +{ + const size_t count = dfa.states.size(); + + size_t *part = new size_t[count]; + + switch (opts->dfa_minimization) + { + case DFA_MINIMIZATION_TABLE: + minimization_table(part, dfa.states, dfa.nchars); + break; + case DFA_MINIMIZATION_MOORE: + minimization_moore(part, dfa.states, dfa.nchars); + break; + } + + size_t *compact = new size_t[count]; + for (size_t i = 0, j = 0; i < count; ++i) + { + if (i == part[i]) + { + compact[i] = j++; + } + } + + 
size_t new_count = 0; + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = dfa.states[i]; + if (i == part[i]) + { + size_t *arcs = s->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + if (arcs[c] != dfa_t::NIL) + { + arcs[c] = compact[part[arcs[c]]]; + } + } + dfa.states[new_count++] = s; + } + else + { + delete s; + } + } + dfa.states.resize(new_count); + + delete[] compact; + delete[] part; +} + +} // namespace re2c + diff --git a/tools/re2c/src/ir/nfa/calc_size.cc b/tools/re2c/src/ir/nfa/calc_size.cc new file mode 100644 index 000000000..39f0b4e1b --- /dev/null +++ b/tools/re2c/src/ir/nfa/calc_size.cc @@ -0,0 +1,50 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" + +namespace re2c +{ + +uint32_t AltOp::calc_size() const +{ + return exp1->calc_size() + + exp2->calc_size() + + 1; +} + +uint32_t CatOp::calc_size() const +{ + return exp1->calc_size() + + exp2->calc_size(); +} + +uint32_t CloseOp::calc_size() const +{ + return exp->calc_size() + 1; +} + +uint32_t MatchOp::calc_size() const +{ + return 1; +} + +uint32_t NullOp::calc_size() const +{ + return 0; +} + +uint32_t RuleOp::calc_size() const +{ + const uint32_t n = ctx->calc_size(); + return exp->calc_size() + + (n > 0 ? 
n + 1 : 0) + + 1; +} + +} // end namespace re2c diff --git a/tools/re2c/src/ir/nfa/nfa.cc b/tools/re2c/src/ir/nfa/nfa.cc new file mode 100644 index 000000000..64f4641cf --- /dev/null +++ b/tools/re2c/src/ir/nfa/nfa.cc @@ -0,0 +1,72 @@ +#include "src/ir/nfa/nfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" + +namespace re2c { + +nfa_t::nfa_t(RegExp *re) + : max_size(re->calc_size()) + , size(0) + , states(new nfa_state_t[max_size]) + , root(re->compile(*this, NULL)) +{} + +nfa_t::~nfa_t() +{ + delete[] states; +} + +nfa_state_t *AltOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s = &nfa.states[nfa.size++]; + s->alt(exp1->compile(nfa, t), + exp2->compile(nfa, t)); + return s; +} + +nfa_state_t *CatOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s2 = exp2->compile(nfa, t); + nfa_state_t *s1 = exp1->compile(nfa, s2); + return s1; +} + +nfa_state_t *CloseOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s = &nfa.states[nfa.size++]; + s->alt(t, exp->compile(nfa, s)); + return s; +} + +nfa_state_t *MatchOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s = &nfa.states[nfa.size++]; + s->ran(t, match); + return s; +} + +nfa_state_t *NullOp::compile(nfa_t &, nfa_state_t *t) +{ + return t; +} + +nfa_state_t *RuleOp::compile(nfa_t &nfa, nfa_state_t *) +{ + nfa_state_t *s3 = &nfa.states[nfa.size++]; + s3->fin(this); + if (ctx->calc_size() > 0) + { + nfa_state_t *s2 = &nfa.states[nfa.size++]; + s2->ctx(ctx->compile(nfa, s3)); + s3 = s2; + } + nfa_state_t *s1 = exp->compile(nfa, s3); + return s1; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/nfa/nfa.h b/tools/re2c/src/ir/nfa/nfa.h new file mode 100644 index 000000000..535348bab --- /dev/null +++ b/tools/re2c/src/ir/nfa/nfa.h @@ -0,0 +1,90 @@ +#ifndef 
_RE2C_IR_NFA_NFA_ +#define _RE2C_IR_NFA_NFA_ + +#include "src/util/c99_stdint.h" + +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +class Range; +class RegExp; +class RuleOp; + +struct nfa_state_t +{ + enum type_t + { + ALT, + RAN, + CTX, + FIN + } type; + union + { + struct + { + nfa_state_t *out1; + nfa_state_t *out2; + } alt; + struct + { + nfa_state_t *out; + Range *ran; + } ran; + struct + { + nfa_state_t *out; + } ctx; + struct + { + RuleOp *rule; + } fin; + } value; + bool mark; + + void alt(nfa_state_t *s1, nfa_state_t *s2) + { + type = ALT; + value.alt.out1 = s1; + value.alt.out2 = s2; + mark = false; + } + void ran(nfa_state_t *s, Range *r) + { + type = RAN; + value.ran.out = s; + value.ran.ran = r; + mark = false; + } + void ctx(nfa_state_t *s) + { + type = CTX; + value.ctx.out = s; + mark = false; + } + void fin(RuleOp *r) + { + type = FIN; + value.fin.rule = r; + mark = false; + } +}; + +struct nfa_t +{ + const uint32_t max_size; + uint32_t size; + nfa_state_t *states; + nfa_state_t *root; + + nfa_t(RegExp *re); + ~nfa_t(); + + FORBID_COPY(nfa_t); +}; + +} // namespace re2c + +#endif // _RE2C_IR_NFA_NFA_ diff --git a/tools/re2c/src/ir/nfa/split.cc b/tools/re2c/src/ir/nfa/split.cc new file mode 100644 index 000000000..73e63040b --- /dev/null +++ b/tools/re2c/src/ir/nfa/split.cc @@ -0,0 +1,49 @@ +#include "src/util/c99_stdint.h" +#include + +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/util/range.h" + +namespace re2c { + +void AltOp::split (std::set & cs) +{ + exp1->split (cs); + exp2->split (cs); +} + +void CatOp::split (std::set & cs) +{ + exp1->split (cs); + exp2->split (cs); +} + +void CloseOp::split (std::set & cs) +{ + exp->split (cs); +} + +void MatchOp::split (std::set & cs) +{ + for (Range *r = 
match; r; r = r->next ()) + { + cs.insert (r->lower ()); + cs.insert (r->upper ()); + } +} + +void NullOp::split (std::set &) {} + +void RuleOp::split (std::set & cs) +{ + exp->split (cs); + ctx->split (cs); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/display.cc b/tools/re2c/src/ir/regexp/display.cc new file mode 100644 index 000000000..d139dc53a --- /dev/null +++ b/tools/re2c/src/ir/regexp/display.cc @@ -0,0 +1,51 @@ +#include + +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" + +namespace re2c +{ + +std::ostream & operator << (std::ostream & o, const RegExp & re) +{ + re.display (o); + return o; +} + +void AltOp::display (std::ostream & o) const +{ + o << exp1 << "|" << exp2; +} + +void CatOp::display (std::ostream & o) const +{ + o << exp1 << exp2; +} + +void CloseOp::display (std::ostream & o) const +{ + o << exp << "+"; +} + +void MatchOp::display (std::ostream & o) const +{ + o << match; +} + +void NullOp::display (std::ostream & o) const +{ + o << "_"; +} + +void RuleOp::display (std::ostream & o) const +{ + o << exp << "/" << ctx << ";"; +} + +} // end namespace re2c + diff --git a/tools/re2c/src/ir/regexp/empty_class_policy.h b/tools/re2c/src/ir/regexp/empty_class_policy.h new file mode 100644 index 000000000..bb062de03 --- /dev/null +++ b/tools/re2c/src/ir/regexp/empty_class_policy.h @@ -0,0 +1,15 @@ +#ifndef _RE2C_IR_REGEXP_EMPTY_CLASS_POLICY_ +#define _RE2C_IR_REGEXP_EMPTY_CLASS_POLICY_ + +namespace re2c { + +enum empty_class_policy_t +{ + EMPTY_CLASS_MATCH_EMPTY, // match on empty input + EMPTY_CLASS_MATCH_NONE, // fail to match on any input + EMPTY_CLASS_ERROR // compilation error +}; + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_EMPTY_CLASS_POLICY_ diff --git a/tools/re2c/src/ir/regexp/encoding/case.h 
b/tools/re2c/src/ir/regexp/encoding/case.h new file mode 100644 index 000000000..38efa0e19 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/case.h @@ -0,0 +1,31 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_CASE_ +#define _RE2C_IR_REGEXP_ENCODING_CASE_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +// TODO: support non-ASCII encodings +bool is_alpha (uint32_t c); +uint32_t to_lower_unsafe (uint32_t c); +uint32_t to_upper_unsafe (uint32_t c); + +inline bool is_alpha (uint32_t c) +{ + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z'); +} + +inline uint32_t to_lower_unsafe (uint32_t c) +{ + return c | 0x20u; +} + +inline uint32_t to_upper_unsafe (uint32_t c) +{ + return c & ~0x20u; +} + +} + +#endif // _RE2C_IR_REGEXP_ENCODING_CASE_ diff --git a/tools/re2c/src/ir/regexp/encoding/enc.cc b/tools/re2c/src/ir/regexp/encoding/enc.cc new file mode 100644 index 000000000..d8c5e9836 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/enc.cc @@ -0,0 +1,206 @@ +#include "src/ir/regexp/encoding/enc.h" +#include "src/util/range.h" + +namespace re2c { + +const uint32_t Enc::SURR_MIN = 0xD800; +const uint32_t Enc::SURR_MAX = 0xDFFF; +const uint32_t Enc::UNICODE_ERROR = 0xFFFD; + +const uint32_t Enc::asc2ebc[256] = + { /* Based on ISO 8859/1 and Code Page 37 */ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, + 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, + 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, + 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xba, 0xe0, 0xbb, 0xb0, 0x6d, + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0xa2, 0xa3, 
0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, + 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xff, + 0x41, 0xaa, 0x4a, 0xb1, 0x9f, 0xb2, 0x6a, 0xb5, 0xbd, 0xb4, 0x9a, 0x8a, 0x5f, 0xca, 0xaf, 0xbc, + 0x90, 0x8f, 0xea, 0xfa, 0xbe, 0xa0, 0xb6, 0xb3, 0x9d, 0xda, 0x9b, 0x8b, 0xb7, 0xb8, 0xb9, 0xab, + 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9e, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77, + 0xac, 0x69, 0xed, 0xee, 0xeb, 0xef, 0xec, 0xbf, 0x80, 0xfd, 0xfe, 0xfb, 0xfc, 0xad, 0x8e, 0x59, + 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9c, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, + 0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 0xe1, 0x70, 0xdd, 0xde, 0xdb, 0xdc, 0x8d, 0xae, 0xdf + }; + +const uint32_t Enc::ebc2asc[256] = + { /* Based on ISO 8859/1 and Code Page 37 */ + 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x9d, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x0a, 0x17, 0x1b, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, + 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, + 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0xac, + 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xde, 0xb1, + 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4, + 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 
0xdd, 0xfe, 0xae, + 0x5e, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0x5b, 0x5d, 0xaf, 0xa8, 0xb4, 0xd7, + 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, + 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, + 0x5c, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f + }; + +/* + * Returns code point representation for current + * encoding with regard to current policy. + * + * Since code point is exactly specified by user, + * it is assumed that user considers it to be valid. + * We must check it. + * + * Returns false if this code point exceeds maximum + * or is forbidden by current policy, otherwise + * returns true. Overwrites code point. + */ +bool Enc::encode(uint32_t & c) const +{ + if (c >= nCodePoints ()) + { + return false; + } + + switch (type_) + { + case ASCII: + return true; + case EBCDIC: + c = asc2ebc[c]; + return true; + case UCS2: + case UTF16: + case UTF32: + case UTF8: + if (c < SURR_MIN || c > SURR_MAX) + return true; + else + { + switch (policy_) + { + case POLICY_FAIL: + return false; + case POLICY_SUBSTITUTE: + c = UNICODE_ERROR; + return true; + case POLICY_IGNORE: + return true; + } + } + } + return false; // to silence gcc warning +} + +/* + * Returns original representation of code point. + * Assumes code point is valid (hence 'unsafe'). + */ +uint32_t Enc::decodeUnsafe(uint32_t c) const +{ + switch (type_) + { + case EBCDIC: + c = ebc2asc[c & 0xFF]; + break; + case ASCII: + case UCS2: + case UTF16: + case UTF32: + case UTF8: + break; + } + return c; +} + +/* + * Returns [l - h] range representation for current + * encoding with regard to current policy. + * + * Since range borders are exactly specified by user, + * it is assumed that user considers that all code + * points from this range are valid.
re2c must check it. + * + * Returns NULL if range contains code points that + * exceed maximum or are forbidden by current policy, + * otherwise returns pointer to newly constructed range. + */ +Range * Enc::encodeRange(uint32_t l, uint32_t h) const +{ + if (l >= nCodePoints () || h >= nCodePoints ()) + { + return NULL; + } + + Range * r = NULL; + switch (type_) + { + case ASCII: + r = Range::ran (l, h + 1); + break; + case EBCDIC: + { + const uint32_t el = asc2ebc[l]; + r = Range::sym (el); + for (uint32_t c = l + 1; c <= h; ++c) + { + const uint32_t ec = asc2ebc[c]; + r = Range::add (r, Range::sym (ec)); + } + break; + } + case UCS2: + case UTF16: + case UTF32: + case UTF8: + r = Range::ran (l, h + 1); + if (l <= SURR_MAX && h >= SURR_MIN) + { + switch (policy_) + { + case POLICY_FAIL: + r = NULL; + break; + case POLICY_SUBSTITUTE: + { + Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); + Range * error = Range::sym (UNICODE_ERROR); + r = Range::sub (r, surrs); + r = Range::add (r, error); + break; + } + case POLICY_IGNORE: + break; + } + } + break; + } + return r; +} + +/* + * Returns full range representation for current encoding + * with regard to current policy. + * + * Since range is defined declaratively, re2c does + * all the necessary corrections 'for free'. + * + * Always succeeds, returns pointer to newly constructed + * range. 
+ */ +Range * Enc::fullRange() const +{ + Range * r = Range::ran (0, nCodePoints()); + if (policy_ != POLICY_IGNORE) + { + Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); + r = Range::sub (r, surrs); + } + return r; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/enc.h b/tools/re2c/src/ir/regexp/encoding/enc.h new file mode 100644 index 000000000..b85ae0bec --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/enc.h @@ -0,0 +1,197 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_ENC_ +#define _RE2C_IR_REGEXP_ENCODING_ENC_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +class Range; + +/* + * note [encodings] + * + * Each encoding defines two concepts: + * + * 1) Code point -- abstract number, which represents single encoding symbol. + * E.g., Unicode defines code points in the range [0 - 0x10FFFF] , so each + * Unicode encoding must be capable of representing 0x110000 code points. + * + * 2) Code unit -- the smallest unit of memory, which is used in the encoded + * text. One or more code units can be needed to represent a single code + * point, depending on the encoding. For each encoding, all code points + * either are represented with equal number of code units (fixed-length + * encodings), or with variable number of code units (variable-length + * encodings). 
+ * + * +----------+------------------+-----------------------+-----------------+----------------+ + * | encoding | code point range | code point size | code unit range | code unit size | + * +----------+------------------+-----------------------+-----------------+----------------+ + * | ASCII | 0 - 0xFF | fixed, 1 byte | 0 - 0xFF | 1 byte | + * | EBCDIC | 0 - 0xFF | fixed, 1 byte | 0 - 0xFF | 1 byte | + * | UCS2 | 0 - 0xFFFF | fixed, 2 bytes | 0 - 0xFFFF | 2 bytes | + * | UTF16 | 0 - 0x10FFFF | variable, 2 - 4 bytes | 0 - 0xFFFF | 2 bytes | + * | UTF32 | 0 - 0x10FFFF | fixed, 4 bytes | 0 - 0x10FFFF | 4 bytes | + * | UTF8 | 0 - 0x10FFFF | variable, 1 - 4 bytes | 0 - 0xFF | 1 byte | + * +----------+------------------+-----------------------+-----------------+----------------+ + */ + +class Enc +{ +public: + // Supported encodings. + enum type_t + { ASCII + , EBCDIC + , UCS2 + , UTF16 + , UTF32 + , UTF8 + }; + + // What to do with invalid code points + enum policy_t + { POLICY_FAIL + , POLICY_SUBSTITUTE + , POLICY_IGNORE + }; + +private: + static const uint32_t asc2ebc[256]; + static const uint32_t ebc2asc[256]; + static const uint32_t SURR_MIN; + static const uint32_t SURR_MAX; + static const uint32_t UNICODE_ERROR; + + type_t type_; + policy_t policy_; + +public: + Enc() + : type_ (ASCII) + , policy_ (POLICY_IGNORE) + { } + + static const char * name (type_t t); + + bool operator != (const Enc & e) const { return type_ != e.type_; } + + inline uint32_t nCodePoints() const; + inline uint32_t nCodeUnits() const; + inline uint32_t szCodePoint() const; + inline uint32_t szCodeUnit() const; + + inline bool set(type_t t); + inline void unset(type_t); + inline type_t type () const; + + inline void setPolicy(policy_t t); + + bool encode(uint32_t & c) const; + uint32_t decodeUnsafe(uint32_t c) const; + Range * encodeRange(uint32_t l, uint32_t h) const; + Range * fullRange() const; +}; + +inline const char * Enc::name (type_t t) +{ + switch (t) + { + case ASCII: return 
"ASCII"; + case EBCDIC: return "EBCDIC"; + case UTF8: return "UTF8"; + case UCS2: return "UCS2"; /* spelled to match the enumerator */ + case UTF16: return "UTF16"; + case UTF32: return "UTF32"; + default: return ""; + } +} + +inline uint32_t Enc::nCodePoints() const +{ + switch (type_) + { + case ASCII: + case EBCDIC: return 0x100; + case UCS2: return 0x10000; + case UTF16: + case UTF32: + case UTF8: + default: return 0x110000; + } +} + +inline uint32_t Enc::nCodeUnits() const +{ + switch (type_) + { + case ASCII: + case EBCDIC: + case UTF8: return 0x100; + case UCS2: + case UTF16: return 0x10000; + case UTF32: + default: return 0x110000; + } +} + +// returns *maximal* code point size for encoding +inline uint32_t Enc::szCodePoint() const +{ + switch (type_) + { + case ASCII: + case EBCDIC: return 1; + case UCS2: return 2; + case UTF16: + case UTF32: + case UTF8: + default: return 4; + } +} + +inline uint32_t Enc::szCodeUnit() const +{ + switch (type_) + { + case ASCII: + case EBCDIC: + case UTF8: return 1; + case UCS2: + case UTF16: return 2; + case UTF32: + default: return 4; + } +} + +inline bool Enc::set(type_t t) +{ + if (type_ == t) + return true; + else if (type_ != ASCII) + return false; + else + { + type_ = t; + return true; + } +} + +inline void Enc::unset(type_t t) +{ + if (type_ == t) + type_ = ASCII; +} + +inline Enc::type_t Enc::type () const +{ + return type_; +} + +inline void Enc::setPolicy(policy_t t) +{ + policy_ = t; +} + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_ENC_ diff --git a/tools/re2c/src/ir/regexp/encoding/range_suffix.cc b/tools/re2c/src/ir/regexp/encoding/range_suffix.cc new file mode 100644 index 000000000..486bd558f --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/range_suffix.cc @@ -0,0 +1,38 @@ +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/util/range.h" + +namespace re2c { + +static RegExp * emit (RangeSuffix * p, RegExp * re); + +free_list<RangeSuffix *>
RangeSuffix::freeList; + +RegExp * to_regexp (RangeSuffix * p) +{ + return p + ? emit (p, NULL) + : new MatchOp (NULL); +} + +/* + * Build regexp from suffix tree. + */ +RegExp * emit(RangeSuffix * p, RegExp * re) +{ + if (p == NULL) + return re; + else + { + RegExp * regexp = NULL; + for (; p != NULL; p = p->next) + { + RegExp * re1 = doCat(new MatchOp(Range::ran (p->l, p->h + 1)), re); + regexp = doAlt(regexp, emit(p->child, re1)); + } + return regexp; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/range_suffix.h b/tools/re2c/src/ir/regexp/encoding/range_suffix.h new file mode 100644 index 000000000..ea46bdd86 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/range_suffix.h @@ -0,0 +1,39 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_ +#define _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_ + +#include "src/util/c99_stdint.h" +#include <stddef.h> // NULL + +#include "src/util/forbid_copy.h" +#include "src/util/free_list.h" + +namespace re2c { + +class RegExp; + +struct RangeSuffix +{ + static free_list<RangeSuffix *> freeList; /* every node registers itself here on construction */ + + uint32_t l; + uint32_t h; + RangeSuffix * next; + RangeSuffix * child; + + RangeSuffix (uint32_t lo, uint32_t hi) + : l (lo) + , h (hi) + , next (NULL) + , child (NULL) + { + freeList.insert(this); + } + + FORBID_COPY (RangeSuffix); +}; + +RegExp * to_regexp (RangeSuffix * p); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc new file mode 100644 index 000000000..4b0a13bbb --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc @@ -0,0 +1,10 @@ +#include "src/ir/regexp/encoding/utf16/utf16.h" + +namespace re2c { + +const uint32_t utf16::MAX_1WORD_RUNE = 0xFFFFu; +const uint32_t utf16::MIN_LEAD_SURR = 0xD800u; +const uint32_t utf16::MIN_TRAIL_SURR = 0xDC00u; +const uint32_t utf16::MAX_TRAIL_SURR = 0xDFFFu; + +} // namespace re2c diff --git
a/tools/re2c/src/ir/regexp/encoding/utf16/utf16.h b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.h new file mode 100644 index 000000000..89cdbdbdd --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.h @@ -0,0 +1,37 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_ +#define _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +class utf16 +{ +public: + typedef uint32_t rune; + + static const uint32_t MAX_1WORD_RUNE; + static const uint32_t MIN_LEAD_SURR; + static const uint32_t MIN_TRAIL_SURR; + static const uint32_t MAX_TRAIL_SURR; + + /* leading surrogate of UTF-16 symbol */ + static inline uint32_t lead_surr(rune r); + + /* trailing surrogate of UTF-16 symbol */ + static inline uint32_t trail_surr(rune r); +}; + +inline uint32_t utf16::lead_surr(rune r) +{ + return ((r - 0x10000u) / 0x400u) + MIN_LEAD_SURR; +} + +inline uint32_t utf16::trail_surr(rune r) +{ + return ((r - 0x10000u) % 0x400u) + MIN_TRAIL_SURR; +} + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc new file mode 100644 index 000000000..51f966bac --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc @@ -0,0 +1,146 @@ +#include "src/ir/regexp/encoding/utf16/utf16_range.h" +#include "src/ir/regexp/encoding/range_suffix.h" + +namespace re2c { + +/* + * Add word range [w1-w2]. + */ +void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h) +{ + RangeSuffix ** p = &root; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l, h); + break; + } + else if ((*p)->l == l && (*p)->h == h) + { + break; + } + else + p = &(*p)->next; + } +} + +/* + * Now that we have catenation of word ranges [l1-h1],[l2-h2], + * we want to add it to existing range, merging suffixes on the fly. 
+ */ +void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr) +{ + RangeSuffix ** p = &root; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l_tr, h_tr); + p = &(*p)->child; + break; + } + else if ((*p)->l == l_tr && (*p)->h == h_tr) + { + p = &(*p)->child; + break; + } + else + p = &(*p)->next; + } + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l_ld, h_ld); + break; + } + else if ((*p)->l == l_ld && (*p)->h == h_ld) + { + break; + } + else + p = &(*p)->next; + } +} + +/* + * Split range into sub-ranges that agree on leading surrogates. + * + * We have two Unicode runes, L and H, both map to UTF-16 + * surrogate pairs 'L1 L2' and 'H1 H2'. + * We want to represent Unicode range [L - H] as a catenation + * of word ranges [L1 - H1],[L2 - H2]. + * + * This is only possible if the following condition holds: + * if L1 /= H1, then L2 == 0xdc00 and H2 == 0xdfff. + * This condition ensures that: + * 1) all possible UTF-16 sequences between L and H are allowed + * 2) no word ranges [w1 - w2] appear, such that w1 > w2 + * + * E.g.: + * [\U00010001-\U00010400] => [d800-d801],[dc01-dc00]. + * The last word range, [dc01-dc00], is incorrect: its lower bound + * is greater than its upper bound. To fix this, we must split + * the original range into two sub-ranges: + * [\U00010001-\U000103ff] => [d800-d800],[dc01-dfff] + * [\U00010400-\U00010400] => [d801-d801],[dc00-dc00] + * + * This function finds all such 'points of discontinuity' + * and represents original range as alternation of continuous + * sub-ranges. 
+ */ +void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr) +{ + if (l_ld != h_ld) + { + if (l_tr > utf16::MIN_TRAIL_SURR) + { + UTF16splitByContinuity(root, l_ld, l_ld, l_tr, utf16::MAX_TRAIL_SURR); + UTF16splitByContinuity(root, l_ld + 1, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + return; + } + if (h_tr < utf16::MAX_TRAIL_SURR) + { + UTF16splitByContinuity(root, l_ld, h_ld - 1, l_tr, utf16::MAX_TRAIL_SURR); + UTF16splitByContinuity(root, h_ld, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + return; + } + } + UTF16addContinuous2(root, l_ld, h_ld, l_tr, h_tr); +} + +/* + * Split range into sub-ranges, so that all runes in the same + * sub-range have equal length of UTF-16 sequence. E.g., full + * Unicode range [0-0x10FFFF] gets split into sub-ranges: + * [0 - 0xFFFF] (2-byte UTF-16 sequences) + * [0x10000 - 0x10FFFF] (4-byte UTF-16 sequences) + */ +void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h) +{ + if (l <= utf16::MAX_1WORD_RUNE) + { + if (h <= utf16::MAX_1WORD_RUNE) + { + UTF16addContinuous1(root, l, h); + } + else + { + UTF16addContinuous1(root, l, utf16::MAX_1WORD_RUNE); + const uint32_t h_ld = utf16::lead_surr(h); + const uint32_t h_tr = utf16::trail_surr(h); + UTF16splitByContinuity(root, utf16::MIN_LEAD_SURR, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + } + } + else + { + const uint32_t l_ld = utf16::lead_surr(l); + const uint32_t l_tr = utf16::trail_surr(l); + const uint32_t h_ld = utf16::lead_surr(h); + const uint32_t h_tr = utf16::trail_surr(h); + UTF16splitByContinuity(root, l_ld, h_ld, l_tr, h_tr); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h new file mode 100644 index 000000000..8a74e8f34 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h @@ -0,0 +1,19 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_ +#define _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_ + 
+#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf16/utf16.h" + +namespace re2c { + +struct RangeSuffix; + +void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h); +void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr); +void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr); +void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc new file mode 100644 index 000000000..3b2442904 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc @@ -0,0 +1,38 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf16/utf16_regexp.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/encoding/utf16/utf16_range.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/util/range.h" + +namespace re2c { + +RegExp * UTF16Symbol(utf16::rune r) +{ + if (r <= utf16::MAX_1WORD_RUNE) + return new MatchOp(Range::sym (r)); + else + { + const uint32_t ld = utf16::lead_surr(r); + const uint32_t tr = utf16::trail_surr(r); + return new CatOp(new MatchOp(Range::sym (ld)), new MatchOp(Range::sym (tr))); + } +} + +/* + * Split Unicode character class {[l1, h1), ..., [lN, hN)} into + * ranges [l1, h1-1], ..., [lN, hN-1] and return alternation of + * them. We store partially built range in suffix tree, which + * allows to eliminate common suffixes while building. 
+ */ +RegExp * UTF16Range(const Range * r) +{ + RangeSuffix * root = NULL; + for (; r != NULL; r = r->next ()) + UTF16splitByRuneLength(root, r->lower (), r->upper () - 1); + return to_regexp (root); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h new file mode 100644 index 000000000..d381de94b --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h @@ -0,0 +1,16 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_ +#define _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_ + +#include "src/ir/regexp/encoding/utf16/utf16.h" + +namespace re2c { + +class Range; +class RegExp; + +RegExp * UTF16Symbol(utf16::rune r); +RegExp * UTF16Range(const Range * r); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc new file mode 100644 index 000000000..dd4b59ef2 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc @@ -0,0 +1,84 @@ +#include "src/ir/regexp/encoding/utf8/utf8.h" + +namespace re2c { + +const uint32_t utf8::ERROR = 0xFFFDu; + +const utf8::rune utf8::MAX_1BYTE_RUNE = 0x7Fu; +const utf8::rune utf8::MAX_2BYTE_RUNE = 0x7FFu; +const utf8::rune utf8::MAX_3BYTE_RUNE = 0xFFFFu; +const utf8::rune utf8::MAX_4BYTE_RUNE = 0x10FFFFu; +const utf8::rune utf8::MAX_RUNE = utf8::MAX_4BYTE_RUNE; + +const uint32_t utf8::PREFIX_1BYTE = 0u; // 0000 0000 +const uint32_t utf8::INFIX = 0x80u; // 1000 0000 +const uint32_t utf8::PREFIX_2BYTE = 0xC0u; // 1100 0000 +const uint32_t utf8::PREFIX_3BYTE = 0xE0u; // 1110 0000 +const uint32_t utf8::PREFIX_4BYTE = 0xF0u; // 1111 0000 + +const uint32_t utf8::SHIFT = 6u; +const uint32_t utf8::MASK = 0x3Fu; // 0011 1111 + +uint32_t utf8::rune_to_bytes(uint32_t *str, rune c) +{ + // one byte sequence: 0-0x7F => 0xxxxxxx + if (c <= MAX_1BYTE_RUNE) + { + str[0] = PREFIX_1BYTE | c; + return 1; + } + + // two 
byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx + if (c <= MAX_2BYTE_RUNE) + { + str[0] = PREFIX_2BYTE | (c >> 1*SHIFT); + str[1] = INFIX | (c & MASK); + return 2; + } + + // If the Rune is out of range, convert it to the error rune. + // Do this test here because the error rune encodes to three bytes. + // Doing it earlier would duplicate work, since an out of range + // Rune wouldn't have fit in one or two bytes. + if (c > MAX_RUNE) + c = ERROR; + + // three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx + if (c <= MAX_3BYTE_RUNE) + { + str[0] = PREFIX_3BYTE | (c >> 2*SHIFT); + str[1] = INFIX | ((c >> 1*SHIFT) & MASK); + str[2] = INFIX | (c & MASK); + return 3; + } + + // four byte sequence (21-bit value): + // 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + str[0] = PREFIX_4BYTE | (c >> 3*SHIFT); + str[1] = INFIX | ((c >> 2*SHIFT) & MASK); + str[2] = INFIX | ((c >> 1*SHIFT) & MASK); + str[3] = INFIX | (c & MASK); + return 4; +} + +uint32_t utf8::rune_length(rune r) +{ + if (r <= MAX_2BYTE_RUNE) + return r <= MAX_1BYTE_RUNE ? 1 : 2; + else + return r <= MAX_3BYTE_RUNE ? 
3 : 4; +} + +utf8::rune utf8::max_rune(uint32_t i) +{ + switch (i) + { + case 1: return MAX_1BYTE_RUNE; + case 2: return MAX_2BYTE_RUNE; + case 3: return MAX_3BYTE_RUNE; + case 4: return MAX_4BYTE_RUNE; + default: return ERROR; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8.h b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.h new file mode 100644 index 000000000..0ca314228 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.h @@ -0,0 +1,48 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_ +#define _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +class utf8 +{ +public: + typedef uint32_t rune; + + // maximum characters per rune + // enum instead of static const member because of [-Wvla] + enum { MAX_RUNE_LENGTH = 4u }; + + // decoding error + static const uint32_t ERROR; + + // maximal runes for each rune length + static const rune MAX_1BYTE_RUNE; + static const rune MAX_2BYTE_RUNE; + static const rune MAX_3BYTE_RUNE; + static const rune MAX_4BYTE_RUNE; + static const rune MAX_RUNE; + + static const uint32_t PREFIX_1BYTE; + static const uint32_t INFIX; + static const uint32_t PREFIX_2BYTE; + static const uint32_t PREFIX_3BYTE; + static const uint32_t PREFIX_4BYTE; + + static const uint32_t SHIFT; + static const uint32_t MASK; + + // UTF-8 bytestring for given Unicode rune + static uint32_t rune_to_bytes(uint32_t * s, rune r); + + // length of UTF-8 bytestring for given Unicode rune + static uint32_t rune_length(rune r); + + // maximal Unicode rune with given length of UTF-8 bytestring + static rune max_rune(uint32_t i); +}; + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc new file mode 100644 index 000000000..d3d256cf8 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc @@ -0,0 +1,112 @@ +#include 
"src/ir/regexp/encoding/utf8/utf8_range.h" +#include "src/ir/regexp/encoding/range_suffix.h" + +namespace re2c { + +/* + * Now that we have catenation of byte ranges [l1-h1]...[lN-hN], + * we want to add it to existing range, merging suffixes on the fly. + */ +void UTF8addContinuous(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n) +{ + uint32_t lcs[utf8::MAX_RUNE_LENGTH]; + uint32_t hcs[utf8::MAX_RUNE_LENGTH]; + utf8::rune_to_bytes(lcs, l); + utf8::rune_to_bytes(hcs, h); + + RangeSuffix ** p = &root; + for (uint32_t i = 1; i <= n; ++i) + { + const uint32_t lc = lcs[n - i]; + const uint32_t hc = hcs[n - i]; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(lc, hc); + p = &(*p)->child; + break; + } + else if ((*p)->l == lc && (*p)->h == hc) + { + p = &(*p)->child; + break; + } + else + p = &(*p)->next; + } + } +} + +/* + * Split range into sub-ranges that agree on leading bytes. + * + * We have two Unicode runes of equal length, L and H, which + * map to UTF-8 sequences 'L_1 ... L_n' and 'H_1 ... H_n'. + * We want to represent Unicode range [L - H] as a catenation + * of byte ranges [L_1 - H_1], ..., [L_n - H_n]. + * + * This is only possible if for all i > 1: + * if L_i /= H_i, then L_(i+1) == 0x80 and H_(i+1) == 0xbf. + * This condition ensures that: + * 1) all possible UTF-8 sequences between L and H are allowed + * 2) no byte ranges [b1 - b2] appear, such that b1 > b2 + * + * E.g.: + * [\U000e0031-\U000e0043] => [f3-f3],[a0-a0],[80-81],[b1-83]. + * The last byte range, [b1-83], is incorrect: its lower bound + * is greater than its upper bound. To fix this, we must split + * the original range into two sub-ranges: + * [\U000e0031-\U000e003f] => [f3-f3],[a0-a0],[80-80],[b1-bf] + * [\U000e0040-\U000e0043] => [f3-f3],[a0-a0],[81-81],[80-83] + * + * This function finds all such 'points of discontinuity' + * and represents original range as alternation of continuous + * sub-ranges. 
+ */ +void UTF8splitByContinuity(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n) +{ + for (uint32_t i = 1; i < n; ++i) + { + uint32_t m = (1u << (6u * i)) - 1u; // last i bytes of a UTF-8 sequence + if ((l & ~m) != (h & ~m)) + { + if ((l & m) != 0) + { + UTF8splitByContinuity(root, l, l | m, n); + UTF8splitByContinuity(root, (l | m) + 1, h, n); + return; + } + if ((h & m) != m) + { + UTF8splitByContinuity(root, l, (h & ~m) - 1, n); + UTF8splitByContinuity(root, h & ~m, h, n); + return; + } + } + } + UTF8addContinuous(root, l, h, n); +} + +/* + * Split range into sub-ranges, so that all runes in the same + * sub-range have equal length of UTF-8 sequence. E.g., full + * Unicode range [0-0x10FFFF] gets split into sub-ranges: + * [0 - 0x7F] (1-byte UTF-8 sequences) + * [0x80 - 0x7FF] (2-byte UTF-8 sequences) + * [0x800 - 0xFFFF] (3-byte UTF-8 sequences) + * [0x10000 - 0x10FFFF] (4-byte UTF-8 sequences) + */ +void UTF8splitByRuneLength(RangeSuffix * & root, utf8::rune l, utf8::rune h) +{ + const uint32_t nh = utf8::rune_length(h); + for (uint32_t nl = utf8::rune_length(l); nl < nh; ++nl) + { + utf8::rune r = utf8::max_rune(nl); + UTF8splitByContinuity(root, l, r, nl); + l = r + 1; + } + UTF8splitByContinuity(root, l, h, nh); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h new file mode 100644 index 000000000..1ce46132f --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h @@ -0,0 +1,18 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_ +#define _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_ + +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf8/utf8.h" + +namespace re2c { + +struct RangeSuffix; + +void UTF8addContinuous(RangeSuffix * & p, utf8::rune l, utf8::rune h, uint32_t n); +void UTF8splitByContinuity(RangeSuffix * & p, utf8::rune l, utf8::rune h, uint32_t n); +void UTF8splitByRuneLength(RangeSuffix * & p, utf8::rune l, 
utf8::rune h); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc new file mode 100644 index 000000000..54ef6f0e0 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc @@ -0,0 +1,36 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf8/utf8_regexp.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/encoding/utf8/utf8_range.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/util/range.h" + +namespace re2c { + +RegExp * UTF8Symbol(utf8::rune r) +{ + uint32_t chars[utf8::MAX_RUNE_LENGTH]; + const uint32_t chars_count = utf8::rune_to_bytes(chars, r); + RegExp * re = new MatchOp(Range::sym (chars[0])); + for (uint32_t i = 1; i < chars_count; ++i) + re = new CatOp(re, new MatchOp(Range::sym (chars[i]))); + return re; +} + +/* + * Split Unicode character class {[l1, h1), ..., [lN, hN)} into + * ranges [l1, h1-1], ..., [lN, hN-1] and return alternation of + * them. We store partially built range in suffix tree, which + * allows to eliminate common suffixes while building. 
+ */ +RegExp * UTF8Range(const Range * r) +{ + RangeSuffix * root = NULL; + for (; r != NULL; r = r->next ()) + UTF8splitByRuneLength(root, r->lower (), r->upper () - 1); + return to_regexp (root); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h new file mode 100644 index 000000000..676759a37 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h @@ -0,0 +1,16 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_ +#define _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_ + +#include "src/ir/regexp/encoding/utf8/utf8.h" + +namespace re2c { + +class Range; +class RegExp; + +RegExp * UTF8Symbol(utf8::rune r); +RegExp * UTF8Range(const Range * r); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_ diff --git a/tools/re2c/src/ir/regexp/fixed_length.cc b/tools/re2c/src/ir/regexp/fixed_length.cc new file mode 100644 index 000000000..e0fd7e00c --- /dev/null +++ b/tools/re2c/src/ir/regexp/fixed_length.cc @@ -0,0 +1,55 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" + +namespace re2c +{ + +uint32_t RegExp::fixedLength () +{ + return ~0u; +} + +uint32_t AltOp::fixedLength () /* length shared by both alternatives, else ~0u */ +{ + uint32_t l1 = exp1->fixedLength (); + uint32_t l2 = exp2->fixedLength (); /* measure the second alternative, not exp1 again */ + + if (l1 != l2 || l1 == ~0u) + { + return ~0u; + } + + return l1; +} + +uint32_t CatOp::fixedLength () +{ + const uint32_t l1 = exp1->fixedLength (); + if (l1 != ~0u) + { + const uint32_t l2 = exp2->fixedLength (); + if (l2 != ~0u) + { + return l1 + l2; + } + } + return ~0u; +} + +uint32_t MatchOp::fixedLength () +{ + return 1; +} + +uint32_t NullOp::fixedLength () +{ + return 0; +} + +} // end namespace re2c + diff --git a/tools/re2c/src/ir/regexp/regexp.cc b/tools/re2c/src/ir/regexp/regexp.cc new file mode 100644 index
000000000..e5a7d9bd5 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp.cc @@ -0,0 +1,241 @@ +#include + +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/case.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/encoding/utf16/utf16_regexp.h" +#include "src/ir/regexp/encoding/utf8/utf8_regexp.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/parse/scanner.h" +#include "src/util/range.h" + +namespace re2c +{ + +static MatchOp * merge (MatchOp * m1, MatchOp * m2); + +free_list RegExp::vFreeList; + +RegExp * doAlt (RegExp * e1, RegExp * e2) +{ + if (!e1) + { + return e2; + } + if (!e2) + { + return e1; + } + return new AltOp (e1, e2); +} + +RegExp * mkAlt (RegExp * e1, RegExp * e2) +{ + AltOp * a; + MatchOp * m1; + MatchOp * m2; + + a = dynamic_cast (e1); + if (a != NULL) + { + m1 = dynamic_cast (a->exp1); + if (m1 != NULL) + { + e1 = a->exp2; + } + } + else + { + m1 = dynamic_cast (e1); + if (m1 != NULL) + { + e1 = NULL; + } + } + a = dynamic_cast (e2); + if (a != NULL) + { + m2 = dynamic_cast (a->exp1); + if (m2 != NULL) + { + e2 = a->exp2; + } + } + else + { + m2 = dynamic_cast (e2); + if (m2 != NULL) + { + e2 = NULL; + } + } + + return doAlt (merge (m1, m2), doAlt (e1, e2)); +} + +MatchOp * merge (MatchOp * m1, MatchOp * m2) +{ + if (!m1) + { + return m2; + } + if (!m2) + { + return m1; + } + MatchOp * m = new MatchOp (Range::add (m1->match, m2->match)); + return m; +} + +RegExp * doCat (RegExp * e1, RegExp * e2) +{ + if (!e1) + { + return e2; + } + if (!e2) + { + return e1; + } + return new CatOp (e1, e2); +} + +RegExp *Scanner::schr(uint32_t c) const +{ + if (!opts->encoding.encode(c)) { + fatalf("Bad code point: '0x%X'", c); + } + 
switch (opts->encoding.type ()) { + case Enc::UTF16: return UTF16Symbol(c); + case Enc::UTF8: return UTF8Symbol(c); + default: return new MatchOp(Range::sym(c)); + } +} + +RegExp *Scanner::ichr(uint32_t c) const +{ + if (is_alpha(c)) { + RegExp *l = schr(to_lower_unsafe(c)); + RegExp *u = schr(to_upper_unsafe(c)); + return mkAlt(l, u); + } else { + return schr(c); + } +} + +RegExp *Scanner::cls(Range *r) const +{ + if (!r) + { + switch (opts->empty_class_policy) + { + case EMPTY_CLASS_MATCH_EMPTY: + warn.empty_class (get_line ()); + return new NullOp; + case EMPTY_CLASS_MATCH_NONE: + warn.empty_class (get_line ()); + break; + case EMPTY_CLASS_ERROR: + fatal ("empty character class"); + break; + } + } + + switch (opts->encoding.type ()) + { + case Enc::UTF16: return UTF16Range(r); + case Enc::UTF8: return UTF8Range(r); + default: return new MatchOp(r); + } +} + +RegExp * Scanner::mkDiff (RegExp * e1, RegExp * e2) const +{ + MatchOp * m1 = dynamic_cast (e1); + MatchOp * m2 = dynamic_cast (e2); + if (m1 == NULL || m2 == NULL) + { + fatal("can only difference char sets"); + } + Range * r = Range::sub (m1->match, m2->match); + + return cls(r); +} + +RegExp * Scanner::mkDot() const +{ + Range * full = opts->encoding.fullRange(); + uint32_t c = '\n'; + if (!opts->encoding.encode(c)) + fatalf("Bad code point: '0x%X'", c); + Range * ran = Range::sym (c); + Range * inv = Range::sub (full, ran); + + return cls(inv); +} + +/* + * Create a byte range that includes all possible input characters. + * This may include characters, which do not map to any valid symbol + * in current encoding. For encodings, which directly map symbols to + * input characters (ASCII, EBCDIC, UTF-32), it equals [^]. For other + * encodings (UTF-16, UTF-8), [^] and this range are different. + * + * Also note that default range doesn't respect encoding policy + * (the way invalid code points are treated). 
+ */ +RegExp * Scanner::mkDefault() const +{ + Range * def = Range::ran (0, opts->encoding.nCodeUnits()); + return new MatchOp(def); +} + +/* + * note [counted repetition expansion] + * + * r{0} ::= (empty) + * r{n} ::= r{n-1} r + * r{n,m} ::= r{n} (r{0} | ... | r{m-n}) + * r{n,} ::= r{n} r* + */ + +// see note [counted repetition expansion] +RegExp * repeat (RegExp * e, uint32_t n) +{ + RegExp * r = NULL; /* stays NULL when n == 0; doCat returns its other operand when one is NULL */ + for (uint32_t i = 0; i < n; ++i) + { + r = doCat (r, e); + } + return r; +} + +// see note [counted repetition expansion] +RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m) +{ + RegExp * r1 = repeat (e, n); + RegExp * r2 = NULL; + for (uint32_t i = n; i < m; ++i) + { + r2 = mkAlt (new NullOp, doCat (e, r2)); /* one more optional e per iteration: (empty | e (empty | e ...)) */ + } + return doCat (r1, r2); +} + +// see note [counted repetition expansion] +RegExp * repeat_from (RegExp * e, uint32_t n) +{ + RegExp * r1 = repeat (e, n); + RegExp * r2 = new CloseOp (e); + return doCat (r1, r2); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/regexp.h b/tools/re2c/src/ir/regexp/regexp.h new file mode 100644 index 000000000..5d344dd34 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp.h @@ -0,0 +1,52 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_ +#define _RE2C_IR_REGEXP_REGEXP_ + +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/util/free_list.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct nfa_state_t; +struct nfa_t; + +typedef std::vector charset_t; + +class RegExp +{ +public: + static free_list vFreeList; /* the constructor inserts every node here, the destructor erases it */ + + inline RegExp () + { + vFreeList.insert (this); + } + inline virtual ~RegExp () + { + vFreeList.erase (this); + } + virtual void split (std::set &) = 0; + virtual uint32_t calc_size() const = 0; + virtual uint32_t fixedLength (); + virtual nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n) = 0; + virtual void display (std::ostream &) const = 0; + friend std::ostream & operator << (std::ostream & o, const RegExp & re); + + FORBID_COPY (RegExp); +}; + +RegExp * doAlt 
(RegExp * e1, RegExp * e2); +RegExp * mkAlt (RegExp * e1, RegExp * e2); +RegExp * doCat (RegExp * e1, RegExp * e2); +RegExp * repeat (RegExp * e, uint32_t n); +RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m); +RegExp * repeat_from (RegExp * e, uint32_t n); + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_ diff --git a/tools/re2c/src/ir/regexp/regexp_alt.h b/tools/re2c/src/ir/regexp/regexp_alt.h new file mode 100644 index 000000000..6f1c8ea48 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_alt.h @@ -0,0 +1,31 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_ALT_ +#define _RE2C_IR_REGEXP_REGEXP_ALT_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class AltOp: public RegExp +{ + RegExp * exp1; + RegExp * exp2; + +public: + inline AltOp (RegExp * e1, RegExp * e2) + : exp1 (e1) + , exp2 (e2) + {} + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + friend RegExp * mkAlt (RegExp *, RegExp *); + + FORBID_COPY (AltOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_ALT_ diff --git a/tools/re2c/src/ir/regexp/regexp_cat.h b/tools/re2c/src/ir/regexp/regexp_cat.h new file mode 100644 index 000000000..d8176212e --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_cat.h @@ -0,0 +1,30 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_CAT_ +#define _RE2C_IR_REGEXP_REGEXP_CAT_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class CatOp: public RegExp +{ + RegExp * exp1; + RegExp * exp2; + +public: + inline CatOp (RegExp * e1, RegExp * e2) + : exp1 (e1) + , exp2 (e2) + {} + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + + FORBID_COPY (CatOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_CAT_ diff --git a/tools/re2c/src/ir/regexp/regexp_close.h 
b/tools/re2c/src/ir/regexp/regexp_close.h new file mode 100644 index 000000000..02bea20f3 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_close.h @@ -0,0 +1,27 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_CLOSE_ +#define _RE2C_IR_REGEXP_REGEXP_CLOSE_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class CloseOp: public RegExp +{ + RegExp * exp; + +public: + inline CloseOp (RegExp * e) + : exp (e) + {} + void split (std::set &); + uint32_t calc_size() const; + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + + FORBID_COPY (CloseOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_CLOSE_ diff --git a/tools/re2c/src/ir/regexp/regexp_match.h b/tools/re2c/src/ir/regexp/regexp_match.h new file mode 100644 index 000000000..903697b64 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_match.h @@ -0,0 +1,29 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_MATCH_ +#define _RE2C_IR_REGEXP_REGEXP_MATCH_ + +#include "src/ir/regexp/regexp.h" +#include "src/util/range.h" + +namespace re2c +{ + +class MatchOp: public RegExp +{ +public: + Range * match; + + inline MatchOp (Range * m) + : match (m) + {} + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + + FORBID_COPY (MatchOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_MATCH_ diff --git a/tools/re2c/src/ir/regexp/regexp_null.h b/tools/re2c/src/ir/regexp/regexp_null.h new file mode 100644 index 000000000..8168dbe55 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_null.h @@ -0,0 +1,21 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_NULL_ +#define _RE2C_IR_REGEXP_REGEXP_NULL_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class NullOp: public RegExp +{ +public: + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & 
o) const; +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_NULL_ diff --git a/tools/re2c/src/ir/regexp/regexp_rule.h b/tools/re2c/src/ir/regexp/regexp_rule.h new file mode 100644 index 000000000..1519fa233 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_rule.h @@ -0,0 +1,52 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_RULE_ +#define _RE2C_IR_REGEXP_REGEXP_RULE_ + +#include + +#include "src/ir/regexp/regexp.h" +#include "src/ir/rule_rank.h" +#include "src/parse/code.h" + +namespace re2c +{ + +class RuleOp: public RegExp +{ +public: + const Loc loc; + +private: + RegExp * exp; + +public: + RegExp * ctx; + rule_rank_t rank; + const Code * code; + const std::string newcond; + + inline RuleOp + ( const Loc & l + , RegExp * r1 + , RegExp * r2 + , rule_rank_t r + , const Code * c + , const std::string * cond + ) + : loc (l) + , exp (r1) + , ctx (r2) + , rank (r) + , code (c) + , newcond (cond ? *cond : "") + {} + void display (std::ostream & o) const; + void split (std::set &); + uint32_t calc_size() const; + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + + FORBID_COPY (RuleOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_RULE_ diff --git a/tools/re2c/src/ir/rule_rank.cc b/tools/re2c/src/ir/rule_rank.cc new file mode 100644 index 000000000..12d2de885 --- /dev/null +++ b/tools/re2c/src/ir/rule_rank.cc @@ -0,0 +1,68 @@ +#include +#include +#include + +#include "src/ir/rule_rank.h" + +namespace re2c +{ + +const uint32_t rule_rank_t::NONE = std::numeric_limits::max(); +const uint32_t rule_rank_t::DEF = rule_rank_t::NONE - 1; + +rule_rank_t::rule_rank_t () + : value (0) +{} + +void rule_rank_t::inc () +{ + assert (value < DEF - 1); + ++value; +} + +rule_rank_t rule_rank_t::none () +{ + rule_rank_t r; + r.value = NONE; + return r; +} + +rule_rank_t rule_rank_t::def () +{ + rule_rank_t r; + r.value = DEF; + return r; +} + +bool rule_rank_t::is_none () const +{ + return value == NONE; +} + +bool rule_rank_t::is_def () const +{ + return 
value == DEF; +} + +bool rule_rank_t::operator < (const rule_rank_t & r) const +{ + return value < r.value; +} + +bool rule_rank_t::operator == (const rule_rank_t & r) const +{ + return value == r.value; +} + +std::ostream & operator << (std::ostream & o, rule_rank_t r) +{ + o << r.value; + return o; +} + +uint32_t rule_rank_t::uint32 () const +{ + return value; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/rule_rank.h b/tools/re2c/src/ir/rule_rank.h new file mode 100644 index 000000000..ca19cb94b --- /dev/null +++ b/tools/re2c/src/ir/rule_rank.h @@ -0,0 +1,44 @@ +#ifndef _RE2C_IR_RULE_RANK_ +#define _RE2C_IR_RULE_RANK_ + +#include "src/util/c99_stdint.h" +#include + +namespace re2c +{ + +template class counter_t; + +// rule rank public API: +// - get rule rank corresponding to nonexistent/default rule +// - check if rank corresponds to nonexistent/default rule +// - compare ranks +// - output rank to std::ostream +// +// rule rank private API (for rule rank counter): +// - get first rank +// - get next rank +class rule_rank_t +{ + static const uint32_t NONE; + static const uint32_t DEF; + uint32_t value; + rule_rank_t (); + void inc (); + +public: + static rule_rank_t none (); + static rule_rank_t def (); + bool is_none () const; + bool is_def () const; + bool operator < (const rule_rank_t & r) const; + bool operator == (const rule_rank_t & r) const; + friend std::ostream & operator << (std::ostream & o, rule_rank_t r); + uint32_t uint32 () const; + + friend class counter_t; +}; + +} // namespace re2c + +#endif // _RE2C_IR_RULE_RANK_ diff --git a/tools/re2c/src/ir/skeleton/control_flow.cc b/tools/re2c/src/ir/skeleton/control_flow.cc new file mode 100644 index 000000000..74166865f --- /dev/null +++ b/tools/re2c/src/ir/skeleton/control_flow.cc @@ -0,0 +1,61 @@ +#include +#include +#include + +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/skeleton.h" 
+#include "src/ir/skeleton/way.h" +#include "src/util/u32lim.h" + +namespace re2c +{ + +// We don't need all patterns that cause undefined behaviour. +// We only need some examples, the shorter the better. +// See also note [counting skeleton edges]. +void Node::naked_ways (way_t & prefix, std::vector & ways, nakeds_t &size) +{ + if (!rule.rank.is_none ()) + { + return; + } + else if (end ()) + { + ways.push_back (prefix); + size = size + nakeds_t::from64(prefix.size ()); + } + else if (loop < 2) + { + local_inc _ (loop); + for (arcsets_t::iterator i = arcsets.begin (); + i != arcsets.end () && !size.overflow (); ++i) + { + prefix.push_back (&i->second); + i->first->naked_ways (prefix, ways, size); + prefix.pop_back (); + } + } +} + +void Skeleton::warn_undefined_control_flow () +{ + way_t prefix; + std::vector ways; + Node::nakeds_t size = Node::nakeds_t::from32(0u); + + nodes->naked_ways (prefix, ways, size); + + if (!ways.empty ()) + { + warn.undefined_control_flow (line, cond, ways, size.overflow ()); + } + else if (size.overflow ()) + { + warn.fail (Warn::UNDEFINED_CONTROL_FLOW, line, "DFA is too large to check undefined control flow"); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/generate_code.cc b/tools/re2c/src/ir/skeleton/generate_code.cc new file mode 100644 index 000000000..38940ae77 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/generate_code.cc @@ -0,0 +1,323 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/skeleton.h" + +namespace re2c +{ + +static void exact_uint (OutputFile & o, size_t width) +{ + if (width == sizeof (char)) + { + o.ws("unsigned char"); + } + else if (width == sizeof (short)) + { + o.ws("unsigned short"); + } + else if (width == sizeof (int)) + { + o.ws("unsigned 
int"); + } + else if (width == sizeof (long)) + { + o.ws("unsigned long"); + } + else + { + o.ws("uint").wu64 (width * 8).ws("_t"); + } +} + +static void from_le(OutputFile &o, uint32_t ind, size_t size, const char *expr) +{ + o.ws("\n").wind(ind).ws("/* from little-endian to host-endian */"); + o.ws("\n").wind(ind).ws("unsigned char *p = (unsigned char*)&").ws(expr).ws(";"); + o.ws("\n").wind(ind).ws(expr).ws(" = p[0]"); + for (uint32_t i = 1; i < size; ++i) + { + o.ws(" + (p[").wu32(i).ws("] << ").wu32(i * 8).ws("u)"); + } + o.ws(";"); +} + +void Skeleton::emit_prolog (OutputFile & o) +{ + o.ws("\n#include "); + o.ws("\n#include /* malloc, free */"); + o.ws("\n"); + o.ws("\nstatic void *read_file"); + o.ws("\n").wind(1).ws("( const char *fname"); + o.ws("\n").wind(1).ws(", size_t unit"); + o.ws("\n").wind(1).ws(", size_t padding"); + o.ws("\n").wind(1).ws(", size_t *pfsize"); + o.ws("\n").wind(1).ws(")"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("void *buffer = NULL;"); + o.ws("\n").wind(1).ws("size_t fsize = 0;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* open file */"); + o.ws("\n").wind(1).ws("FILE *f = fopen(fname, \"rb\");"); + o.ws("\n").wind(1).ws("if(f == NULL) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* get file size */"); + o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_END);"); + o.ws("\n").wind(1).ws("fsize = (size_t) ftell(f) / unit;"); + o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_SET);"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* allocate memory for file and padding */"); + o.ws("\n").wind(1).ws("buffer = malloc(unit * (fsize + padding));"); + o.ws("\n").wind(1).ws("if (buffer == NULL) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* read the whole file in memory */"); + o.ws("\n").wind(1).ws("if (fread(buffer, unit, fsize, f) != fsize) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + 
o.ws("\n"); + o.ws("\n").wind(1).ws("fclose(f);"); + o.ws("\n").wind(1).ws("*pfsize = fsize;"); + o.ws("\n").wind(1).ws("return buffer;"); + o.ws("\n"); + o.ws("\nerror:"); + o.ws("\n").wind(1).ws("fprintf(stderr, \"error: cannot read file '%s'\\n\", fname);"); + o.ws("\n").wind(1).ws("free(buffer);"); + o.ws("\n").wind(1).ws("if (f != NULL) {"); + o.ws("\n").wind(2).ws("fclose(f);"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("return NULL;"); + o.ws("\n}"); + o.ws("\n"); +} + +void Skeleton::emit_start + ( OutputFile & o + , size_t maxfill + , bool backup + , bool backupctx + , bool accept + ) const +{ + const size_t sizeof_cunit = opts->encoding.szCodeUnit(); + const uint32_t default_rule = rule2key (rule_rank_t::none ()); + + o.ws("\n#define YYCTYPE "); + exact_uint (o, sizeof_cunit); + o.ws("\n#define YYKEYTYPE "); + exact_uint (o, sizeof_key); + o.ws("\n#define YYPEEK() *cursor"); + o.ws("\n#define YYSKIP() ++cursor"); + if (backup) + { + o.ws("\n#define YYBACKUP() marker = cursor"); + o.ws("\n#define YYRESTORE() cursor = marker"); + } + if (backupctx) + { + o.ws("\n#define YYBACKUPCTX() ctxmarker = cursor"); + o.ws("\n#define YYRESTORECTX() cursor = ctxmarker"); + } + o.ws("\n#define YYLESSTHAN(n) (limit - cursor) < n"); + o.ws("\n#define YYFILL(n) { break; }"); + o.ws("\n"); + o.ws("\nstatic int action_").wstring(name); + o.ws("\n").wind(1).ws("( unsigned int i"); + o.ws("\n").wind(1).ws(", const YYKEYTYPE *keys"); + o.ws("\n").wind(1).ws(", const YYCTYPE *start"); + o.ws("\n").wind(1).ws(", const YYCTYPE *token"); + o.ws("\n").wind(1).ws(", const YYCTYPE **cursor"); + o.ws("\n").wind(1).ws(", YYKEYTYPE rule_act"); + o.ws("\n").wind(1).ws(")"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("const long pos = token - start;"); + o.ws("\n").wind(1).ws("const long len_act = *cursor - token;"); + o.ws("\n").wind(1).ws("const long len_exp = (long) keys [3 * i + 1];"); + o.ws("\n").wind(1).ws("const YYKEYTYPE rule_exp = keys [3 * i + 2];"); + 
o.ws("\n").wind(1).ws("if (rule_exp == ").wu32(default_rule).ws(") {"); + o.ws("\n").wind(2).ws("fprintf"); + o.ws("\n").wind(3).ws("( stderr"); + o.ws("\n").wind(3).ws(", \"warning: lex_").wstring(name).ws(": control flow is undefined for input\""); + o.ws("\n").wind(4).ws("\" at position %ld, rerun re2c with '-W'\\n\""); + o.ws("\n").wind(3).ws(", pos"); + o.ws("\n").wind(3).ws(");"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("if (len_act == len_exp && rule_act == rule_exp) {"); + o.ws("\n").wind(2).ws("const YYKEYTYPE offset = keys[3 * i];"); + o.ws("\n").wind(2).ws("*cursor = token + offset;"); + o.ws("\n").wind(2).ws("return 0;"); + o.ws("\n").wind(1).ws("} else {"); + o.ws("\n").wind(2).ws("fprintf"); + o.ws("\n").wind(3).ws("( stderr"); + o.ws("\n").wind(3).ws(", \"error: lex_").wstring(name).ws(": at position %ld (iteration %u):\\n\""); + o.ws("\n").wind(4).ws("\"\\texpected: match length %ld, rule %u\\n\""); + o.ws("\n").wind(4).ws("\"\\tactual: match length %ld, rule %u\\n\""); + o.ws("\n").wind(3).ws(", pos"); + o.ws("\n").wind(3).ws(", i"); + o.ws("\n").wind(3).ws(", len_exp"); + o.ws("\n").wind(3).ws(", rule_exp"); + o.ws("\n").wind(3).ws(", len_act"); + o.ws("\n").wind(3).ws(", rule_act"); + o.ws("\n").wind(3).ws(");"); + o.ws("\n").wind(2).ws("return 1;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n}"); + o.ws("\n"); + o.ws("\nint lex_").wstring(name).ws("()"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("const size_t padding = ").wu64(maxfill).ws("; /* YYMAXFILL */"); + o.ws("\n").wind(1).ws("int status = 0;"); + o.ws("\n").wind(1).ws("size_t input_len = 0;"); + o.ws("\n").wind(1).ws("size_t keys_count = 0;"); + o.ws("\n").wind(1).ws("YYCTYPE *input = NULL;"); + o.ws("\n").wind(1).ws("YYKEYTYPE *keys = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *cursor = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *limit = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *token = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *eof = NULL;"); + 
o.ws("\n").wind(1).ws("unsigned int i = 0;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("input = (YYCTYPE *) read_file"); + o.ws("\n").wind(2).ws("(\"").wstring(o.file_name).ws(".").wstring(name).ws(".input\""); + o.ws("\n").wind(2).ws(", sizeof (YYCTYPE)"); + o.ws("\n").wind(2).ws(", padding"); + o.ws("\n").wind(2).ws(", &input_len"); + o.ws("\n").wind(2).ws(");"); + o.ws("\n").wind(1).ws("if (input == NULL) {"); + o.ws("\n").wind(2).ws("status = 1;"); + o.ws("\n").wind(2).ws("goto end;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + if (sizeof_cunit > 1) + { + o.ws("\n").wind(1).ws("for (i = 0; i < input_len; ++i) {"); + from_le(o, 2, sizeof_cunit, "input[i]"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + } + o.ws("\n").wind(1).ws("keys = (YYKEYTYPE *) read_file"); + o.ws("\n").wind(2).ws("(\"").wstring(o.file_name).ws(".").wstring(name).ws(".keys\""); + o.ws("\n").wind(2).ws(", 3 * sizeof (YYKEYTYPE)"); + o.ws("\n").wind(2).ws(", 0"); + o.ws("\n").wind(2).ws(", &keys_count"); + o.ws("\n").wind(2).ws(");"); + o.ws("\n").wind(1).ws("if (keys == NULL) {"); + o.ws("\n").wind(2).ws("status = 1;"); + o.ws("\n").wind(2).ws("goto end;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + if (sizeof_key > 1) + { + o.ws("\n").wind(1).ws("for (i = 0; i < 3 * keys_count; ++i) {"); + from_le(o, 2, sizeof_key, "keys[i]"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + } + o.ws("\n").wind(1).ws("cursor = input;"); + o.ws("\n").wind(1).ws("limit = input + input_len + padding;"); + o.ws("\n").wind(1).ws("eof = input + input_len;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("for (i = 0; status == 0 && i < keys_count; ++i) {"); + o.ws("\n").wind(2).ws("token = cursor;"); + if (backup) + { + o.ws("\n").wind(2).ws("const YYCTYPE *marker = NULL;"); + } + if (backupctx) + { + o.ws("\n").wind(2).ws("const YYCTYPE *ctxmarker = NULL;"); + } + o.ws("\n").wind(2).ws("YYCTYPE yych;"); + if (accept) + { + o.ws("\n").wind(2).ws("unsigned int yyaccept = 0;"); + } + o.ws("\n"); + if (opts->bFlag && 
BitMap::first) + { + BitMap::gen (o, 2, 0, std::min (0x100u, opts->encoding.nCodeUnits ())); + } + o.ws("\n"); +} + +void Skeleton::emit_end + ( OutputFile & o + , bool backup + , bool backupctx + ) const +{ + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("if (status == 0) {"); + o.ws("\n").wind(2).ws("if (cursor != eof) {"); + o.ws("\n").wind(3).ws("status = 1;"); + o.ws("\n").wind(3).ws("const long pos = token - input;"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused input strings left at position %ld\\n\", pos);"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(2).ws("if (i != keys_count) {"); + o.ws("\n").wind(3).ws("status = 1;"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused keys left after %u iterations\\n\", i);"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\nend:"); + o.ws("\n").wind(1).ws("free(input);"); + o.ws("\n").wind(1).ws("free(keys);"); + o.ws("\n"); + o.ws("\n").wind(1).ws("return status;"); + o.ws("\n}"); + o.ws("\n"); + o.ws("\n#undef YYCTYPE"); + o.ws("\n#undef YYKEYTYPE"); + o.ws("\n#undef YYPEEK"); + o.ws("\n#undef YYSKIP"); + if (backup) + { + o.ws("\n#undef YYBACKUP"); + o.ws("\n#undef YYRESTORE"); + } + if (backupctx) + { + o.ws("\n#undef YYBACKUPCTX"); + o.ws("\n#undef YYRESTORECTX"); + } + o.ws("\n#undef YYLESSTHAN"); + o.ws("\n#undef YYFILL"); + o.ws("\n"); +} + +void Skeleton::emit_epilog (OutputFile & o, const std::set & names) +{ + o.ws("\n").ws("int main()"); + o.ws("\n").ws("{"); + + for (std::set::const_iterator i = names.begin (); i != names.end (); ++i) + { + o.ws("\n").wind(1).ws("if(lex_").wstring(*i).ws("() != 0) {"); + o.ws("\n").wind(2).ws("return 1;"); + o.ws("\n").wind(1).ws("}"); + } + + o.ws("\n").wind(1).ws("return 0;"); + o.ws("\n}"); + o.ws("\n"); +} + +void Skeleton::emit_action (OutputFile & o, uint32_t ind, rule_rank_t rank) const +{ + o.wind(ind).ws("status = action_").wstring(name).ws("(i, 
keys, input, token, &cursor, ").wu32(rule2key (rank)).ws(");\n"); + o.wind(ind).ws("continue;\n"); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/generate_data.cc b/tools/re2c/src/ir/skeleton/generate_data.cc new file mode 100644 index 000000000..60af8376e --- /dev/null +++ b/tools/re2c/src/ir/skeleton/generate_data.cc @@ -0,0 +1,215 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/conf/msg.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/util/u32lim.h" + +namespace re2c +{ + +template + static Node::covers_t cover_one (FILE * input, FILE * keys, const path_t & path); + +/* + * note [generating skeleton path cover] + * + * With --skeleton switch we need to generate lots of data: strings that + * correspond to various paths in DFA and match given regular expression. + * We try to generate path cover (a set of paths that cover all skeleton + * arcs at least once). Generation must stop as soon as the size of path + * cover exceeds limit (in which case we'll only get a partial path cover). + * + * The algorithm walks graph nodes in depth-first order and assigns suffix + * to each node (a path from this node to end node). In order to calculate + * suffix for a given node the algorithm must know suffix for any child + * node (end nodes are assigned empty suffix at start). Suffix is only + * calculated once for each node and then reused as many times as the node + * is visited. This is what reduces search space. + * + * The algorithm calculates prefix (multipath to current node). If current + * node has already been assigned suffix, the algorithm immediately + * calculates path cover from prefix and suffix. Otherwise it recurses to + * child nodes (updating prefix on the go). 
+ * + * The algorithm avoids eternal loops by maintaining loop counter for each + * node. Loop counter is incremented on recursive enter and decremented on + * recursive return. If loop counter is greater than 1, current branch is + * abandoned and recursion returns immediately. + * + * See also note [counting skeleton edges]. + * + */ +template + void Node::cover (path_t & prefix, FILE * input, FILE * keys, covers_t &size) +{ + if (end () && suffix == NULL) + { + suffix = new path_t (rule, ctx); + } + if (suffix != NULL) + { + prefix.append (suffix); + size = size + cover_one (input, keys, prefix); + } + else if (loop < 2) + { + local_inc _ (loop); + for (arcs_t::iterator i = arcs.begin (); + i != arcs.end () && !size.overflow(); ++i) + { + path_t new_prefix = prefix; + new_prefix.extend (i->first->rule, i->first->ctx, &i->second); + i->first->cover (new_prefix, input, keys, size); + if (i->first->suffix != NULL && suffix == NULL) + { + suffix = new path_t (rule, ctx); + suffix->extend (i->first->rule, i->first->ctx, &i->second); + suffix->append (i->first->suffix); + } + } + } +} + +template + void Skeleton::generate_paths_cunit_key (FILE * input, FILE * keys) +{ + path_t prefix (nodes->rule, nodes->ctx); + Node::covers_t size = Node::covers_t::from32(0u); + + nodes->cover (prefix, input, keys, size); + + if (size.overflow ()) + { + warning + ( NULL + , line + , false + , "DFA %sis too large: can only generate partial path cover" + , incond (cond).c_str () + ); + } +} + +template + void Skeleton::generate_paths_cunit (FILE * input, FILE * keys) +{ + switch (sizeof_key) + { + case 4: generate_paths_cunit_key (input, keys); break; + case 2: generate_paths_cunit_key (input, keys); break; + case 1: generate_paths_cunit_key (input, keys); break; + } +} + +void Skeleton::generate_paths (FILE * input, FILE * keys) +{ + switch (opts->encoding.szCodeUnit ()) + { + case 4: generate_paths_cunit (input, keys); break; + case 2: generate_paths_cunit (input, keys); break; + 
case 1: generate_paths_cunit (input, keys); break; + } +} + +void Skeleton::emit_data (const char * fname) +{ + const std::string input_name = std::string (fname) + "." + name + ".input"; + FILE * input = fopen (input_name.c_str (), "wb"); + if (!input) + { + error ("cannot open file: %s", input_name.c_str ()); + exit (1); + } + const std::string keys_name = std::string (fname) + "." + name + ".keys"; + FILE * keys = fopen (keys_name.c_str (), "wb"); + if (!keys) + { + error ("cannot open file: %s", keys_name.c_str ()); + exit (1); + } + + generate_paths (input, keys); + + fclose (input); + fclose (keys); +} + +template static uintn_t to_le(uintn_t n) +{ + uintn_t m; + uint8_t *p = reinterpret_cast(&m); + for (size_t i = 0; i < sizeof(uintn_t); ++i) + { + p[i] = static_cast(n >> (i * 8)); + } + return m; +} + +template + static void keygen (FILE * f, size_t count, size_t len, size_t len_match, rule_rank_t match) +{ + const key_t m = Skeleton::rule2key (match); + + const size_t keys_size = 3 * count; + key_t * keys = new key_t [keys_size]; + for (uint32_t i = 0; i < keys_size;) + { + keys[i++] = to_le(static_cast (len)); + keys[i++] = to_le(static_cast (len_match)); + keys[i++] = to_le(m); + } + fwrite (keys, sizeof (key_t), keys_size, f); + delete [] keys; +} + +template + static Node::covers_t cover_one (FILE * input, FILE * keys, const path_t & path) +{ + const size_t len = path.len (); + + size_t count = 0; + for (size_t i = 0; i < len; ++i) + { + count = std::max (count, path[i]->size ()); + } + + const Node::covers_t size = Node::covers_t::from64(len) * Node::covers_t::from64(count); + if (!size.overflow ()) + { + // input + const size_t buffer_size = size.uint32 (); + cunit_t * buffer = new cunit_t [buffer_size]; + for (size_t i = 0; i < len; ++i) + { + const std::vector & arc = *path[i]; + const size_t width = arc.size (); + for (size_t j = 0; j < count; ++j) + { + const size_t k = j % width; + buffer[j * len + i] = to_le(static_cast (arc[k])); + } + } + 
fwrite (buffer, sizeof (cunit_t), buffer_size, input); + delete [] buffer; + + // keys + keygen (keys, count, len, path.len_matching (), path.match ()); + } + + return size; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/match_empty.cc b/tools/re2c/src/ir/skeleton/match_empty.cc new file mode 100644 index 000000000..16fba615e --- /dev/null +++ b/tools/re2c/src/ir/skeleton/match_empty.cc @@ -0,0 +1,49 @@ +#include +#include + +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/rules.h" + +namespace re2c +{ + +void Skeleton::warn_match_empty () +{ + Node & head = nodes[0]; + + head.calc_reachable (); + const std::set & reach = head.reachable; + + // warn about rules that match empty string + if (!head.rule.rank.is_none ()) + { + bool reachable = head.end (); + for (std::set::const_iterator i = reach.begin (); + !reachable && i != reach.end (); ++i) + { + reachable |= i->rank.is_none (); + } + if (reachable) + { + warn.match_empty_string (rules[head.rule.rank].line); + } + } + + // warn about rules that match empty string with nonempty trailing context + if (head.ctx) + { + for (std::set::const_iterator i = reach.begin (); i != reach.end (); ++i) + { + if (i->restorectx) + { + warn.match_empty_string (rules[i->rank].line); + } + } + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/maxlen.cc b/tools/re2c/src/ir/skeleton/maxlen.cc new file mode 100644 index 000000000..3f1d93310 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/maxlen.cc @@ -0,0 +1,50 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include + +#include "src/ir/skeleton/skeleton.h" + +namespace re2c +{ + +// 0 < DIST_MAX < DIST_ERROR <= std::numeric_limits::max() +const uint32_t Node::DIST_ERROR = std::numeric_limits::max(); +const uint32_t Node::DIST_MAX = DIST_ERROR - 1; + +// different from YYMAXFILL calculation +// in the 
way it handles loops and empty regexp
+void Node::calc_dist ()
+{
+	if (dist != DIST_ERROR)
+	{
+		return;
+	}
+	else if (end ())
+	{
+		dist = 0;
+	}
+	else if (loop < 2)
+	{
+		local_inc _ (loop);
+		for (arcs_t::iterator i = arcs.begin (); i != arcs.end (); ++i)
+		{
+			i->first->calc_dist ();
+			if (i->first->dist != DIST_ERROR)
+			{
+				if (dist == DIST_ERROR)
+				{
+					dist = i->first->dist;
+				}
+				else
+				{
+					dist = std::max (dist, i->first->dist);
+				}
+			}
+		}
+		dist = std::min (dist + 1, DIST_MAX); // clamp so DIST_MAX flags an overlong path
+	}
+}
+
+} // namespace re2c
diff --git a/tools/re2c/src/ir/skeleton/path.h b/tools/re2c/src/ir/skeleton/path.h
new file mode 100644
index 000000000..d09861e5a
--- /dev/null
+++ b/tools/re2c/src/ir/skeleton/path.h
@@ -0,0 +1,103 @@
+#ifndef _RE2C_IR_SKELETON_PATH_
+#define _RE2C_IR_SKELETON_PATH_
+
+#include <vector>
+
+#include "src/ir/rule_rank.h"
+#include "src/util/c99_stdint.h"
+
+namespace re2c
+{
+
+struct rule_t
+{
+	rule_rank_t rank;
+	bool restorectx;
+
+	rule_t (rule_rank_t r, bool c)
+		: rank (r)
+		, restorectx (c)
+	{}
+
+	// needed by STL containers
+	// same as 'std::pair' comparator
+	bool operator < (const rule_t & r) const
+	{
+		return rank < r.rank
+			|| (!(r.rank < rank) && restorectx < r.restorectx);
+	}
+};
+
+class path_t
+{
+public:
+	typedef std::vector<uint32_t> arc_t;
+
+private:
+	std::vector<const arc_t *> arcs;
+
+	rule_t rule;
+	size_t rule_pos;
+
+	bool ctx;
+	size_t ctx_pos;
+
+public:
+	explicit path_t (rule_t r, bool c)
+		: arcs ()
+		, rule (r)
+		, rule_pos (0)
+		, ctx (c)
+		, ctx_pos (0)
+	{}
+	size_t len () const
+	{
+		return arcs.size ();
+	}
+	size_t len_matching () const
+	{
+		return rule.restorectx
+			?
ctx_pos
+			: rule_pos;
+	}
+	rule_rank_t match () const
+	{
+		return rule.rank;
+	}
+	const arc_t * operator [] (size_t i) const
+	{
+		return arcs[i];
+	}
+	void extend (rule_t r, bool c, const arc_t * a)
+	{
+		arcs.push_back (a);
+		if (!r.rank.is_none ())
+		{
+			rule = r;
+			rule_pos = arcs.size ();
+		}
+		if (c)
+		{
+			ctx = true;
+			ctx_pos = arcs.size ();
+		}
+	}
+	void append (const path_t * p)
+	{
+		if (!p->rule.rank.is_none ())
+		{
+			rule = p->rule;
+			rule_pos = arcs.size () + p->rule_pos;
+		}
+		if (p->ctx)
+		{
+			ctx = true;
+			ctx_pos = arcs.size () + p->ctx_pos;
+		}
+		arcs.insert (arcs.end (), p->arcs.begin (), p->arcs.end ());
+	}
+};
+
+} // namespace re2c
+
+#endif // _RE2C_IR_SKELETON_PATH_
diff --git a/tools/re2c/src/ir/skeleton/skeleton.cc b/tools/re2c/src/ir/skeleton/skeleton.cc
new file mode 100644
index 000000000..deee11334
--- /dev/null
+++ b/tools/re2c/src/ir/skeleton/skeleton.cc
@@ -0,0 +1,163 @@
+#include <stdlib.h>
+#include <algorithm>
+#include <limits>
+
+#include "src/codegen/go.h"
+#include "src/conf/msg.h"
+#include "src/ir/dfa/dfa.h"
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/regexp/regexp_rule.h"
+#include "src/ir/skeleton/skeleton.h"
+
+namespace re2c
+{
+
+Node::Node ()
+	: arcs ()
+	, arcsets ()
+	, loop (0)
+	, rule (rule_rank_t::none (), false)
+	, ctx (false)
+	, dist (DIST_ERROR)
+	, reachable ()
+	, suffix (NULL)
+{}
+
+void Node::init(bool c, RuleOp *r, const std::vector<std::pair<Node*, uint32_t> > &a)
+{
+	if (r)
+	{
+		rule.rank = r->rank;
+		rule.restorectx = r->ctx->fixedLength () != 0;
+	}
+
+	ctx = c;
+
+	uint32_t lb = 0;
+	std::vector<std::pair<Node*, uint32_t> >::const_iterator
+		i = a.begin(),
+		e = a.end();
+	for (; i != e; ++i)
+	{
+		Node *n = i->first;
+		const uint32_t ub = i->second - 1; // inclusive upper bound of this range
+
+		// pick at most 0x100 unique edges from this range
+		// (for 1-byte code units this covers the whole range: [0 - 0xFF])
+		//     - range bounds must be included
+		//     - values should be evenly distributed
+		//     - values should be deterministic
+		const uint32_t step = 1 + (ub - lb) / 0x100;
+		for (uint32_t c = lb;
c < ub; c += step)
+		{
+			arcs[n].push_back (c);
+		}
+		arcs[n].push_back (ub);
+
+		arcsets[n].push_back (std::make_pair (lb, ub));
+		lb = ub + 1;
+	}
+}
+
+Node::~Node ()
+{
+	delete suffix;
+}
+
+bool Node::end () const
+{
+	return arcs.size () == 0; // a node with no outgoing arcs is an end node
+}
+
+Skeleton::Skeleton
+	( const dfa_t &dfa
+	, const charset_t &cs
+	, const rules_t &rs
+	, const std::string &dfa_name
+	, const std::string &dfa_cond
+	, uint32_t dfa_line
+	)
+	: name (dfa_name)
+	, cond (dfa_cond)
+	, line (dfa_line)
+	, nodes_count (dfa.states.size())
+	, nodes (new Node [nodes_count + 1]) // +1 for default state
+	, sizeof_key (4)
+	, rules (rs)
+{
+	const size_t nc = cs.size() - 1;
+
+	// initialize skeleton nodes
+	Node *nil = &nodes[nodes_count];
+	for (size_t i = 0; i < nodes_count; ++i)
+	{
+		dfa_state_t *s = dfa.states[i];
+		std::vector<std::pair<Node*, uint32_t> > arcs;
+		for (size_t c = 0; c < nc;)
+		{
+			const size_t j = s->arcs[c];
+			for (;++c < nc && s->arcs[c] == j;);
+			Node *to = j == dfa_t::NIL
+				? nil
+				: &nodes[j];
+			arcs.push_back(std::make_pair(to, cs[c]));
+		}
+		// all arcs go to default node => this node is final, drop arcs
+		if (arcs.size() == 1 && arcs[0].first == nil)
+		{
+			arcs.clear();
+		}
+		nodes[i].init(s->ctx, s->rule, arcs);
+	}
+
+	// calculate maximal path length, check overflow
+	nodes->calc_dist ();
+	const uint32_t maxlen = nodes->dist;
+	if (maxlen == Node::DIST_MAX)
+	{
+		error ("DFA path %sis too long", incond (cond).c_str ());
+		exit (1);
+	}
+
+	// calculate maximal rule rank (disregarding default and none rules)
+	uint32_t maxrule = 0;
+	for (uint32_t i = 0; i < nodes_count; ++i)
+	{
+		const rule_rank_t r = nodes[i].rule.rank;
+		if (!r.is_none () && !r.is_def ())
+		{
+			maxrule = std::max (maxrule, r.uint32 ());
+		}
+	}
+	// two upper values reserved for default and none rules)
+	maxrule += 2;
+
+	// initialize size of key
+	const uint32_t max = std::max (maxlen, maxrule);
+	if (max <= std::numeric_limits<uint8_t>::max())
+	{
+		sizeof_key = 1;
+	}
+	else if (max <=
std::numeric_limits<uint16_t>::max())
+	{
+		sizeof_key = 2;
+	}
+}
+
+Skeleton::~Skeleton ()
+{
+	delete [] nodes;
+}
+
+uint32_t Skeleton::rule2key (rule_rank_t r) const
+{
+	switch (sizeof_key)
+	{
+		default: // shouldn't happen
+		case 4: return rule2key<uint32_t> (r);
+		case 2: return rule2key<uint16_t> (r);
+		case 1: return rule2key<uint8_t> (r);
+	}
+}
+
+} // namespace re2c
diff --git a/tools/re2c/src/ir/skeleton/skeleton.h b/tools/re2c/src/ir/skeleton/skeleton.h
new file mode 100644
index 000000000..78c082716
--- /dev/null
+++ b/tools/re2c/src/ir/skeleton/skeleton.h
@@ -0,0 +1,174 @@
+#ifndef _RE2C_IR_SKELETON_SKELETON_
+#define _RE2C_IR_SKELETON_SKELETON_
+
+#include "src/util/c99_stdint.h"
+#include <stddef.h>
+#include <stdio.h>
+#include <limits>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "src/ir/regexp/regexp.h"
+#include "src/ir/rule_rank.h"
+#include "src/ir/skeleton/path.h"
+#include "src/ir/skeleton/way.h"
+#include "src/parse/rules.h"
+#include "src/util/local_increment.h"
+#include "src/util/forbid_copy.h"
+#include "src/util/u32lim.h"
+
+namespace re2c
+{
+
+struct dfa_t;
+struct OutputFile;
+class RuleOp;
+
+struct Node
+{
+	/*
+	 * note [counting skeleton edges]
+	 *
+	 * To avoid any possible overflows all size calculations are wrapped in
+	 * a special truncated unsigned 32-bit integer type that checks overflow
+	 * on each binary operation or conversion from another type.
+	 *
+	 * Two things contribute to size calculation: path length and the number
+	 * of outgoing arcs in each node. Some considerations on why these values
+	 * will not overflow before they are converted to truncated type:
+	 *
+	 *   - Maximal number of outgoing arcs in each node cannot exceed 32 bits:
+	 *     it is bounded by the number of code units in current encoding, and
+	 *     re2c doesn't support any encoding with more than 2^32 code units.
+	 *     Conversion is safe.
+	 *
+	 *   - Maximal path length cannot exceed 32 bits: we estimate it right
+	 *     after skeleton construction and check for overflow.
If path length
+	 *     does overflow, an error is reported and re2c aborts.
+	 */
+
+	// Type for calculating the size of path cover.
+	// Paths are dumped to file as soon as generated and don't eat
+	// heap space. The total size of path cover (measured in edges)
+	// is O(N^2) where N is the number of edges in skeleton.
+	typedef u32lim_t<1024 * 1024 * 1024> covers_t; // ~1Gb
+
+	// Type for counting arcs in paths that cause undefined behaviour.
+	// These paths are stored on heap, so the limit should be low.
+	// Most real-world cases have only a few short paths.
+	// We don't need all paths anyway, just some examples.
+	typedef u32lim_t<1024> nakeds_t; // ~1Kb
+
+	typedef std::map<Node *, path_t::arc_t> arcs_t;
+	typedef std::map<Node *, way_arc_t> arcsets_t;
+	typedef local_increment_t<uint8_t> local_inc;
+
+	// outgoing arcs
+	arcs_t arcs;
+	arcsets_t arcsets;
+
+	// how many times this node has been visited
+	// (controls looping in graph traversals)
+	uint8_t loop;
+
+	// rule for corresponding DFA state (if any)
+	rule_t rule;
+
+	// start of trailing context
+	bool ctx;
+
+	// maximal distance to end node (assuming one iteration per loop)
+	static const uint32_t DIST_ERROR;
+	static const uint32_t DIST_MAX;
+	uint32_t dist;
+
+	// rules reachable from this node (including absent rule)
+	std::set<rule_t> reachable;
+
+	// path to end node (for constructing path cover)
+	path_t * suffix;
+
+	Node ();
+	void init(bool b, RuleOp *r, const std::vector<std::pair<Node*, uint32_t> > &arcs);
+	~Node ();
+	bool end () const;
+	void calc_dist ();
+	void calc_reachable ();
+	template <typename cunit_t, typename key_t>
+		void cover (path_t & prefix, FILE * input, FILE * keys, covers_t &size);
+	void naked_ways (way_t & prefix, std::vector<way_t> & ways, nakeds_t &size);
+
+	FORBID_COPY (Node);
+};
+
+struct Skeleton
+{
+	const std::string name;
+	const std::string cond;
+	const uint32_t line;
+
+	const size_t nodes_count;
+	Node * nodes;
+	size_t sizeof_key;
+	rules_t rules;
+
+	Skeleton
+		( const dfa_t &dfa
+		, const charset_t &cs
+		, const rules_t & rs
+		, const std::string &dfa_name
+		, const
std::string &dfa_cond
+		, uint32_t dfa_line
+		);
+	~Skeleton ();
+	void warn_undefined_control_flow ();
+	void warn_unreachable_rules ();
+	void warn_match_empty ();
+	void emit_data (const char * fname);
+	static void emit_prolog (OutputFile & o);
+	void emit_start
+		( OutputFile & o
+		, size_t maxfill
+		, bool backup
+		, bool backupctx
+		, bool accept
+		) const;
+	void emit_end
+		( OutputFile & o
+		, bool backup
+		, bool backupctx
+		) const;
+	static void emit_epilog (OutputFile & o, const std::set<std::string> & names);
+	void emit_action (OutputFile & o, uint32_t ind, rule_rank_t rank) const;
+
+	template <typename key_t> static key_t rule2key (rule_rank_t r);
+	uint32_t rule2key (rule_rank_t r) const;
+
+private:
+	template <typename cunit_t, typename key_t>
+		void generate_paths_cunit_key (FILE * input, FILE * keys);
+	template <typename cunit_t>
+		void generate_paths_cunit (FILE * input, FILE * keys);
+	void generate_paths (FILE * input, FILE * keys);
+
+	FORBID_COPY (Skeleton);
+};
+
+template<typename key_t> key_t Skeleton::rule2key (rule_rank_t r)
+{
+	if (r.is_none()) {
+		return std::numeric_limits<key_t>::max(); // top value: "none" rule
+	} else if (r.is_def()) {
+		key_t k = std::numeric_limits<key_t>::max();
+		return --k; // top value minus one: default rule
+	} else {
+		return static_cast<key_t>(r.uint32());
+	}
+}
+
+} // namespace re2c
+
+#endif // _RE2C_IR_SKELETON_SKELETON_
diff --git a/tools/re2c/src/ir/skeleton/unreachable.cc b/tools/re2c/src/ir/skeleton/unreachable.cc
new file mode 100644
index 000000000..fac41dfc3
--- /dev/null
+++ b/tools/re2c/src/ir/skeleton/unreachable.cc
@@ -0,0 +1,73 @@
+#include "src/util/c99_stdint.h"
+#include <map>
+#include <set>
+#include <utility>
+
+#include "src/conf/warn.h"
+#include "src/globals.h"
+#include "src/ir/rule_rank.h"
+#include "src/ir/skeleton/path.h"
+#include "src/ir/skeleton/skeleton.h"
+#include "src/parse/rules.h"
+
+namespace re2c
+{
+
+void Node::calc_reachable ()
+{
+	if (!reachable.empty ())
+	{
+		return;
+	}
+	else if (end ())
+	{
+		reachable.insert (rule);
+	}
+	else if (loop < 2)
+	{
+		local_inc _ (loop);
+		for (arcs_t::iterator i = arcs.begin (); i != arcs.end (); ++i)
+		{
+			
i->first->calc_reachable ();
+			reachable.insert (i->first->reachable.begin (), i->first->reachable.end ());
+		}
+	}
+}
+
+void Skeleton::warn_unreachable_rules ()
+{
+	nodes->calc_reachable ();
+	for (uint32_t i = 0; i < nodes_count; ++i)
+	{
+		const rule_rank_t r1 = nodes[i].rule.rank;
+		const std::set<rule_t> & rs = nodes[i].reachable;
+		for (std::set<rule_t>::const_iterator j = rs.begin (); j != rs.end (); ++j)
+		{
+			const rule_rank_t r2 = j->rank;
+			if (r1 == r2 || r2.is_none ())
+			{
+				rules[r1].reachable = true;
+			}
+			else
+			{
+				rules[r1].shadow.insert (r2);
+			}
+		}
+	}
+
+	// warn about unreachable rules:
+	//   - rules that are shadowed by other rules, e.g. rule '[a]' is shadowed by '[a] [^]'
+	//   - infinite rules that consume infinitely many characters and fail on YYFILL, e.g. '[^]*'
+	//   - rules that contain never-matching link, e.g. '[]' with option '--empty-class match-none'
+	// default rule '*' should not be reported
+	for (rules_t::const_iterator i = rules.begin (); i != rules.end (); ++i)
+	{
+		const rule_rank_t r = i->first;
+		if (!r.is_none () && !r.is_def () && !rules[r].reachable)
+		{
+			warn.unreachable_rule (cond, i->second, rules);
+		}
+	}
+}
+
+} // namespace re2c
diff --git a/tools/re2c/src/ir/skeleton/way.cc b/tools/re2c/src/ir/skeleton/way.cc
new file mode 100644
index 000000000..0f58efe42
--- /dev/null
+++ b/tools/re2c/src/ir/skeleton/way.cc
@@ -0,0 +1,74 @@
+#include <stdio.h>
+#include <algorithm>
+
+#include "src/ir/skeleton/way.h"
+
+namespace re2c
+{
+
+static bool cmp_way_arcs (const way_arc_t * a1, const way_arc_t * a2);
+static void fprint_way_arc (FILE * f, const way_arc_t & arc);
+
+bool cmp_way_arcs (const way_arc_t * a1, const way_arc_t * a2)
+{
+	return std::lexicographical_compare(a1->begin(), a1->end(), a2->begin(), a2->end());
+}
+
+// define strict weak ordering on patterns:
+// 1st criterion is length (short patterns go first)
+// 2nd criterion is lexicographical order (applies to patterns of equal length)
+bool cmp_ways (const way_t & w1, const way_t & w2)
+{
+	const size_t s1 = w1.size ();
+	const size_t s2 = w2.size ();
+	return (s1 == s2 && std::lexicographical_compare(w1.begin(), w1.end(), w2.begin(), w2.end(), cmp_way_arcs))
+		|| s1 < s2;
+}
+
+void fprint_way (FILE * f, const way_t & w)
+{
+	fprintf (f, "'");
+	const size_t len = w.size ();
+	for (size_t i = 0 ; i < len; ++i)
+	{
+		if (i > 0)
+		{
+			fprintf (f, " ");
+		}
+		if (w[i] == NULL)
+		{
+			fprintf (f, "(nil)"); // write to the caller's stream, not stderr
+		}
+		else
+		{
+			fprint_way_arc (f, *w[i]); // same stream as the surrounding quotes
+		}
+	}
+	fprintf (f, "'");
+}
+
+void fprint_way_arc (FILE * f, const way_arc_t & arc)
+{
+	const size_t ranges = arc.size ();
+	if (ranges == 1 && arc[0].first == arc[0].second)
+	{
+		fprintf (f, "\\x%X", arc[0].first); // single code unit: print without brackets
+	}
+	else
+	{
+		fprintf (f, "[");
+		for (size_t i = 0; i < ranges; ++i)
+		{
+			const uint32_t l = arc[i].first;
+			const uint32_t u = arc[i].second;
+			fprintf (f, "\\x%X", l);
+			if (l != u)
+			{
+				fprintf (f, "-\\x%X", u);
+			}
+		}
+		fprintf (f, "]");
+	}
+}
+
+} // namespace re2c
diff --git a/tools/re2c/src/ir/skeleton/way.h b/tools/re2c/src/ir/skeleton/way.h
new file mode 100644
index 000000000..e10010a91
--- /dev/null
+++ b/tools/re2c/src/ir/skeleton/way.h
@@ -0,0 +1,20 @@
+#ifndef _RE2C_IR_SKELETON_WAY_
+#define _RE2C_IR_SKELETON_WAY_
+
+#include "src/util/c99_stdint.h"
+#include <stdio.h>
+#include <utility>
+#include <vector>
+
+namespace re2c
+{
+
+typedef std::vector<std::pair<uint32_t, uint32_t> > way_arc_t;
+typedef std::vector<const way_arc_t *> way_t;
+
+bool cmp_ways (const way_t & w1, const way_t & w2);
+void fprint_way (FILE * f, const way_t & p);
+
+} // namespace re2c
+
+#endif // _RE2C_IR_SKELETON_WAY_
diff --git a/tools/re2c/src/main.cc b/tools/re2c/src/main.cc
new file mode 100644
index 000000000..03b6ee291
--- /dev/null
+++ b/tools/re2c/src/main.cc
@@ -0,0 +1,60 @@
+#include "src/util/c99_stdint.h"
+#include <string>
+
+#include "src/codegen/output.h"
+#include "src/conf/msg.h"
+#include "src/conf/opt.h"
+#include "src/conf/warn.h"
+#include "src/globals.h"
+#include "src/parse/input.h"
+#include "src/parse/parser.h"
+#include "src/parse/scanner.h"
+
+namespace re2c
+{
+
+bool bUsedYYBitmap = false;
+bool bWroteGetState = false;
+bool bWroteCondCheck = false;
+uint32_t last_fill_index = 0;
+std::string yySetupRule = "";
+
+} // end namespace re2c
+
+using namespace re2c;
+
+int main(int, char *argv[])
+{
+	switch (parse_opts (argv, opts))
+	{
+		case OK: break;
+		case EXIT_OK: return 0;
+		case EXIT_FAIL: return 1;
+	}
+
+	// set up the source stream
+	re2c::Input input (opts.source_file);
+	if (!input.open ())
+	{
+		error ("cannot open source file: %s", opts.source_file);
+		return 1;
+	}
+
+	// set up the output streams
+	re2c::Output output (opts.output_file, opts->header_file);
+	if (!output.source.open ())
+	{
+		error ("cannot open output file: %s", opts.output_file);
+		return 1;
+	}
+	if (opts->tFlag && !output.header.open ())
+	{
+		error ("cannot open header file: %s", opts->header_file);
+		return 1;
+	}
+
+	Scanner scanner (input, output.source);
+	parse (scanner, output);
+
+	return warn.error () ? 1 : 0; // non-zero exit status when warn.error () is set
+}
diff --git a/tools/re2c/src/parse/code.cc b/tools/re2c/src/parse/code.cc
new file mode 100644
index 000000000..97a865ce8
--- /dev/null
+++ b/tools/re2c/src/parse/code.cc
@@ -0,0 +1,8 @@
+#include "src/parse/code.h"
+
+namespace re2c
+{
+
+free_list<Code*> Code::freelist;
+
+} // namespace re2c
diff --git a/tools/re2c/src/parse/code.h b/tools/re2c/src/parse/code.h
new file mode 100644
index 000000000..d658e628a
--- /dev/null
+++ b/tools/re2c/src/parse/code.h
@@ -0,0 +1,31 @@
+#ifndef _RE2C_PARSE_CODE_
+#define _RE2C_PARSE_CODE_
+
+#include "src/util/c99_stdint.h"
+#include <stddef.h>
+#include <string>
+
+#include "src/parse/loc.h"
+#include "src/util/free_list.h"
+
+namespace re2c
+{
+
+struct Code
+{
+	static free_list<Code*> freelist;
+
+	const Loc loc;
+	const std::string text;
+
+	inline Code (const char * t, size_t t_len, const std::string & f, uint32_t l)
+		: loc (f, l)
+		, text (t, t_len)
+	{
+		freelist.insert (this); // every Code registers itself in the class-wide list
+	}
+};
+
+} // namespace re2c
+
+#endif // _RE2C_PARSE_CODE_
diff --git a/tools/re2c/src/parse/extop.h
b/tools/re2c/src/parse/extop.h
new file mode 100644
index 000000000..d093be924
--- /dev/null
+++ b/tools/re2c/src/parse/extop.h
@@ -0,0 +1,17 @@
+#ifndef _RE2C_PARSE_EXTOP_
+#define _RE2C_PARSE_EXTOP_
+
+#include "src/util/c99_stdint.h"
+
+namespace re2c
+{
+
+struct ExtOp
+{
+	uint32_t min;
+	uint32_t max;
+};
+
+} // end namespace re2c
+
+#endif // _RE2C_PARSE_EXTOP_
diff --git a/tools/re2c/src/parse/input.cc b/tools/re2c/src/parse/input.cc
new file mode 100644
index 000000000..472e6b995
--- /dev/null
+++ b/tools/re2c/src/parse/input.cc
@@ -0,0 +1,31 @@
+#include "src/parse/input.h"
+
+namespace re2c {
+
+Input::Input (const char * fn)
+	: file (NULL)
+	, file_name (fn)
+{}
+
+bool Input::open ()
+{
+	if (file_name == "")
+	{
+		file = stdin; // an empty name selects standard input
+	}
+	else
+	{
+		file = fopen (file_name.c_str (), "rb");
+	}
+	return file != NULL;
+}
+
+Input::~Input ()
+{
+	if (file != NULL && file != stdin) // never close stdin
+	{
+		fclose (file);
+	}
+}
+
+} // namespace re2c
diff --git a/tools/re2c/src/parse/input.h b/tools/re2c/src/parse/input.h
new file mode 100644
index 000000000..f58e189e6
--- /dev/null
+++ b/tools/re2c/src/parse/input.h
@@ -0,0 +1,25 @@
+#ifndef _RE2C_PARSE_INPUT_
+#define _RE2C_PARSE_INPUT_
+
+#include <stdio.h>
+#include <string>
+
+#include "src/util/forbid_copy.h"
+
+namespace re2c {
+
+struct Input
+{
+	FILE * file;
+	std::string file_name;
+
+	Input (const char * fn);
+	~Input ();
+	bool open ();
+
+	FORBID_COPY (Input);
+};
+
+} // namespace re2c
+
+#endif // _RE2C_PARSE_INPUT_
diff --git a/tools/re2c/src/parse/lex.cc b/tools/re2c/src/parse/lex.cc
new file mode 100644
index 000000000..9c7e01438
--- /dev/null
+++ b/tools/re2c/src/parse/lex.cc
@@ -0,0 +1,2861 @@
+/* Generated by re2c 0.16 on Thu Jan 21 10:47:47 2016 */
+#line 1 "../src/parse/lex.re"
+#include "src/util/c99_stdint.h"
+#include
+#include
+#include
+#include
+#include
+
+#include "src/codegen/output.h"
+#include "src/conf/opt.h"
+#include "src/conf/warn.h"
+#include "src/globals.h"
+#include "src/ir/regexp/encoding/enc.h"
+#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/input.h" +#include "src/parse/scanner.h" +#include "src/parse/parser.h" // needed by "y.tab.h" +#include "src/parse/unescape.h" +#include "src/util/range.h" +#include "src/util/s_to_n32_unsafe.h" +#include "y.tab.h" + +extern YYSTYPE yylval; + +#define YYCTYPE unsigned char +#define YYCURSOR cur +#define YYLIMIT lim +#define YYMARKER ptr +#define YYCTXMARKER ctx +#define YYFILL(n) { fill (n); } + +namespace re2c +{ + +// source code is in ASCII: pointers have type 'char *' +// but re2c makes an implicit assumption that YYCTYPE is unsigned +// when it generates comparisons +#line 42 "../src/parse/lex.re" + + +#line 62 "../src/parse/lex.re" + + +Scanner::ParseMode Scanner::echo() +{ + bool ignore_eoc = false; + int ignore_cnt = 0; + + if (eof && cur == eof) // Catch EOF + { + return Stop; + } + + tok = cur; +echo: + +#line 62 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 160, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 160, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 
128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 16) YYFILL(16); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '%') { + if (yych <= '\t') { + if (yych >= 0x01) goto yy4; + } else { + if (yych <= '\n') goto yy6; + if (yych <= '$') goto yy4; + goto yy8; + } + } else { + if (yych <= '*') { + if (yych <= ')') goto yy4; + goto yy9; + } else { + if (yych == '/') goto yy10; + goto yy4; + } + } + ++YYCURSOR; +#line 202 "../src/parse/lex.re" + { + if (!ignore_eoc && opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len () - 1); + // -1 so we don't write out the \0 + } + if(cur == eof) + { + return Stop; + } + } +#line 132 "src/parse/lex.cc" +yy4: + ++YYCURSOR; +yy5: +#line 213 "../src/parse/lex.re" + { + goto echo; + } +#line 140 "src/parse/lex.cc" +yy6: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yybm[0+yych] & 32) { + goto yy11; + } + if (yych == '#') goto yy14; +yy7: +#line 189 "../src/parse/lex.re" + { + if (ignore_eoc) + { + ignore_cnt++; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + cline++; + goto echo; + } +#line 163 "src/parse/lex.cc" +yy8: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '{') goto yy16; + goto yy5; +yy9: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '/') goto yy18; + goto yy5; +yy10: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '*') goto yy20; + goto yy5; +yy11: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 32) { + goto yy11; + } + if (yych == '#') goto yy14; +yy13: + 
YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy7; + } else { + goto yy5; + } + } else { + goto yy19; + } +yy14: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy14; + goto yy13; + } else { + if (yych <= ' ') goto yy14; + if (yych == 'l') goto yy21; + goto yy13; + } +yy16: + ++YYCURSOR; +#line 78 "../src/parse/lex.re" + { + if (opts->rFlag) + { + fatal("found standard 're2c' block while using -r flag"); + } + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = cur[-1] == '{' + ? sizeof ("%{") - 1 + : sizeof ("/*!re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Parse; + } +#line 226 "src/parse/lex.cc" +yy18: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy22; + if (yych == '\r') goto yy24; +yy19: +#line 168 "../src/parse/lex.re" + { + if (ignore_eoc) + { + if (ignore_cnt) + { + out.ws("\n").wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } +#line 251 "src/parse/lex.cc" +yy20: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '!') goto yy25; + goto yy13; +yy21: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy26; + goto yy13; +yy22: + ++YYCURSOR; +#line 150 "../src/parse/lex.re" + { + cline++; + if (ignore_eoc) + { + if (ignore_cnt) + { + out.wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } +#line 281 "src/parse/lex.cc" +yy24: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy22; + goto yy13; +yy25: + yych = (YYCTYPE)*++YYCURSOR; + switch (yych) { + case 'g': goto yy27; + case 'i': goto yy28; + case 'm': goto yy29; + case 'r': goto yy30; + case 't': goto yy31; 
+ case 'u': goto yy32; + default: goto yy13; + } +yy26: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy33; + goto yy13; +yy27: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy34; + goto yy13; +yy28: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy35; + goto yy13; +yy29: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy36; + goto yy13; +yy30: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy37; + if (yych == 'u') goto yy38; + goto yy13; +yy31: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy39; + goto yy13; +yy32: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy40; + goto yy13; +yy33: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy41; + goto yy13; +yy34: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy42; + goto yy13; +yy35: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy43; + goto yy13; +yy36: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'x') goto yy44; + goto yy13; +yy37: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy45; + goto yy13; +yy38: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy46; + goto yy13; +yy39: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy47; + goto yy13; +yy40: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy48; + goto yy13; +yy41: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy50; + if (yych <= '9') goto yy13; + goto yy50; +yy42: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy51; + goto yy13; +yy43: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy52; + goto yy13; +yy44: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy53; + goto yy13; +yy45: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy16; + goto yy13; +yy46: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy54; + goto yy13; +yy47: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy55; + goto yy13; +yy48: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy56; + goto yy13; +yy49: + ++YYCURSOR; + if 
(YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy50: + if (yych <= 0x1F) { + if (yych == '\t') goto yy49; + goto yy13; + } else { + if (yych <= ' ') goto yy49; + if (yych <= '0') goto yy13; + if (yych <= '9') goto yy57; + goto yy13; + } +yy51: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy59; + goto yy13; +yy52: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy60; + goto yy13; +yy53: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy61; + goto yy13; +yy54: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy62; + goto yy13; +yy55: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy63; + goto yy13; +yy56: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy64; + goto yy13; +yy57: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy57; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy13; + if (yych <= '\t') goto yy65; + if (yych <= '\n') goto yy67; + goto yy13; + } else { + if (yych <= '\r') goto yy69; + if (yych == ' ') goto yy65; + goto yy13; + } +yy59: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy70; + goto yy13; +yy60: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy71; + goto yy13; +yy61: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy72; + goto yy13; +yy62: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy73; + goto yy13; +yy63: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy74; + goto yy13; +yy64: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy75; + goto yy13; +yy65: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy65; + goto yy13; + } else { + if (yych <= ' ') goto yy65; + if (yych == '"') goto yy76; + goto yy13; + } +yy67: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 185 "../src/parse/lex.re" + { + set_sourceline (); + goto echo; + } +#line 491 "src/parse/lex.cc" 
+yy69: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy67; + goto yy13; +yy70: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy78; + goto yy13; +yy71: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy79; + goto yy13; +yy72: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy80; + goto yy13; +yy73: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy81; + goto yy13; +yy74: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy82; + goto yy13; +yy75: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy83; + goto yy13; +yy76: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy76; + } + if (yych <= '\n') goto yy13; + if (yych <= '"') goto yy84; + goto yy85; +yy78: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy86; + goto yy13; +yy79: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy87; + goto yy13; +yy80: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy88; + goto yy13; +yy81: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy90; + goto yy13; +yy82: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy91; + goto yy13; +yy83: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy92; + goto yy13; +yy84: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy67; + if (yych == '\r') goto yy69; + goto yy13; +yy85: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy13; + goto yy76; +yy86: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy94; + goto yy13; +yy87: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy95; + goto yy13; +yy88: + ++YYCURSOR; +#line 119 "../src/parse/lex.re" + { + if (opts->target != opt_t::DOT) + { + out.wdelay_yymaxfill (); + } + tok = pos = cur; + ignore_eoc = true; + goto echo; + } +#line 585 "src/parse/lex.cc" +yy90: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy96; + goto yy13; +yy91: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy97; + goto yy13; +yy92: + ++YYCURSOR; +#line 105 "../src/parse/lex.re" + { + if (!opts->rFlag) + { + fatal("found 'use:re2c' block without -r flag"); + } + reuse(); + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = sizeof ("/*!use:re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Reuse; + } +#line 611 "src/parse/lex.cc" +yy94: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy98; + goto yy13; +yy95: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy99; + goto yy13; +yy96: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy100; + goto yy13; +yy97: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy102; + goto yy13; +yy98: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy104; + goto yy13; +yy99: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy105; + goto yy13; +yy100: + ++YYCURSOR; +#line 93 "../src/parse/lex.re" + { + if (opts->rFlag) + { + opts.reset_mapCodeName (); + } + else + { + fatal("found 'rules:re2c' block without -r flag"); + } + tok = cur; + return Rules; + } +#line 651 "src/parse/lex.cc" +yy102: + ++YYCURSOR; +#line 139 "../src/parse/lex.re" + { + tok = pos = cur; + ignore_eoc = true; + if (opts->target != opt_t::DOT) + { + out.wdelay_line_info ().ws("\n") + .wdelay_types ().ws("\n") + .wline_info (cline, get_fname ().c_str ()); + } + goto echo; + } +#line 666 "src/parse/lex.cc" +yy104: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy107; + goto yy13; +yy105: + ++YYCURSOR; +#line 134 "../src/parse/lex.re" + { + tok = pos = cur; + ignore_eoc = true; + goto echo; + } +#line 679 "src/parse/lex.cc" +yy107: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'c') goto yy13; + ++YYCURSOR; +#line 128 "../src/parse/lex.re" + { + tok = pos = cur; + out.wdelay_state_goto (opts->topIndent); + ignore_eoc = true; + goto echo; + } +#line 691 "src/parse/lex.cc" +} +#line 216 "../src/parse/lex.re" + +} + +int Scanner::scan() +{ + 
uint32_t depth; + +scan: + tchar = cur - pos; + tline = cline; + tok = cur; + switch (lexer_state) + { + case LEX_NORMAL: goto start; + case LEX_FLEX_NAME: goto flex_name; + } + +start: + +#line 713 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 144, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 144, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 128, 128, 128, 128, 128, 128, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 0, 128, 128, 160, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 6) YYFILL(6); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 16) { + goto yy114; + } + if (yych <= '9') { + if (yych <= '&') { + if (yych <= '\r') { + if (yych <= 0x08) goto yy112; + if (yych <= '\n') goto yy117; + if (yych >= '\r') goto yy119; + } else { + if (yych <= '"') { + if (yych >= '"') goto yy120; + } else { 
+ if (yych == '%') goto yy122; + } + } + } else { + if (yych <= '+') { + if (yych <= '\'') goto yy123; + if (yych <= ')') goto yy125; + if (yych <= '*') goto yy127; + goto yy129; + } else { + if (yych <= '-') { + if (yych <= ',') goto yy125; + } else { + if (yych <= '.') goto yy131; + if (yych <= '/') goto yy133; + goto yy134; + } + } + } + } else { + if (yych <= '\\') { + if (yych <= '>') { + if (yych <= ':') goto yy135; + if (yych == '<') goto yy136; + goto yy125; + } else { + if (yych <= '@') { + if (yych <= '?') goto yy129; + } else { + if (yych <= 'Z') goto yy134; + if (yych <= '[') goto yy137; + goto yy125; + } + } + } else { + if (yych <= 'q') { + if (yych == '_') goto yy134; + if (yych >= 'a') goto yy134; + } else { + if (yych <= 'z') { + if (yych <= 'r') goto yy139; + goto yy134; + } else { + if (yych <= '{') goto yy140; + if (yych <= '|') goto yy125; + } + } + } + } +yy112: + ++YYCURSOR; +yy113: +#line 388 "../src/parse/lex.re" + { + fatalf("unexpected character: '%c'", *tok); + goto scan; + } +#line 823 "src/parse/lex.cc" +yy114: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 16) { + goto yy114; + } +#line 372 "../src/parse/lex.re" + { + goto scan; + } +#line 835 "src/parse/lex.cc" +yy117: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 0x1F) { + if (yych == '\t') goto yy142; + } else { + if (yych <= ' ') goto yy142; + if (yych == '#') goto yy145; + } +yy118: +#line 381 "../src/parse/lex.re" + { + if (cur == eof) return 0; + pos = cur; + cline++; + goto scan; + } +#line 853 "src/parse/lex.cc" +yy119: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy117; + goto yy113; +yy120: + ++YYCURSOR; +#line 265 "../src/parse/lex.re" + { yylval.regexp = lex_str('"', opts->bCaseInsensitive || opts->bCaseInverted); return TOKEN_REGEXP; } +#line 862 "src/parse/lex.cc" +yy122: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '}') goto yy147; + goto yy113; +yy123: + ++YYCURSOR; +#line 
264 "../src/parse/lex.re" + { yylval.regexp = lex_str('\'', opts->bCaseInsensitive || !opts->bCaseInverted); return TOKEN_REGEXP; } +#line 871 "src/parse/lex.cc" +yy125: + ++YYCURSOR; +yy126: +#line 275 "../src/parse/lex.re" + { + return *tok; + } +#line 879 "src/parse/lex.cc" +yy127: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '/') goto yy147; +#line 279 "../src/parse/lex.re" + { + yylval.op = *tok; + return TOKEN_STAR; + } +#line 888 "src/parse/lex.cc" +yy129: + ++YYCURSOR; +#line 283 "../src/parse/lex.re" + { + yylval.op = *tok; + return TOKEN_CLOSE; + } +#line 896 "src/parse/lex.cc" +yy131: + ++YYCURSOR; +#line 367 "../src/parse/lex.re" + { + yylval.regexp = mkDot(); + return TOKEN_REGEXP; + } +#line 904 "src/parse/lex.cc" +yy133: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '*') goto yy149; + if (yych == '/') goto yy151; + goto yy126; +yy134: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + goto yy159; +yy135: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == '=') goto yy160; + goto yy113; +yy136: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '!') goto yy162; + if (yych == '>') goto yy164; + goto yy126; +yy137: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '^') goto yy165; +#line 266 "../src/parse/lex.re" + { yylval.regexp = lex_cls(false); return TOKEN_REGEXP; } +#line 930 "src/parse/lex.cc" +yy139: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == 'e') goto yy167; + goto yy159; +yy140: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yybm[0+yych] & 64) { + goto yy170; + } + if (yych <= 'Z') { + if (yych == ',') goto yy168; + if (yych >= 'A') goto yy172; + } else { + if (yych <= '_') { + if (yych >= '_') goto yy172; + } else { + if (yych <= '`') goto yy141; + if (yych <= 'z') goto yy172; + } + } +yy141: +#line 235 "../src/parse/lex.re" + { + depth = 1; + goto code; + } +#line 959 "src/parse/lex.cc" +yy142: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) 
YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy142; + } else { + if (yych <= ' ') goto yy142; + if (yych == '#') goto yy145; + } +yy144: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy118; + } else { + goto yy126; + } + } else { + if (yyaccept == 2) { + goto yy141; + } else { + goto yy169; + } + } +yy145: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy145; + goto yy144; + } else { + if (yych <= ' ') goto yy145; + if (yych == 'l') goto yy174; + goto yy144; + } +yy147: + ++YYCURSOR; +#line 259 "../src/parse/lex.re" + { + tok = cur; + return 0; + } +#line 1004 "src/parse/lex.cc" +yy149: + ++YYCURSOR; +#line 253 "../src/parse/lex.re" + { + depth = 1; + goto comment; + } +#line 1012 "src/parse/lex.cc" +yy151: + ++YYCURSOR; +#line 250 "../src/parse/lex.re" + { + goto nextLine; + } +#line 1019 "src/parse/lex.cc" +yy153: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 351 "../src/parse/lex.re" + { + if (!opts->FFlag) { + yylval.str = new std::string (tok, tok_len()); + return TOKEN_ID; + } else { + RegExp *r = NULL; + const bool casing = opts->bCaseInsensitive || opts->bCaseInverted; + for (char *s = tok; s < cur; ++s) { + const uint32_t c = static_cast(*s); + r = doCat(r, casing ? ichr(c) : schr(c)); + } + yylval.regexp = r ? 
r : new NullOp; + return TOKEN_REGEXP; + } + } +#line 1039 "src/parse/lex.cc" +yy155: + yych = (YYCTYPE)*++YYCURSOR; + goto yy178; +yy156: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 346 "../src/parse/lex.re" + { + yylval.str = new std::string (tok, tok_len ()); + return TOKEN_ID; + } +#line 1051 "src/parse/lex.cc" +yy158: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy159: + if (yybm[0+yych] & 32) { + goto yy158; + } + if (yych <= ' ') { + if (yych == '\t') goto yy155; + if (yych <= 0x1F) goto yy153; + goto yy155; + } else { + if (yych <= ',') { + if (yych <= '+') goto yy153; + goto yy156; + } else { + if (yych <= '<') goto yy153; + if (yych <= '>') goto yy156; + goto yy153; + } + } +yy160: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '>') goto yy179; +#line 244 "../src/parse/lex.re" + { + tok += 2; /* skip ":=" */ + depth = 0; + goto code; + } +#line 1084 "src/parse/lex.cc" +yy162: + ++YYCURSOR; +#line 272 "../src/parse/lex.re" + { + return TOKEN_SETUP; + } +#line 1091 "src/parse/lex.cc" +yy164: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + goto yy182; +yy165: + ++YYCURSOR; +#line 267 "../src/parse/lex.re" + { yylval.regexp = lex_cls(true); return TOKEN_REGEXP; } +#line 1100 "src/parse/lex.cc" +yy167: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == '2') goto yy187; + goto yy159; +yy168: + ++YYCURSOR; +yy169: +#line 319 "../src/parse/lex.re" + { + fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); + } +#line 1113 "src/parse/lex.cc" +yy170: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy170; + } + if (yych <= '^') { + if (yych <= ',') { + if (yych <= '+') goto yy144; + goto yy188; + } else { + if (yych <= '@') goto yy144; + if (yych >= '[') goto yy144; + } + } else { + if (yych <= 'z') { + if (yych == '`') goto yy144; + } else { + if (yych == '}') 
goto yy189; + goto yy144; + } + } +yy172: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '^') { + if (yych <= '9') { + if (yych <= '/') goto yy144; + goto yy172; + } else { + if (yych <= '@') goto yy144; + if (yych <= 'Z') goto yy172; + goto yy144; + } + } else { + if (yych <= 'z') { + if (yych == '`') goto yy144; + goto yy172; + } else { + if (yych == '}') goto yy191; + goto yy144; + } + } +yy174: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy193; + goto yy144; +yy175: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 333 "../src/parse/lex.re" + { + yylval.str = new std::string (tok, tok_len ()); + if (opts->FFlag) + { + lexer_state = LEX_FLEX_NAME; + return TOKEN_FID; + } + else + { + return TOKEN_ID; + } + } +#line 1179 "src/parse/lex.cc" +yy177: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy178: + if (yych <= ' ') { + if (yych == '\t') goto yy177; + if (yych <= 0x1F) goto yy175; + goto yy177; + } else { + if (yych <= ',') { + if (yych <= '+') goto yy175; + goto yy156; + } else { + if (yych <= '<') goto yy175; + if (yych <= '>') goto yy156; + goto yy175; + } + } +yy179: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 240 "../src/parse/lex.re" + { + return *tok; + } +#line 1206 "src/parse/lex.cc" +yy181: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; +yy182: + if (yych <= '9') { + if (yych <= '\t') { + if (yych <= 0x08) goto yy144; + goto yy181; + } else { + if (yych == ' ') goto yy181; + goto yy144; + } + } else { + if (yych <= '=') { + if (yych <= ':') goto yy183; + if (yych <= '<') goto yy144; + goto yy184; + } else { + if (yych == '{') goto yy185; + goto yy144; + } + } +yy183: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '=') goto yy185; + goto yy144; +yy184: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != '>') goto yy144; +yy185: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 269 "../src/parse/lex.re" + { + return TOKEN_NOCOND; + } 
+#line 1244 "src/parse/lex.cc" +yy187: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == 'c') goto yy194; + goto yy159; +yy188: + yyaccept = 3; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '/') goto yy169; + if (yych <= '9') goto yy195; + if (yych == '}') goto yy197; + goto yy169; +yy189: + ++YYCURSOR; +#line 288 "../src/parse/lex.re" + { + if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min)) + { + fatal ("repetition count overflow"); + } + yylval.extop.max = yylval.extop.min; + return TOKEN_CLOSESIZE; + } +#line 1268 "src/parse/lex.cc" +yy191: + ++YYCURSOR; +#line 323 "../src/parse/lex.re" + { + if (!opts->FFlag) { + fatal("curly braces for names only allowed with -F switch"); + } + yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces + return TOKEN_ID; + } +#line 1279 "src/parse/lex.cc" +yy193: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy199; + goto yy144; +yy194: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == ':') goto yy200; + goto yy159; +yy195: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '/') goto yy144; + if (yych <= '9') goto yy195; + if (yych == '}') goto yy202; + goto yy144; +yy197: + ++YYCURSOR; +#line 310 "../src/parse/lex.re" + { + if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + yylval.extop.max = std::numeric_limits::max(); + return TOKEN_CLOSESIZE; + } +#line 1308 "src/parse/lex.cc" +yy199: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy204; + goto yy144; +yy200: + ++YYCURSOR; +#line 331 "../src/parse/lex.re" + { lex_conf (); return TOKEN_CONF; } +#line 1317 "src/parse/lex.cc" +yy202: + ++YYCURSOR; +#line 297 "../src/parse/lex.re" + { + const char * p = strchr (tok, ','); + if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + if (!s_to_u32_unsafe (p + 1, cur - 1, 
yylval.extop.max)) + { + fatal ("repetition upper bound overflow"); + } + return TOKEN_CLOSESIZE; + } +#line 1333 "src/parse/lex.cc" +yy204: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy206; + if (yych <= '9') goto yy144; + goto yy206; +yy205: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy206: + if (yych <= 0x1F) { + if (yych == '\t') goto yy205; + goto yy144; + } else { + if (yych <= ' ') goto yy205; + if (yych <= '0') goto yy144; + if (yych >= ':') goto yy144; + } +yy207: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x08) goto yy144; + } else { + if (yych <= '\n') goto yy211; + if (yych <= '\f') goto yy144; + goto yy213; + } + } else { + if (yych <= ' ') { + if (yych <= 0x1F) goto yy144; + } else { + if (yych <= '/') goto yy144; + if (yych <= '9') goto yy207; + goto yy144; + } + } +yy209: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy209; + goto yy144; + } else { + if (yych <= ' ') goto yy209; + if (yych == '"') goto yy214; + goto yy144; + } +yy211: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 376 "../src/parse/lex.re" + { + set_sourceline (); + goto scan; + } +#line 1394 "src/parse/lex.cc" +yy213: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy211; + goto yy144; +yy214: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy214; + } + if (yych <= '\n') goto yy144; + if (yych >= '#') goto yy217; + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy211; + if (yych == '\r') goto yy213; + goto yy144; +yy217: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy144; + goto yy214; +} +#line 392 "../src/parse/lex.re" + + +flex_name: + +#line 1424 "src/parse/lex.cc" +{ + 
YYCTYPE yych; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy222; + if (yych == '\r') goto yy224; + ++YYCURSOR; +yy221: +#line 403 "../src/parse/lex.re" + { + YYCURSOR = tok; + goto start; + } +#line 1438 "src/parse/lex.cc" +yy222: + ++YYCURSOR; +#line 397 "../src/parse/lex.re" + { + YYCURSOR = tok; + lexer_state = LEX_NORMAL; + return TOKEN_FID_END; + } +#line 1447 "src/parse/lex.cc" +yy224: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '\n') goto yy222; + goto yy221; +} +#line 407 "../src/parse/lex.re" + + +code: + +#line 1458 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 112, 0, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 112, 96, 64, 96, 96, 96, 96, 32, + 96, 96, 96, 96, 96, 96, 96, 96, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 0, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + }; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '&') { + if (yych <= '\n') { + if (yych <= 0x00) goto yy227; + if (yych <= '\t') goto yy229; + goto yy231; + } 
else { + if (yych == '"') goto yy233; + goto yy229; + } + } else { + if (yych <= '{') { + if (yych <= '\'') goto yy234; + if (yych <= 'z') goto yy229; + goto yy235; + } else { + if (yych == '}') goto yy237; + goto yy229; + } + } +yy227: + ++YYCURSOR; +#line 470 "../src/parse/lex.re" + { + if (cur == eof) + { + if (depth) + { + fatal("missing '}'"); + } + return 0; + } + goto code; + } +#line 1531 "src/parse/lex.cc" +yy229: + ++YYCURSOR; +yy230: +#line 484 "../src/parse/lex.re" + { + goto code; + } +#line 1539 "src/parse/lex.cc" +yy231: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + YYCTXMARKER = YYCURSOR; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych >= '\t') goto yy239; + } else { + if (yych <= '\n') goto yy241; + if (yych >= '\r') goto yy241; + } + } else { + if (yych <= ' ') { + if (yych >= ' ') goto yy239; + } else { + if (yych == '#') goto yy242; + } + } +yy232: +#line 451 "../src/parse/lex.re" + { + if (depth == 0) + { + tok += strspn(tok, " \t\r\n"); + while (cur > tok && strchr(" \t\r\n", cur[-1])) + { + --cur; + } + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } +#line 1579 "src/parse/lex.cc" +yy233: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy230; + goto yy246; +yy234: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy230; + goto yy251; +yy235: + ++YYCURSOR; +#line 423 "../src/parse/lex.re" + { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else + { + ++depth; + } + goto code; + } +#line 1604 "src/parse/lex.cc" +yy237: + ++YYCURSOR; +#line 411 "../src/parse/lex.re" + { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else if (--depth == 0) + { + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + goto code; + } +#line 
1620 "src/parse/lex.cc" +yy239: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 0x1F) { + if (yych == '\t') goto yy253; + } else { + if (yych <= ' ') goto yy253; + if (yych == '#') goto yy242; + } +yy240: + YYCURSOR = YYCTXMARKER; +#line 438 "../src/parse/lex.re" + { + if (depth == 0) + { + goto code; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } +#line 1646 "src/parse/lex.cc" +yy241: + yych = (YYCTYPE)*++YYCURSOR; + goto yy240; +yy242: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 16) { + goto yy242; + } + if (yych == 'l') goto yy255; +yy244: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy232; + } else { + goto yy230; + } + } else { + goto yy240; + } +yy245: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy246: + if (yybm[0+yych] & 32) { + goto yy245; + } + if (yych <= '\n') goto yy244; + if (yych >= '#') goto yy249; +yy247: + ++YYCURSOR; +#line 481 "../src/parse/lex.re" + { + goto code; + } +#line 1685 "src/parse/lex.cc" +yy249: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy244; + goto yy245; +yy250: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy251: + if (yybm[0+yych] & 64) { + goto yy250; + } + if (yych <= '\n') goto yy244; + if (yych <= '\'') goto yy247; + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy244; + goto yy250; +yy253: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy253; + goto yy244; + } else { + if (yych <= ' ') goto yy253; + if (yych == '#') goto yy242; + goto yy244; + } +yy255: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'i') goto yy244; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'n') goto yy244; + yych 
= (YYCTYPE)*++YYCURSOR; + if (yych != 'e') goto yy244; + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy260; + if (yych <= '9') goto yy244; + goto yy260; +yy259: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy260: + if (yych <= 0x1F) { + if (yych == '\t') goto yy259; + goto yy244; + } else { + if (yych <= ' ') goto yy259; + if (yych <= '0') goto yy244; + if (yych >= ':') goto yy244; + } +yy261: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy261; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy244; + if (yych <= '\t') goto yy263; + if (yych <= '\n') goto yy265; + goto yy244; + } else { + if (yych <= '\r') goto yy267; + if (yych != ' ') goto yy244; + } +yy263: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy263; + goto yy244; + } else { + if (yych <= ' ') goto yy263; + if (yych == '"') goto yy268; + goto yy244; + } +yy265: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 434 "../src/parse/lex.re" + { + set_sourceline (); + goto code; + } +#line 1780 "src/parse/lex.cc" +yy267: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy265; + goto yy244; +yy268: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '!') { + if (yych == '\n') goto yy244; + goto yy268; + } else { + if (yych <= '"') goto yy270; + if (yych == '\\') goto yy271; + goto yy268; + } +yy270: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy265; + if (yych == '\r') goto yy267; + goto yy244; +yy271: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy244; + goto yy268; +} +#line 487 "../src/parse/lex.re" + + +comment: + +#line 1814 "src/parse/lex.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 160, 
0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 160, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= ')') { + if (yych == '\n') goto yy276; + } else { + if (yych <= '*') goto yy278; + if (yych == '/') goto yy279; + } + ++YYCURSOR; +yy275: +#line 519 "../src/parse/lex.re" + { + if (cur == eof) + { + return 0; + } + goto comment; + } +#line 1869 "src/parse/lex.cc" +yy276: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yybm[0+yych] & 32) { + goto yy280; + } + if (yych == '#') goto yy283; +yy277: +#line 510 "../src/parse/lex.re" + { + if (cur == eof) + { + return 0; + } + tok = pos = cur; + cline++; + goto comment; + } +#line 1887 "src/parse/lex.cc" +yy278: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '/') goto 
yy285; + goto yy275; +yy279: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '*') goto yy287; + goto yy275; +yy280: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 32) { + goto yy280; + } + if (yych == '#') goto yy283; +yy282: + YYCURSOR = YYMARKER; + goto yy277; +yy283: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy283; + goto yy282; + } else { + if (yych <= ' ') goto yy283; + if (yych == 'l') goto yy289; + goto yy282; + } +yy285: + ++YYCURSOR; +#line 491 "../src/parse/lex.re" + { + if (--depth == 0) + { + goto scan; + } + else + { + goto comment; + } + } +#line 1932 "src/parse/lex.cc" +yy287: + ++YYCURSOR; +#line 501 "../src/parse/lex.re" + { + ++depth; + fatal("ambiguous /* found"); + goto comment; + } +#line 1941 "src/parse/lex.cc" +yy289: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'i') goto yy282; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'n') goto yy282; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'e') goto yy282; + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy294; + if (yych <= '9') goto yy282; + goto yy294; +yy293: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy294: + if (yych <= 0x1F) { + if (yych == '\t') goto yy293; + goto yy282; + } else { + if (yych <= ' ') goto yy293; + if (yych <= '0') goto yy282; + if (yych >= ':') goto yy282; + } +yy295: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy295; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy282; + if (yych <= '\t') goto yy297; + if (yych <= '\n') goto yy299; + goto yy282; + } else { + if (yych <= '\r') goto yy301; + if (yych != ' ') goto yy282; + } +yy297: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy297; + goto yy282; + } 
else { + if (yych <= ' ') goto yy297; + if (yych == '"') goto yy302; + goto yy282; + } +yy299: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 506 "../src/parse/lex.re" + { + set_sourceline (); + goto comment; + } +#line 2003 "src/parse/lex.cc" +yy301: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy299; + goto yy282; +yy302: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy302; + } + if (yych <= '\n') goto yy282; + if (yych >= '#') goto yy305; + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy299; + if (yych == '\r') goto yy301; + goto yy282; +yy305: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy282; + goto yy302; +} +#line 526 "../src/parse/lex.re" + + +nextLine: + +#line 2033 "src/parse/lex.cc" +{ + YYCTYPE yych; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy310; + ++YYCURSOR; +#line 537 "../src/parse/lex.re" + { if(cur == eof) { + return 0; + } + goto nextLine; + } +#line 2046 "src/parse/lex.cc" +yy310: + ++YYCURSOR; +#line 530 "../src/parse/lex.re" + { if(cur == eof) { + return 0; + } + tok = pos = cur; + cline++; + goto scan; + } +#line 2057 "src/parse/lex.cc" +} +#line 542 "../src/parse/lex.re" + +} + +static void escape (std::string & dest, const std::string & src) +{ + dest = src; + size_t l = dest.length(); + for (size_t p = 0; p < l; ++p) + { + if (dest[p] == '\\') + { + dest.insert(++p, "\\"); + ++l; + } + } +} + +RegExp *Scanner::lex_cls(bool neg) +{ + Range *r = NULL, *s; + uint32_t u, l; +fst: + +#line 2083 "src/parse/lex.cc" +{ + YYCTYPE yych; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == ']') goto yy315; +#line 566 "../src/parse/lex.re" + { l = lex_cls_chr(); goto snd; } +#line 2091 "src/parse/lex.cc" +yy315: + ++YYCURSOR; +#line 565 "../src/parse/lex.re" + { goto end; } +#line 2096 "src/parse/lex.cc" +} +#line 567 
"../src/parse/lex.re" + +snd: + +#line 2102 "src/parse/lex.cc" +{ + YYCTYPE yych; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*(YYMARKER = YYCURSOR); + if (yych == '-') goto yy320; +yy319: +#line 570 "../src/parse/lex.re" + { u = l; goto add; } +#line 2111 "src/parse/lex.cc" +yy320: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych != ']') goto yy322; + YYCURSOR = YYMARKER; + goto yy319; +yy322: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 571 "../src/parse/lex.re" + { + u = lex_cls_chr(); + if (l > u) { + warn.swapped_range(get_line(), l, u); + std::swap(l, u); + } + goto add; + } +#line 2130 "src/parse/lex.cc" +} +#line 579 "../src/parse/lex.re" + +add: + if (!(s = opts->encoding.encodeRange(l, u))) { + fatalf ("Bad code point range: '0x%X - 0x%X'", l, u); + } + r = Range::add(r, s); + goto fst; +end: + if (neg) { + r = Range::sub(opts->encoding.fullRange(), r); + } + return cls(r); +} + +uint32_t Scanner::lex_cls_chr() +{ + tok = cur; + +#line 2151 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy328; + if (yych == '\\') goto yy330; + ++YYCURSOR; +#line 602 "../src/parse/lex.re" + { return static_cast(tok[0]); } +#line 2162 "src/parse/lex.cc" +yy328: + ++YYCURSOR; +#line 597 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error"); } +#line 2167 "src/parse/lex.cc" +yy330: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '`') { + if (yych <= '7') { + if (yych <= ',') { + if (yych != '\n') goto yy332; + } else { + if (yych <= '-') goto yy334; + if (yych <= '/') goto yy332; + if (yych <= '3') goto yy336; + goto yy338; + } + } else { + if (yych <= 'X') { + if (yych == 'U') goto yy339; + if (yych <= 'W') goto yy332; + goto yy341; + } else { + if (yych <= '[') goto yy332; + if (yych <= '\\') goto yy342; + if (yych <= ']') goto yy344; + goto yy332; + } + } + } else { + if (yych <= 'q') { + if (yych <= 
'e') { + if (yych <= 'a') goto yy346; + if (yych <= 'b') goto yy348; + goto yy332; + } else { + if (yych <= 'f') goto yy350; + if (yych == 'n') goto yy352; + goto yy332; + } + } else { + if (yych <= 'u') { + if (yych <= 'r') goto yy354; + if (yych <= 's') goto yy332; + if (yych <= 't') goto yy356; + goto yy341; + } else { + if (yych <= 'v') goto yy358; + if (yych == 'x') goto yy360; + goto yy332; + } + } + } +#line 600 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } +#line 2218 "src/parse/lex.cc" +yy332: + ++YYCURSOR; +#line 615 "../src/parse/lex.re" + { + warn.useless_escape(tline, tok - pos, tok[1]); + return static_cast(tok[1]); + } +#line 2226 "src/parse/lex.cc" +yy334: + ++YYCURSOR; +#line 613 "../src/parse/lex.re" + { return static_cast('-'); } +#line 2231 "src/parse/lex.cc" +yy336: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '/') goto yy337; + if (yych <= '7') goto yy361; +yy337: +#line 599 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } +#line 2240 "src/parse/lex.cc" +yy338: + yych = (YYCTYPE)*++YYCURSOR; + goto yy337; +yy339: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy340; + if (yych <= '9') goto yy363; + } else { + if (yych <= 'F') goto yy363; + if (yych <= '`') goto yy340; + if (yych <= 'f') goto yy363; + } +yy340: +#line 598 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } +#line 2258 "src/parse/lex.cc" +yy341: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy340; + if (yych <= '9') goto yy364; + goto yy340; + } else { + if (yych <= 'F') goto yy364; + if (yych <= '`') goto yy340; + if (yych <= 'f') goto yy364; + goto yy340; + } +yy342: + ++YYCURSOR; +#line 612 "../src/parse/lex.re" + { return static_cast('\\'); } +#line 2276 "src/parse/lex.cc" +yy344: + 
++YYCURSOR; +#line 614 "../src/parse/lex.re" + { return static_cast(']'); } +#line 2281 "src/parse/lex.cc" +yy346: + ++YYCURSOR; +#line 605 "../src/parse/lex.re" + { return static_cast('\a'); } +#line 2286 "src/parse/lex.cc" +yy348: + ++YYCURSOR; +#line 606 "../src/parse/lex.re" + { return static_cast('\b'); } +#line 2291 "src/parse/lex.cc" +yy350: + ++YYCURSOR; +#line 607 "../src/parse/lex.re" + { return static_cast('\f'); } +#line 2296 "src/parse/lex.cc" +yy352: + ++YYCURSOR; +#line 608 "../src/parse/lex.re" + { return static_cast('\n'); } +#line 2301 "src/parse/lex.cc" +yy354: + ++YYCURSOR; +#line 609 "../src/parse/lex.re" + { return static_cast('\r'); } +#line 2306 "src/parse/lex.cc" +yy356: + ++YYCURSOR; +#line 610 "../src/parse/lex.re" + { return static_cast('\t'); } +#line 2311 "src/parse/lex.cc" +yy358: + ++YYCURSOR; +#line 611 "../src/parse/lex.re" + { return static_cast('\v'); } +#line 2316 "src/parse/lex.cc" +yy360: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy340; + if (yych <= '9') goto yy365; + goto yy340; + } else { + if (yych <= 'F') goto yy365; + if (yych <= '`') goto yy340; + if (yych <= 'f') goto yy365; + goto yy340; + } +yy361: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '/') goto yy362; + if (yych <= '7') goto yy366; +yy362: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy337; + } else { + goto yy340; + } +yy363: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy368; + goto yy362; + } else { + if (yych <= 'F') goto yy368; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy368; + goto yy362; + } +yy364: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy369; + goto yy362; + } else { + if (yych <= 'F') goto yy369; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy369; + goto yy362; + } +yy365: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if 
(yych <= '/') goto yy362; + if (yych <= '9') goto yy370; + goto yy362; + } else { + if (yych <= 'F') goto yy370; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy370; + goto yy362; + } +yy366: + ++YYCURSOR; +#line 604 "../src/parse/lex.re" + { return unesc_oct(tok, cur); } +#line 2381 "src/parse/lex.cc" +yy368: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy372; + goto yy362; + } else { + if (yych <= 'F') goto yy372; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy372; + goto yy362; + } +yy369: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy365; + goto yy362; + } else { + if (yych <= 'F') goto yy365; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy365; + goto yy362; + } +yy370: + ++YYCURSOR; +#line 603 "../src/parse/lex.re" + { return unesc_hex(tok, cur); } +#line 2410 "src/parse/lex.cc" +yy372: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych >= ':') goto yy362; + } else { + if (yych <= 'F') goto yy373; + if (yych <= '`') goto yy362; + if (yych >= 'g') goto yy362; + } +yy373: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy364; + goto yy362; + } else { + if (yych <= 'F') goto yy364; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy364; + goto yy362; + } +} +#line 619 "../src/parse/lex.re" + +} + +uint32_t Scanner::lex_str_chr(char quote, bool &end) +{ + end = false; + tok = cur; + +#line 2443 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy378; + if (yych == '\\') goto yy380; + ++YYCURSOR; +#line 632 "../src/parse/lex.re" + { + end = tok[0] == quote; + return static_cast(tok[0]); + } +#line 2457 "src/parse/lex.cc" +yy378: + ++YYCURSOR; +#line 627 "../src/parse/lex.re" + { fatal 
((tok - pos) - tchar, "syntax error"); } +#line 2462 "src/parse/lex.cc" +yy380: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 'a') { + if (yych <= 'T') { + if (yych <= '/') { + if (yych != '\n') goto yy382; + } else { + if (yych <= '3') goto yy384; + if (yych <= '7') goto yy386; + goto yy382; + } + } else { + if (yych <= 'X') { + if (yych <= 'U') goto yy387; + if (yych <= 'W') goto yy382; + goto yy389; + } else { + if (yych == '\\') goto yy390; + if (yych <= '`') goto yy382; + goto yy392; + } + } + } else { + if (yych <= 'r') { + if (yych <= 'f') { + if (yych <= 'b') goto yy394; + if (yych <= 'e') goto yy382; + goto yy396; + } else { + if (yych == 'n') goto yy398; + if (yych <= 'q') goto yy382; + goto yy400; + } + } else { + if (yych <= 'u') { + if (yych <= 's') goto yy382; + if (yych <= 't') goto yy402; + goto yy389; + } else { + if (yych <= 'v') goto yy404; + if (yych == 'x') goto yy406; + goto yy382; + } + } + } +#line 630 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } +#line 2510 "src/parse/lex.cc" +yy382: + ++YYCURSOR; +#line 646 "../src/parse/lex.re" + { + if (tok[1] != quote) { + warn.useless_escape(tline, tok - pos, tok[1]); + } + return static_cast(tok[1]); + } +#line 2520 "src/parse/lex.cc" +yy384: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '/') goto yy385; + if (yych <= '7') goto yy407; +yy385: +#line 629 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } +#line 2529 "src/parse/lex.cc" +yy386: + yych = (YYCTYPE)*++YYCURSOR; + goto yy385; +yy387: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy388; + if (yych <= '9') goto yy409; + } else { + if (yych <= 'F') goto yy409; + if (yych <= '`') goto yy388; + if (yych <= 'f') goto yy409; + } +yy388: +#line 628 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } +#line 2547 
"src/parse/lex.cc" +yy389: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy388; + if (yych <= '9') goto yy410; + goto yy388; + } else { + if (yych <= 'F') goto yy410; + if (yych <= '`') goto yy388; + if (yych <= 'f') goto yy410; + goto yy388; + } +yy390: + ++YYCURSOR; +#line 645 "../src/parse/lex.re" + { return static_cast('\\'); } +#line 2565 "src/parse/lex.cc" +yy392: + ++YYCURSOR; +#line 638 "../src/parse/lex.re" + { return static_cast('\a'); } +#line 2570 "src/parse/lex.cc" +yy394: + ++YYCURSOR; +#line 639 "../src/parse/lex.re" + { return static_cast('\b'); } +#line 2575 "src/parse/lex.cc" +yy396: + ++YYCURSOR; +#line 640 "../src/parse/lex.re" + { return static_cast('\f'); } +#line 2580 "src/parse/lex.cc" +yy398: + ++YYCURSOR; +#line 641 "../src/parse/lex.re" + { return static_cast('\n'); } +#line 2585 "src/parse/lex.cc" +yy400: + ++YYCURSOR; +#line 642 "../src/parse/lex.re" + { return static_cast('\r'); } +#line 2590 "src/parse/lex.cc" +yy402: + ++YYCURSOR; +#line 643 "../src/parse/lex.re" + { return static_cast('\t'); } +#line 2595 "src/parse/lex.cc" +yy404: + ++YYCURSOR; +#line 644 "../src/parse/lex.re" + { return static_cast('\v'); } +#line 2600 "src/parse/lex.cc" +yy406: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy388; + if (yych <= '9') goto yy411; + goto yy388; + } else { + if (yych <= 'F') goto yy411; + if (yych <= '`') goto yy388; + if (yych <= 'f') goto yy411; + goto yy388; + } +yy407: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '/') goto yy408; + if (yych <= '7') goto yy412; +yy408: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy385; + } else { + goto yy388; + } +yy409: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy414; + goto yy408; + } else { + if (yych <= 'F') goto yy414; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy414; + goto yy408; + } 
+yy410: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy415; + goto yy408; + } else { + if (yych <= 'F') goto yy415; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy415; + goto yy408; + } +yy411: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy416; + goto yy408; + } else { + if (yych <= 'F') goto yy416; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy416; + goto yy408; + } +yy412: + ++YYCURSOR; +#line 637 "../src/parse/lex.re" + { return unesc_oct(tok, cur); } +#line 2665 "src/parse/lex.cc" +yy414: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy418; + goto yy408; + } else { + if (yych <= 'F') goto yy418; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy418; + goto yy408; + } +yy415: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy411; + goto yy408; + } else { + if (yych <= 'F') goto yy411; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy411; + goto yy408; + } +yy416: + ++YYCURSOR; +#line 636 "../src/parse/lex.re" + { return unesc_hex(tok, cur); } +#line 2694 "src/parse/lex.cc" +yy418: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy419; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy419: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy410; + goto yy408; + } else { + if (yych <= 'F') goto yy410; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy410; + goto yy408; + } +} +#line 652 "../src/parse/lex.re" + +} + +RegExp *Scanner::lex_str(char quote, bool casing) +{ + RegExp *r = NULL; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, end); + if (end) { + return r ? 
r : new NullOp; + } + r = doCat(r, casing ? ichr(c) : schr(c)); + } +} + +void Scanner::set_sourceline () +{ +sourceline: + tok = cur; + +#line 2739 "src/parse/lex.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '!') { + if (yych == '\n') goto yy424; + } else { + if (yych <= '"') goto yy426; + if (yych <= '0') goto yy422; + if (yych <= '9') goto yy427; + } +yy422: + ++YYCURSOR; +yy423: +#line 695 "../src/parse/lex.re" + { + goto sourceline; + } +#line 2792 "src/parse/lex.cc" +yy424: + ++YYCURSOR; +#line 683 "../src/parse/lex.re" + { + if (cur == 
eof) + { + --cur; + } + else + { + pos = cur; + } + tok = cur; + return; + } +#line 2808 "src/parse/lex.cc" +yy426: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy423; + goto yy431; +yy427: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy427; + } +#line 672 "../src/parse/lex.re" + { + if (!s_to_u32_unsafe (tok, cur, cline)) + { + fatal ("line number overflow"); + } + goto sourceline; + } +#line 2828 "src/parse/lex.cc" +yy430: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy431: + if (yybm[0+yych] & 128) { + goto yy430; + } + if (yych <= '\n') goto yy432; + if (yych <= '"') goto yy433; + goto yy435; +yy432: + YYCURSOR = YYMARKER; + goto yy423; +yy433: + ++YYCURSOR; +#line 679 "../src/parse/lex.re" + { + escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes + goto sourceline; + } +#line 2850 "src/parse/lex.cc" +yy435: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy432; + goto yy430; +} +#line 698 "../src/parse/lex.re" + +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/lex.re b/tools/re2c/src/parse/lex.re new file mode 100644 index 000000000..2fd98fb93 --- /dev/null +++ b/tools/re2c/src/parse/lex.re @@ -0,0 +1,701 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/input.h" +#include "src/parse/scanner.h" +#include "src/parse/parser.h" // needed by "y.tab.h" +#include "src/parse/unescape.h" +#include "src/util/range.h" +#include "src/util/s_to_n32_unsafe.h" +#include "y.tab.h" + +extern YYSTYPE 
yylval; + +#define YYCTYPE unsigned char +#define YYCURSOR cur +#define YYLIMIT lim +#define YYMARKER ptr +#define YYCTXMARKER ctx +#define YYFILL(n) { fill (n); } + +namespace re2c +{ + +// source code is in ASCII: pointers have type 'char *' +// but re2c makes an implicit assumption that YYCTYPE is unsigned +// when it generates comparisons +/*!re2c + re2c:yych:conversion = 1; +*/ + +/*!re2c +zero = "\000"; +dstring = "\"" ((. \ [\\"] ) | "\\" .)* "\""; +sstring = "'" ((. \ [\\'] ) | "\\" .)* "'" ; +letter = [a-zA-Z]; +digit = [0-9]; +lineno = [1-9] digit*; +name = (letter|digit|"_")+; +space = [ \t]; +ws = (space | [\r\n]); +eol = ("\r\n" | "\n"); +lineinf = lineno (space+ dstring)? eol; + + esc = "\\"; + hex_digit = [0-9a-fA-F]; + esc_hex = esc ("x" hex_digit{2} | [uX] hex_digit{4} | "U" hex_digit{8}); + esc_oct = esc [0-3] [0-7]{2}; // max 1-byte octal value is '\377' + esc_simple = esc [abfnrtv\\]; +*/ + +Scanner::ParseMode Scanner::echo() +{ + bool ignore_eoc = false; + int ignore_cnt = 0; + + if (eof && cur == eof) // Catch EOF + { + return Stop; + } + + tok = cur; +echo: +/*!re2c + beginRE = "%{" | "/*!re2c"; + beginRE { + if (opts->rFlag) + { + fatal("found standard 're2c' block while using -r flag"); + } + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = cur[-1] == '{' + ? 
sizeof ("%{") - 1 + : sizeof ("/*!re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Parse; + } + "/*!rules:re2c" { + if (opts->rFlag) + { + opts.reset_mapCodeName (); + } + else + { + fatal("found 'rules:re2c' block without -r flag"); + } + tok = cur; + return Rules; + } + "/*!use:re2c" { + if (!opts->rFlag) + { + fatal("found 'use:re2c' block without -r flag"); + } + reuse(); + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = sizeof ("/*!use:re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Reuse; + } + "/*!max:re2c" { + if (opts->target != opt_t::DOT) + { + out.wdelay_yymaxfill (); + } + tok = pos = cur; + ignore_eoc = true; + goto echo; + } + "/*!getstate:re2c" { + tok = pos = cur; + out.wdelay_state_goto (opts->topIndent); + ignore_eoc = true; + goto echo; + } + "/*!ignore:re2c" { + tok = pos = cur; + ignore_eoc = true; + goto echo; + } + "/*!types:re2c" { + tok = pos = cur; + ignore_eoc = true; + if (opts->target != opt_t::DOT) + { + out.wdelay_line_info ().ws("\n") + .wdelay_types ().ws("\n") + .wline_info (cline, get_fname ().c_str ()); + } + goto echo; + } + "*" "/" "\r"? 
"\n" { + cline++; + if (ignore_eoc) + { + if (ignore_cnt) + { + out.wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } + "*" "/" { + if (ignore_eoc) + { + if (ignore_cnt) + { + out.ws("\n").wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } + "\n" space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto echo; + } + "\n" { + if (ignore_eoc) + { + ignore_cnt++; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + cline++; + goto echo; + } + zero { + if (!ignore_eoc && opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len () - 1); + // -1 so we don't write out the \0 + } + if(cur == eof) + { + return Stop; + } + } + * { + goto echo; + } +*/ +} + +int Scanner::scan() +{ + uint32_t depth; + +scan: + tchar = cur - pos; + tline = cline; + tok = cur; + switch (lexer_state) + { + case LEX_NORMAL: goto start; + case LEX_FLEX_NAME: goto flex_name; + } + +start: +/*!re2c + "{" { + depth = 1; + goto code; + } + + ":" / "=>" { + return *tok; + } + + ":=" { + tok += 2; /* skip ":=" */ + depth = 0; + goto code; + } + + "//" { + goto nextLine; + } + "/*" { + depth = 1; + goto comment; + } + + endRE = "%}" | "*/"; + endRE { + tok = cur; + return 0; + } + + "'" { yylval.regexp = lex_str('\'', opts->bCaseInsensitive || !opts->bCaseInverted); return TOKEN_REGEXP; } + "\"" { yylval.regexp = lex_str('"', opts->bCaseInsensitive || opts->bCaseInverted); return TOKEN_REGEXP; } + "[" { yylval.regexp = lex_cls(false); return TOKEN_REGEXP; } + "[^" { yylval.regexp = lex_cls(true); return TOKEN_REGEXP; } + + "<>" / (space* ("{" | "=>" | ":=")) { + return TOKEN_NOCOND; + } + ",()|=;/\\] { + return *tok; + } + + "*" { + yylval.op 
= *tok; + return TOKEN_STAR; + } + [+?] { + yylval.op = *tok; + return TOKEN_CLOSE; + } + + "{" [0-9]+ "}" { + if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min)) + { + fatal ("repetition count overflow"); + } + yylval.extop.max = yylval.extop.min; + return TOKEN_CLOSESIZE; + } + + "{" [0-9]+ "," [0-9]+ "}" { + const char * p = strchr (tok, ','); + if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + if (!s_to_u32_unsafe (p + 1, cur - 1, yylval.extop.max)) + { + fatal ("repetition upper bound overflow"); + } + return TOKEN_CLOSESIZE; + } + + "{" [0-9]+ ",}" { + if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + yylval.extop.max = std::numeric_limits::max(); + return TOKEN_CLOSESIZE; + } + + "{" [0-9]* "," { + fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); + } + + "{" name "}" { + if (!opts->FFlag) { + fatal("curly braces for names only allowed with -F switch"); + } + yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces + return TOKEN_ID; + } + + "re2c:" { lex_conf (); return TOKEN_CONF; } + + name / (space+ [^=>,]) { + yylval.str = new std::string (tok, tok_len ()); + if (opts->FFlag) + { + lexer_state = LEX_FLEX_NAME; + return TOKEN_FID; + } + else + { + return TOKEN_ID; + } + } + + name / (space* [=>,]) { + yylval.str = new std::string (tok, tok_len ()); + return TOKEN_ID; + } + + name / [^] { + if (!opts->FFlag) { + yylval.str = new std::string (tok, tok_len()); + return TOKEN_ID; + } else { + RegExp *r = NULL; + const bool casing = opts->bCaseInsensitive || opts->bCaseInverted; + for (char *s = tok; s < cur; ++s) { + const uint32_t c = static_cast(*s); + r = doCat(r, casing ? ichr(c) : schr(c)); + } + yylval.regexp = r ? r : new NullOp; + return TOKEN_REGEXP; + } + } + + "." 
{ + yylval.regexp = mkDot(); + return TOKEN_REGEXP; + } + + space+ { + goto scan; + } + + eol space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto scan; + } + + eol { + if (cur == eof) return 0; + pos = cur; + cline++; + goto scan; + } + + * { + fatalf("unexpected character: '%c'", *tok); + goto scan; + } +*/ + +flex_name: +/*!re2c + eol + { + YYCURSOR = tok; + lexer_state = LEX_NORMAL; + return TOKEN_FID_END; + } + * + { + YYCURSOR = tok; + goto start; + } +*/ + +code: +/*!re2c + "}" { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else if (--depth == 0) + { + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + goto code; + } + "{" { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else + { + ++depth; + } + goto code; + } + "\n" space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto code; + } + "\n" / ws { + if (depth == 0) + { + goto code; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } + "\n" { + if (depth == 0) + { + tok += strspn(tok, " \t\r\n"); + while (cur > tok && strchr(" \t\r\n", cur[-1])) + { + --cur; + } + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } + zero { + if (cur == eof) + { + if (depth) + { + fatal("missing '}'"); + } + return 0; + } + goto code; + } + dstring | sstring { + goto code; + } + * { + goto code; + } +*/ + +comment: +/*!re2c + "*/" { + if (--depth == 0) + { + goto scan; + } + else + { + goto comment; + } + } + "/*" { + ++depth; + fatal("ambiguous /* found"); + goto comment; + } + "\n" space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto comment; + } + "\n" { + if (cur == eof) + { + return 0; + } + tok = pos = cur; + cline++; + goto comment; + } + * { + if (cur == eof) + { + return 0; + } + 
goto comment; + } +*/ + +nextLine: +/*!re2c /* resync emacs */ + "\n" { if(cur == eof) { + return 0; + } + tok = pos = cur; + cline++; + goto scan; + } + * { if(cur == eof) { + return 0; + } + goto nextLine; + } +*/ +} + +static void escape (std::string & dest, const std::string & src) +{ + dest = src; + size_t l = dest.length(); + for (size_t p = 0; p < l; ++p) + { + if (dest[p] == '\\') + { + dest.insert(++p, "\\"); + ++l; + } + } +} + +RegExp *Scanner::lex_cls(bool neg) +{ + Range *r = NULL, *s; + uint32_t u, l; +fst: + /*!re2c + "]" { goto end; } + "" { l = lex_cls_chr(); goto snd; } + */ +snd: + /*!re2c + "" { u = l; goto add; } + "-" / [^\]] { + u = lex_cls_chr(); + if (l > u) { + warn.swapped_range(get_line(), l, u); + std::swap(l, u); + } + goto add; + } + */ +add: + if (!(s = opts->encoding.encodeRange(l, u))) { + fatalf ("Bad code point range: '0x%X - 0x%X'", l, u); + } + r = Range::add(r, s); + goto fst; +end: + if (neg) { + r = Range::sub(opts->encoding.fullRange(), r); + } + return cls(r); +} + +uint32_t Scanner::lex_cls_chr() +{ + tok = cur; + /*!re2c + * { fatal ((tok - pos) - tchar, "syntax error"); } + esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } + esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } + esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } + + . \ esc { return static_cast(tok[0]); } + esc_hex { return unesc_hex(tok, cur); } + esc_oct { return unesc_oct(tok, cur); } + esc "a" { return static_cast('\a'); } + esc "b" { return static_cast('\b'); } + esc "f" { return static_cast('\f'); } + esc "n" { return static_cast('\n'); } + esc "r" { return static_cast('\r'); } + esc "t" { return static_cast('\t'); } + esc "v" { return static_cast('\v'); } + esc "\\" { return static_cast('\\'); } + esc "-" { return static_cast('-'); } + esc "]" { return static_cast(']'); } + esc . 
{ + warn.useless_escape(tline, tok - pos, tok[1]); + return static_cast(tok[1]); + } + */ +} + +uint32_t Scanner::lex_str_chr(char quote, bool &end) +{ + end = false; + tok = cur; + /*!re2c + * { fatal ((tok - pos) - tchar, "syntax error"); } + esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } + esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } + esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } + + . \ esc { + end = tok[0] == quote; + return static_cast(tok[0]); + } + esc_hex { return unesc_hex(tok, cur); } + esc_oct { return unesc_oct(tok, cur); } + esc "a" { return static_cast('\a'); } + esc "b" { return static_cast('\b'); } + esc "f" { return static_cast('\f'); } + esc "n" { return static_cast('\n'); } + esc "r" { return static_cast('\r'); } + esc "t" { return static_cast('\t'); } + esc "v" { return static_cast('\v'); } + esc "\\" { return static_cast('\\'); } + esc . { + if (tok[1] != quote) { + warn.useless_escape(tline, tok - pos, tok[1]); + } + return static_cast(tok[1]); + } + */ +} + +RegExp *Scanner::lex_str(char quote, bool casing) +{ + RegExp *r = NULL; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, end); + if (end) { + return r ? r : new NullOp; + } + r = doCat(r, casing ? 
ichr(c) : schr(c)); + } +} + +void Scanner::set_sourceline () +{ +sourceline: + tok = cur; +/*!re2c + lineno { + if (!s_to_u32_unsafe (tok, cur, cline)) + { + fatal ("line number overflow"); + } + goto sourceline; + } + dstring { + escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes + goto sourceline; + } + "\n" { + if (cur == eof) + { + --cur; + } + else + { + pos = cur; + } + tok = cur; + return; + } + * { + goto sourceline; + } +*/ +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/lex_conf.cc b/tools/re2c/src/parse/lex_conf.cc new file mode 100644 index 000000000..eb0aec7fa --- /dev/null +++ b/tools/re2c/src/parse/lex_conf.cc @@ -0,0 +1,2284 @@ +/* Generated by re2c 0.16 on Thu Jan 21 10:47:47 2016 */ +#line 1 "../src/parse/lex_conf.re" +#include "src/util/c99_stdint.h" +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/parse/scanner.h" +#include "src/util/s_to_n32_unsafe.h" + +namespace re2c +{ + +// global re2c config (affects the whole file) +#line 36 "../src/parse/lex_conf.re" + + +void Scanner::lex_conf () +{ + tok = cur; + +#line 25 "src/parse/lex_conf.cc" +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 27) fill(27); + yych = (unsigned char)*cur; + switch (yych) { + case 'c': goto yy4; + case 'd': goto yy5; + case 'f': goto yy6; + case 'i': goto yy7; + case 'l': goto yy8; + case 's': goto yy9; + case 'v': goto yy10; + case 'y': goto yy11; + default: goto yy2; + } +yy2: + ++cur; +yy3: +#line 42 "../src/parse/lex_conf.re" + { fatal ((tok - pos) - tchar, "unrecognized configuration"); } +#line 81 "src/parse/lex_conf.cc" +yy4: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'g') goto yy12; + if (yych == 'o') goto yy14; + goto yy3; +yy5: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'e') goto yy15; + goto yy3; +yy6: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'l') goto yy16; + goto yy3; +yy7: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'n') goto yy17; + goto yy3; +yy8: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'a') goto yy18; + goto yy3; +yy9: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 't') goto yy19; + goto yy3; +yy10: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'a') goto yy20; + goto yy3; +yy11: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'y') goto yy21; + goto yy3; +yy12: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy22; +yy13: + cur = ptr; + if (yyaccept <= 5) { + if (yyaccept <= 2) { + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy3; + } else { + goto yy119; + } + } else { + goto yy161; + } + } else { + if (yyaccept <= 4) { + if (yyaccept == 3) { + goto yy206; + } else { + goto yy255; + } + } else { + goto yy321; + } + } + } else { + if (yyaccept <= 8) { + if (yyaccept <= 7) { + if (yyaccept == 6) { + goto yy356; + } else { + goto yy377; + } + 
} else { + goto yy383; + } + } else { + if (yyaccept == 9) { + goto yy420; + } else { + goto yy423; + } + } + } +yy14: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy23; + goto yy13; +yy15: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy24; + goto yy13; +yy16: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy25; + goto yy13; +yy17: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy26; + goto yy13; +yy18: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy27; + goto yy13; +yy19: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy28; + goto yy13; +yy20: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy29; + goto yy13; +yy21: + yych = (unsigned char)*++cur; + if (yych <= 'c') { + if (yych <= 'a') goto yy13; + if (yych <= 'b') goto yy30; + goto yy31; + } else { + if (yych == 'f') goto yy32; + goto yy13; + } +yy22: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy33; + goto yy13; +yy23: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy34; + goto yy13; +yy24: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy35; + goto yy13; +yy25: + yych = (unsigned char)*++cur; + if (yych == 'g') goto yy36; + goto yy13; +yy26: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy37; + goto yy13; +yy27: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy38; + goto yy13; +yy28: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy39; + if (yych == 't') goto yy40; + goto yy13; +yy29: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy41; + goto yy13; +yy30: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy42; + goto yy13; +yy31: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy43; + goto yy13; +yy32: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy44; + goto yy13; +yy33: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy45; + goto yy13; +yy34: + yych = (unsigned char)*++cur; + if (yych <= 'd') { + if (yych == ':') goto yy46; + goto yy13; + } 
else { + if (yych <= 'e') goto yy47; + if (yych == 'p') goto yy48; + goto yy13; + } +yy35: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy49; + goto yy13; +yy36: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy50; + goto yy13; +yy37: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy51; + goto yy13; +yy38: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy52; + goto yy13; +yy39: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy53; + goto yy13; +yy40: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy54; + goto yy13; +yy41: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy55; + goto yy13; +yy42: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy56; + goto yy13; +yy43: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy57; + goto yy13; +yy44: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy58; + goto yy13; +yy45: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy59; + goto yy13; +yy46: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy60; + if (yych == 'g') goto yy61; + goto yy13; +yy47: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy62; + goto yy13; +yy48: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy63; + goto yy13; +yy49: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy64; + goto yy13; +yy50: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy65; + goto yy13; +yy51: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy66; + goto yy13; +yy52: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy67; + if (yych == 'p') goto yy68; + goto yy13; +yy53: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy69; + goto yy13; +yy54: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy70; + goto yy13; +yy55: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy71; + goto yy13; +yy56: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy72; + goto yy13; +yy57: + yych = (unsigned char)*++cur; + if (yych 
== 'c') goto yy73; + if (yych == 'e') goto yy74; + goto yy13; +yy58: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy75; + goto yy13; +yy59: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy76; + goto yy13; +yy60: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy77; + goto yy13; +yy61: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy78; + goto yy13; +yy62: + yych = (unsigned char)*++cur; + if (yych == 'u') goto yy79; + goto yy13; +yy63: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy80; + goto yy13; +yy64: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy81; + goto yy13; +yy65: + yych = (unsigned char)*++cur; + if (yych <= 'e') { + if (yych == '8') goto yy82; + if (yych <= 'd') goto yy13; + goto yy82; + } else { + if (yych <= 'u') { + if (yych <= 't') goto yy13; + goto yy82; + } else { + if (yych <= 'v') goto yy13; + if (yych <= 'x') goto yy82; + goto yy13; + } + } +yy66: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy84; + goto yy13; +yy67: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy85; + goto yy13; +yy68: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy86; + goto yy13; +yy69: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy87; + goto yy13; +yy70: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy88; + if (yych == 'n') goto yy89; + goto yy13; +yy71: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy90; + goto yy13; +yy72: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy91; + goto yy13; +yy73: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy92; + goto yy13; +yy74: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy93; + goto yy13; +yy75: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy94; + goto yy13; +yy76: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy95; + goto yy13; +yy77: + yych = (unsigned char)*++cur; + if (yych == 'v') goto yy96; + goto yy13; +yy78: + yych = (unsigned char)*++cur; + if (yych 
== 't') goto yy97; + goto yy13; +yy79: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy98; + goto yy13; +yy80: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy99; + goto yy13; +yy81: + yych = (unsigned char)*++cur; + if (yych == 'Y') goto yy100; + goto yy13; +yy82: + ++cur; +#line 45 "../src/parse/lex_conf.re" + { + Enc::type_t enc = Enc::ASCII; + switch (cur[-1]) + { + case 'e': enc = Enc::EBCDIC; break; + case 'w': enc = Enc::UCS2; break; + case 'x': enc = Enc::UTF16; break; + case 'u': enc = Enc::UTF32; break; + case '8': enc = Enc::UTF8; break; + } + const int32_t n = lex_conf_number (); + if (n == 0) + { + opts.unset_encoding (enc); + } + else if (!opts.set_encoding (enc)) + { + fatalf ("Cannot set %s encoding: please reset %s encoding first" + , Enc::name (enc) + , Enc::name (opts->encoding.type ())); + } + return; + } +#line 496 "src/parse/lex_conf.cc" +yy84: + yych = (unsigned char)*++cur; + if (yych <= 'r') goto yy13; + if (yych <= 's') goto yy101; + if (yych <= 't') goto yy102; + goto yy13; +yy85: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy103; + goto yy13; +yy86: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy104; + goto yy13; +yy87: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy105; + goto yy13; +yy88: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy106; + goto yy13; +yy89: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy107; + goto yy13; +yy90: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy108; + goto yy13; +yy91: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy109; + goto yy13; +yy92: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy111; + goto yy13; +yy93: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy112; + goto yy13; +yy94: + yych = (unsigned char)*++cur; + if (yych <= 'd') { + if (yych == 'c') goto yy113; + goto yy13; + } else { + if (yych <= 'e') goto yy114; + if (yych == 'p') goto yy115; + goto yy13; + } +yy95: + yych = 
(unsigned char)*++cur; + if (yych == 'r') goto yy116; + goto yy13; +yy96: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy117; + goto yy13; +yy97: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy118; + goto yy13; +yy98: + yych = (unsigned char)*++cur; + if (yych == 'p') goto yy120; + goto yy13; +yy99: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy121; + goto yy13; +yy100: + yych = (unsigned char)*++cur; + if (yych == 'Y') goto yy122; + goto yy13; +yy101: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy123; + goto yy13; +yy102: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy124; + goto yy13; +yy103: + yych = (unsigned char)*++cur; + if (yych == 'F') goto yy125; + if (yych == 'N') goto yy126; + goto yy13; +yy104: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy127; + goto yy13; +yy105: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy128; + goto yy13; +yy106: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy129; + goto yy13; +yy107: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy130; + goto yy13; +yy108: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy131; + goto yy13; +yy109: + ++cur; +#line 95 "../src/parse/lex_conf.re" + { opts.set_yybmHexTable (lex_conf_number () != 0); return; } +#line 610 "src/parse/lex_conf.cc" +yy111: + yych = (unsigned char)*++cur; + if (yych == 'v') goto yy132; + goto yy13; +yy112: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy133; + goto yy13; +yy113: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy135; + goto yy13; +yy114: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy136; + goto yy13; +yy115: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy137; + goto yy13; +yy116: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy138; + goto yy13; +yy117: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy139; + goto yy13; +yy118: + yyaccept = 1; + yych = (unsigned char)*(ptr = ++cur); + if 
(yych == '@') goto yy140; +yy119: +#line 79 "../src/parse/lex_conf.re" + { opts.set_condGoto (lex_conf_string ()); return; } +#line 646 "src/parse/lex_conf.cc" +yy120: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy141; + goto yy13; +yy121: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy142; + goto yy13; +yy122: + yych = (unsigned char)*++cur; + switch (yych) { + case 'B': goto yy144; + case 'C': goto yy145; + case 'D': goto yy146; + case 'F': goto yy147; + case 'G': goto yy148; + case 'L': goto yy149; + case 'M': goto yy150; + case 'P': goto yy151; + case 'R': goto yy152; + case 'S': goto yy153; + default: goto yy13; + } +yy123: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy154; + goto yy13; +yy124: + yych = (unsigned char)*++cur; + if (yych == 'p') goto yy155; + goto yy13; +yy125: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy157; + goto yy13; +yy126: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy158; + goto yy13; +yy127: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy159; + goto yy13; +yy128: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy160; + goto yy13; +yy129: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy162; + goto yy13; +yy130: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy163; + goto yy13; +yy131: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy164; + goto yy13; +yy132: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy165; + goto yy13; +yy133: + ++cur; +#line 138 "../src/parse/lex_conf.re" + { opts.set_bEmitYYCh (lex_conf_number () != 0); return; } +#line 714 "src/parse/lex_conf.cc" +yy135: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy166; + goto yy13; +yy136: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy167; + goto yy13; +yy137: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy168; + goto yy13; +yy138: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy169; + goto yy13; +yy139: + yych = 
(unsigned char)*++cur; + if (yych == 'e') goto yy170; + goto yy13; +yy140: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy171; + goto yy13; +yy141: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy172; + goto yy13; +yy142: + ++cur; +#line 75 "../src/parse/lex_conf.re" + { opts.set_condPrefix (lex_conf_string ()); return; } +#line 747 "src/parse/lex_conf.cc" +yy144: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy173; + goto yy13; +yy145: + yych = (unsigned char)*++cur; + if (yych <= 'S') { + if (yych == 'O') goto yy174; + goto yy13; + } else { + if (yych <= 'T') goto yy175; + if (yych <= 'U') goto yy176; + goto yy13; + } +yy146: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy177; + goto yy13; +yy147: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy178; + goto yy13; +yy148: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy179; + goto yy13; +yy149: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy180; + if (yych == 'I') goto yy181; + goto yy13; +yy150: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy182; + goto yy13; +yy151: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy183; + goto yy13; +yy152: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy184; + goto yy13; +yy153: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy185; + if (yych == 'K') goto yy186; + goto yy13; +yy154: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy187; + goto yy13; +yy155: + ++cur; +#line 123 "../src/parse/lex_conf.re" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'indent:top' must be nonnegative"); + } + opts.set_topIndent (static_cast (n)); + return; + } +#line 812 "src/parse/lex_conf.cc" +yy157: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy188; + goto yy13; +yy158: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy189; + goto yy13; +yy159: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy190; + goto yy13; 
+yy160: + yyaccept = 2; + yych = (unsigned char)*(ptr = ++cur); + ctx = cur; + if (yybm[0+yych] & 64) { + goto yy192; + } + if (yych == '=') goto yy194; +yy161: +#line 151 "../src/parse/lex_conf.re" + { out.set_user_start_label (lex_conf_string ()); return; } +#line 836 "src/parse/lex_conf.cc" +yy162: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy196; + goto yy13; +yy163: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy198; + goto yy13; +yy164: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy199; + goto yy13; +yy165: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy200; + goto yy13; +yy166: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy201; + goto yy13; +yy167: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy202; + goto yy13; +yy168: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy203; + goto yy13; +yy169: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy204; + goto yy13; +yy170: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy205; + goto yy13; +yy171: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy207; + goto yy13; +yy172: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy208; + goto yy13; +yy173: + yych = (unsigned char)*++cur; + if (yych == 'C') goto yy209; + goto yy13; +yy174: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy210; + goto yy13; +yy175: + yych = (unsigned char)*++cur; + if (yych <= 'W') goto yy13; + if (yych <= 'X') goto yy211; + if (yych <= 'Y') goto yy212; + goto yy13; +yy176: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy213; + goto yy13; +yy177: + yych = (unsigned char)*++cur; + if (yych == 'B') goto yy214; + goto yy13; +yy178: + yych = (unsigned char)*++cur; + if (yych == 'L') goto yy215; + goto yy13; +yy179: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy216; + goto yy13; +yy180: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy217; + goto yy13; +yy181: + yych = (unsigned char)*++cur; + if 
(yych == 'M') goto yy218; + goto yy13; +yy182: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy219; + goto yy13; +yy183: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy220; + goto yy13; +yy184: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy221; + goto yy13; +yy185: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy222; + goto yy13; +yy186: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy223; + goto yy13; +yy187: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy224; + goto yy13; +yy188: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy225; + goto yy13; +yy189: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy226; + goto yy13; +yy190: + ++cur; +#line 147 "../src/parse/lex_conf.re" + { opts.set_labelPrefix (lex_conf_string ()); return; } +#line 955 "src/parse/lex_conf.cc" +yy192: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 64) { + goto yy192; + } + if (yych != '=') goto yy13; +yy194: + ++cur; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= ',') { + if (yych <= '\t') { + if (yych <= 0x08) goto yy13; + goto yy194; + } else { + if (yych == ' ') goto yy194; + goto yy13; + } + } else { + if (yych <= '/') { + if (yych <= '-') goto yy228; + goto yy13; + } else { + if (yych <= '0') goto yy229; + if (yych <= '9') goto yy231; + goto yy13; + } + } +yy196: + ++cur; +#line 90 "../src/parse/lex_conf.re" + { opts.set_bUseStateAbort (lex_conf_number () != 0); return; } +#line 990 "src/parse/lex_conf.cc" +yy198: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy233; + goto yy13; +yy199: + yych = (unsigned char)*++cur; + if (yych <= 'c') { + if (yych <= '`') goto yy13; + if (yych <= 'a') goto yy234; + if (yych <= 'b') goto yy235; + goto yy236; + } else { + if (yych <= 'r') goto yy13; + if (yych <= 's') goto yy237; + if (yych <= 't') goto yy238; + goto yy13; + } +yy200: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy239; 
+ goto yy13; +yy201: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy240; + goto yy13; +yy202: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy242; + goto yy13; +yy203: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy243; + goto yy13; +yy204: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy244; + goto yy13; +yy205: + yyaccept = 3; + yych = (unsigned char)*(ptr = ++cur); + if (yych == '@') goto yy245; +yy206: +#line 77 "../src/parse/lex_conf.re" + { opts.set_condDivider (lex_conf_string ()); return; } +#line 1035 "src/parse/lex_conf.cc" +yy207: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy246; + goto yy13; +yy208: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy247; + goto yy13; +yy209: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy248; + goto yy13; +yy210: + yych = (unsigned char)*++cur; + if (yych == 'D') goto yy249; + goto yy13; +yy211: + yych = (unsigned char)*++cur; + if (yych == 'M') goto yy250; + goto yy13; +yy212: + yych = (unsigned char)*++cur; + if (yych == 'P') goto yy251; + goto yy13; +yy213: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy252; + goto yy13; +yy214: + yych = (unsigned char)*++cur; + if (yych == 'U') goto yy253; + goto yy13; +yy215: + yych = (unsigned char)*++cur; + if (yych == 'L') goto yy254; + goto yy13; +yy216: + yych = (unsigned char)*++cur; + if (yych == 'C') goto yy256; + if (yych == 'S') goto yy257; + goto yy13; +yy217: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy258; + goto yy13; +yy218: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy259; + goto yy13; +yy219: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy260; + goto yy13; +yy220: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy261; + goto yy13; +yy221: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy263; + goto yy13; +yy222: + yych = (unsigned char)*++cur; + if (yych == 'C') goto yy264; + if (yych == 'S') goto yy265; + goto yy13; +yy223: + 
yych = (unsigned char)*++cur; + if (yych == 'P') goto yy266; + goto yy13; +yy224: + yych = (unsigned char)*++cur; + if (yych == 'g') goto yy268; + goto yy13; +yy225: + yych = (unsigned char)*++cur; + if (yych == 'L') goto yy270; + goto yy13; +yy226: + ++cur; +#line 89 "../src/parse/lex_conf.re" + { opts.set_yynext (lex_conf_string ()); return; } +#line 1118 "src/parse/lex_conf.cc" +yy228: + yych = (unsigned char)*++cur; + if (yych <= '0') goto yy13; + if (yych <= '9') goto yy231; + goto yy13; +yy229: + ++cur; +yy230: + cur = ctx; +#line 150 "../src/parse/lex_conf.re" + { out.set_force_start_label (lex_conf_number () != 0); return; } +#line 1130 "src/parse/lex_conf.cc" +yy231: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy231; + } + goto yy230; +yy233: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy271; + goto yy13; +yy234: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy272; + goto yy13; +yy235: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy273; + goto yy13; +yy236: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy275; + if (yych == 't') goto yy277; + goto yy13; +yy237: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy278; + goto yy13; +yy238: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy279; + goto yy13; +yy239: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy280; + goto yy13; +yy240: + ++cur; +#line 145 "../src/parse/lex_conf.re" + { opts.set_fill_check (lex_conf_number () != 0); return; } +#line 1172 "src/parse/lex_conf.cc" +yy242: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy281; + goto yy13; +yy243: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy283; + goto yy13; +yy244: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy284; + goto yy13; +yy245: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy285; + goto yy13; +yy246: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy286; + goto 
yy13; +yy247: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy288; + goto yy13; +yy248: + yych = (unsigned char)*++cur; + if (yych == 'U') goto yy290; + goto yy13; +yy249: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy291; + goto yy13; +yy250: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy292; + goto yy13; +yy251: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy293; + goto yy13; +yy252: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy295; + goto yy13; +yy253: + yych = (unsigned char)*++cur; + if (yych == 'G') goto yy296; + goto yy13; +yy254: + yyaccept = 4; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy298; + if (yych == '@') goto yy299; +yy255: +#line 140 "../src/parse/lex_conf.re" + { opts.set_fill (lex_conf_string ()); return; } +#line 1229 "src/parse/lex_conf.cc" +yy256: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy300; + goto yy13; +yy257: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy301; + goto yy13; +yy258: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy302; + goto yy13; +yy259: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy303; + goto yy13; +yy260: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy305; + goto yy13; +yy261: + ++cur; +#line 113 "../src/parse/lex_conf.re" + { opts.set_yypeek (lex_conf_string ()); return; } +#line 1254 "src/parse/lex_conf.cc" +yy263: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy306; + goto yy13; +yy264: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy307; + goto yy13; +yy265: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy308; + goto yy13; +yy266: + ++cur; +#line 114 "../src/parse/lex_conf.re" + { opts.set_yyskip (lex_conf_string ()); return; } +#line 1271 "src/parse/lex_conf.cc" +yy268: + ++cur; +#line 121 "../src/parse/lex_conf.re" + { opts.set_indString (lex_conf_string ()); return; } +#line 1276 "src/parse/lex_conf.cc" +yy270: + yych = (unsigned char)*++cur; + 
if (yych == 'a') goto yy309; + goto yy13; +yy271: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy310; + goto yy13; +yy272: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy311; + goto yy13; +yy273: + ++cur; +#line 94 "../src/parse/lex_conf.re" + { opts.set_yybm (lex_conf_string ()); return; } +#line 1293 "src/parse/lex_conf.cc" +yy275: + ++cur; +#line 136 "../src/parse/lex_conf.re" + { opts.set_yych (lex_conf_string ()); return; } +#line 1298 "src/parse/lex_conf.cc" +yy277: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy312; + goto yy13; +yy278: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy313; + goto yy13; +yy279: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy314; + goto yy13; +yy280: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy315; + goto yy13; +yy281: + ++cur; +#line 141 "../src/parse/lex_conf.re" + { opts.set_fill_use (lex_conf_number () != 0); return; } +#line 1319 "src/parse/lex_conf.cc" +yy283: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy316; + goto yy13; +yy284: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy317; + goto yy13; +yy285: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy319; + goto yy13; +yy286: + ++cur; +#line 80 "../src/parse/lex_conf.re" + { opts.set_condGotoParam (lex_conf_string ()); return; } +#line 1336 "src/parse/lex_conf.cc" +yy288: + ++cur; +#line 76 "../src/parse/lex_conf.re" + { opts.set_condEnumPrefix (lex_conf_string ()); return; } +#line 1341 "src/parse/lex_conf.cc" +yy290: + yych = (unsigned char)*++cur; + if (yych == 'P') goto yy320; + goto yy13; +yy291: + yych = (unsigned char)*++cur; + if (yych == 'Y') goto yy322; + goto yy13; +yy292: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy323; + goto yy13; +yy293: + ++cur; +#line 135 "../src/parse/lex_conf.re" + { opts.set_yyctype (lex_conf_string ()); return; } +#line 1358 "src/parse/lex_conf.cc" +yy295: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy324; + 
goto yy13; +yy296: + ++cur; +#line 133 "../src/parse/lex_conf.re" + { opts.set_yydebug (lex_conf_string ()); return; } +#line 1367 "src/parse/lex_conf.cc" +yy298: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy326; + goto yy13; +yy299: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy327; + goto yy13; +yy300: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy328; + goto yy13; +yy301: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy329; + goto yy13; +yy302: + yych = (unsigned char)*++cur; + if (yych == 'H') goto yy330; + goto yy13; +yy303: + ++cur; +#line 111 "../src/parse/lex_conf.re" + { opts.set_yylimit (lex_conf_string ()); return; } +#line 1392 "src/parse/lex_conf.cc" +yy305: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy331; + goto yy13; +yy306: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy333; + goto yy13; +yy307: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy334; + goto yy13; +yy308: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy335; + goto yy13; +yy309: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy336; + goto yy13; +yy310: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy337; + goto yy13; +yy311: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy339; + goto yy13; +yy312: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy340; + goto yy13; +yy313: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy341; + goto yy13; +yy314: + yych = (unsigned char)*++cur; + if (yych == 'g') goto yy342; + goto yy13; +yy315: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy343; + goto yy13; +yy316: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy345; + goto yy13; +yy317: + ++cur; +#line 97 "../src/parse/lex_conf.re" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'cgoto:threshold' must be nonnegative"); + } + opts.set_cGotoThreshold (static_cast (n)); + return; + } +#line 1453 
"src/parse/lex_conf.cc" +yy319: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy346; + goto yy13; +yy320: + yyaccept = 5; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'C') goto yy347; +yy321: +#line 115 "../src/parse/lex_conf.re" + { opts.set_yybackup (lex_conf_string ()); return; } +#line 1465 "src/parse/lex_conf.cc" +yy322: + yych = (unsigned char)*++cur; + if (yych == 'P') goto yy348; + goto yy13; +yy323: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy349; + goto yy13; +yy324: + ++cur; +#line 108 "../src/parse/lex_conf.re" + { opts.set_yycursor (lex_conf_string ()); return; } +#line 1478 "src/parse/lex_conf.cc" +yy326: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy350; + goto yy13; +yy327: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy351; + goto yy13; +yy328: + yych = (unsigned char)*++cur; + if (yych == 'D') goto yy352; + goto yy13; +yy329: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy353; + goto yy13; +yy330: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy354; + goto yy13; +yy331: + ++cur; +#line 109 "../src/parse/lex_conf.re" + { opts.set_yymarker (lex_conf_string ()); return; } +#line 1503 "src/parse/lex_conf.cc" +yy333: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy355; + goto yy13; +yy334: + yych = (unsigned char)*++cur; + if (yych == 'D') goto yy357; + goto yy13; +yy335: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy358; + goto yy13; +yy336: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy359; + goto yy13; +yy337: + ++cur; +#line 91 "../src/parse/lex_conf.re" + { opts.set_bUseStateNext (lex_conf_number () != 0); return; } +#line 1524 "src/parse/lex_conf.cc" +yy339: + yych = (unsigned char)*++cur; + if (yych == 'p') goto yy360; + goto yy13; +yy340: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy361; + goto yy13; +yy341: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy362; + goto yy13; +yy342: + yych = (unsigned char)*++cur; 
+ if (yych == 'e') goto yy363; + goto yy13; +yy343: + ++cur; +#line 137 "../src/parse/lex_conf.re" + { opts.set_yychConversion (lex_conf_number () != 0); return; } +#line 1545 "src/parse/lex_conf.cc" +yy345: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy364; + goto yy13; +yy346: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy366; + goto yy13; +yy347: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy368; + goto yy13; +yy348: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy369; + goto yy13; +yy349: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy371; + goto yy13; +yy350: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy372; + goto yy13; +yy351: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy373; + goto yy13; +yy352: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy375; + goto yy13; +yy353: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy376; + goto yy13; +yy354: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy378; + goto yy13; +yy355: + yyaccept = 6; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'C') goto yy380; +yy356: +#line 117 "../src/parse/lex_conf.re" + { opts.set_yyrestore (lex_conf_string ()); return; } +#line 1593 "src/parse/lex_conf.cc" +yy357: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy381; + goto yy13; +yy358: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy382; + goto yy13; +yy359: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy384; + goto yy13; +yy360: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy386; + goto yy13; +yy361: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy388; + goto yy13; +yy362: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy390; + goto yy13; +yy363: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy392; + goto yy13; +yy364: + ++cur; +#line 143 "../src/parse/lex_conf.re" + { opts.set_fill_arg_use (lex_conf_number () != 0); return; } +#line 1626 
"src/parse/lex_conf.cc" +yy366: + ++cur; +#line 78 "../src/parse/lex_conf.re" + { opts.set_condDividerParam (lex_conf_string ()); return; } +#line 1631 "src/parse/lex_conf.cc" +yy368: + yych = (unsigned char)*++cur; + if (yych == 'X') goto yy394; + goto yy13; +yy369: + ++cur; +#line 69 "../src/parse/lex_conf.re" + { opts.set_yycondtype (lex_conf_string ()); return; } +#line 1640 "src/parse/lex_conf.cc" +yy371: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy396; + goto yy13; +yy372: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy398; + goto yy13; +yy373: + ++cur; +#line 142 "../src/parse/lex_conf.re" + { opts.set_fill_arg (lex_conf_string ()); return; } +#line 1653 "src/parse/lex_conf.cc" +yy375: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy399; + goto yy13; +yy376: + yyaccept = 7; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy400; +yy377: +#line 83 "../src/parse/lex_conf.re" + { opts.set_state_get (lex_conf_string ()); return; } +#line 1665 "src/parse/lex_conf.cc" +yy378: + ++cur; +#line 119 "../src/parse/lex_conf.re" + { opts.set_yylessthan (lex_conf_string ()); return; } +#line 1670 "src/parse/lex_conf.cc" +yy380: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy401; + goto yy13; +yy381: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy402; + goto yy13; +yy382: + yyaccept = 8; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy403; + if (yych == '@') goto yy404; +yy383: +#line 85 "../src/parse/lex_conf.re" + { opts.set_state_set (lex_conf_string ()); return; } +#line 1687 "src/parse/lex_conf.cc" +yy384: + ++cur; +#line 88 "../src/parse/lex_conf.re" + { opts.set_yyfilllabel (lex_conf_string ()); return; } +#line 1692 "src/parse/lex_conf.cc" +yy386: + ++cur; +#line 92 "../src/parse/lex_conf.re" + { opts.set_yyaccept (lex_conf_string ()); return; } +#line 1697 "src/parse/lex_conf.cc" +yy388: + ++cur; +#line 81 "../src/parse/lex_conf.re" + { opts.set_yyctable (lex_conf_string ()); 
return; } +#line 1702 "src/parse/lex_conf.cc" +yy390: + ++cur; +#line 154 "../src/parse/lex_conf.re" + { lex_conf_string (); return; } +#line 1707 "src/parse/lex_conf.cc" +yy392: + ++cur; +#line 106 "../src/parse/lex_conf.re" + { opts.set_yytarget (lex_conf_string ()); return; } +#line 1712 "src/parse/lex_conf.cc" +yy394: + ++cur; +#line 116 "../src/parse/lex_conf.re" + { opts.set_yybackupctx (lex_conf_string ()); return; } +#line 1717 "src/parse/lex_conf.cc" +yy396: + ++cur; +#line 110 "../src/parse/lex_conf.re" + { opts.set_yyctxmarker (lex_conf_string ()); return; } +#line 1722 "src/parse/lex_conf.cc" +yy398: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy405; + goto yy13; +yy399: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy407; + goto yy13; +yy400: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy408; + goto yy13; +yy401: + yych = (unsigned char)*++cur; + if (yych == 'X') goto yy409; + goto yy13; +yy402: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy411; + goto yy13; +yy403: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy412; + goto yy13; +yy404: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy413; + goto yy13; +yy405: + ++cur; +#line 144 "../src/parse/lex_conf.re" + { opts.set_fill_naked (lex_conf_number () != 0); return; } +#line 1755 "src/parse/lex_conf.cc" +yy407: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy414; + goto yy13; +yy408: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy415; + goto yy13; +yy409: + ++cur; +#line 118 "../src/parse/lex_conf.re" + { opts.set_yyrestorectx (lex_conf_string ()); return; } +#line 1768 "src/parse/lex_conf.cc" +yy411: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy416; + goto yy13; +yy412: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy417; + goto yy13; +yy413: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy418; + goto yy13; +yy414: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy419; + goto 
yy13; +yy415: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy421; + goto yy13; +yy416: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy422; + goto yy13; +yy417: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy424; + goto yy13; +yy418: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy425; + goto yy13; +yy419: + yyaccept = 9; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy426; +yy420: +#line 70 "../src/parse/lex_conf.re" + { opts.set_cond_get (lex_conf_string ()); return; } +#line 1808 "src/parse/lex_conf.cc" +yy421: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy427; + goto yy13; +yy422: + yyaccept = 10; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy428; + if (yych == '@') goto yy429; +yy423: +#line 72 "../src/parse/lex_conf.re" + { opts.set_cond_set (lex_conf_string ()); return; } +#line 1821 "src/parse/lex_conf.cc" +yy424: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy430; + goto yy13; +yy425: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy431; + goto yy13; +yy426: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy432; + goto yy13; +yy427: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy433; + goto yy13; +yy428: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy435; + goto yy13; +yy429: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy436; + goto yy13; +yy430: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy437; + goto yy13; +yy431: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy439; + goto yy13; +yy432: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy441; + goto yy13; +yy433: + ++cur; +#line 84 "../src/parse/lex_conf.re" + { opts.set_state_get_naked (lex_conf_number () != 0); return; } +#line 1862 "src/parse/lex_conf.cc" +yy435: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy442; + goto yy13; +yy436: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy443; + goto yy13; 
+yy437: + ++cur; +#line 86 "../src/parse/lex_conf.re" + { opts.set_state_set_naked (lex_conf_number () != 0); return; } +#line 1875 "src/parse/lex_conf.cc" +yy439: + ++cur; +#line 87 "../src/parse/lex_conf.re" + { opts.set_state_set_arg (lex_conf_string ()); return; } +#line 1880 "src/parse/lex_conf.cc" +yy441: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy444; + goto yy13; +yy442: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy445; + goto yy13; +yy443: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy446; + goto yy13; +yy444: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy447; + goto yy13; +yy445: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy448; + goto yy13; +yy446: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy449; + goto yy13; +yy447: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy451; + goto yy13; +yy448: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy453; + goto yy13; +yy449: + ++cur; +#line 73 "../src/parse/lex_conf.re" + { opts.set_cond_set_arg (lex_conf_string ()); return; } +#line 1917 "src/parse/lex_conf.cc" +yy451: + ++cur; +#line 71 "../src/parse/lex_conf.re" + { opts.set_cond_get_naked (lex_conf_number () != 0); return; } +#line 1922 "src/parse/lex_conf.cc" +yy453: + ++cur; +#line 74 "../src/parse/lex_conf.re" + { opts.set_cond_set_naked (lex_conf_number () != 0); return; } +#line 1927 "src/parse/lex_conf.cc" +} +#line 155 "../src/parse/lex_conf.re" + +} + +void Scanner::lex_conf_assign () +{ + +#line 1936 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= 0x1F) { + if (yych == '\t') goto yy459; + } else { + if (yych <= ' ') goto yy459; + if (yych == '=') goto yy460; + } + ++cur; +yy458: +#line 161 "../src/parse/lex_conf.re" + { fatal ("missing '=' in configuration"); } +#line 1985 "src/parse/lex_conf.cc" +yy459: + yych = (unsigned char)*(ptr = ++cur); + if (yych <= 0x1F) { + if (yych == '\t') goto yy463; + goto yy458; + } else { + if (yych <= ' ') goto yy463; + if (yych != '=') goto yy458; + } +yy460: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy460; + } +#line 162 "../src/parse/lex_conf.re" + { return; } +#line 2004 "src/parse/lex_conf.cc" +yy463: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yych <= 0x1F) { + if (yych == '\t') goto yy463; + } else { + if (yych <= ' ') goto yy463; + if (yych == '=') goto yy460; + } + cur = ptr; + goto yy458; +} +#line 163 "../src/parse/lex_conf.re" + +} + +void Scanner::lex_conf_semicolon () +{ + +#line 2025 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= 0x1F) { + if (yych == '\t') goto yy470; + } else { + if (yych <= ' ') goto yy470; + if (yych == ';') goto yy471; + } + ++cur; +yy469: +#line 169 "../src/parse/lex_conf.re" + { fatal ("missing ending ';' in configuration"); } +#line 2074 "src/parse/lex_conf.cc" +yy470: + yych = (unsigned char)*(ptr = ++cur); + if (yybm[0+yych] & 128) { + goto yy473; + } + if (yych != ';') goto yy469; +yy471: + ++cur; +#line 170 "../src/parse/lex_conf.re" + { return; } +#line 2085 "src/parse/lex_conf.cc" +yy473: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy473; + } + if (yych == ';') goto yy471; + cur = ptr; + goto yy469; +} +#line 171 "../src/parse/lex_conf.re" + +} + +int32_t Scanner::lex_conf_number () +{ + lex_conf_assign (); + tok = cur; + +#line 2106 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= '/') { + if (yych == '-') goto yy479; + } else { + if (yych <= '0') goto yy480; + if (yych <= '9') goto yy482; + } +yy478: +yy479: + yych = (unsigned char)*++cur; + if (yych <= '0') goto yy478; + if (yych <= '9') goto yy482; + goto yy478; +yy480: + ++cur; +yy481: +#line 180 "../src/parse/lex_conf.re" + { + int32_t n = 0; + if (!s_to_i32_unsafe (tok, cur, n)) + { + fatal ("configuration value overflow"); + } + lex_conf_semicolon (); + return n; + } +#line 2170 "src/parse/lex_conf.cc" +yy482: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy482; + } + goto yy481; +} +#line 189 "../src/parse/lex_conf.re" + +} + +std::string Scanner::lex_conf_string () +{ + lex_conf_assign (); + std::string s; + tok = cur; + +#line 2190 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = {}; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yych <= '!') { + if (yych <= '\n') { + if (yych <= 0x08) goto yy487; + } else { + if (yych != ' ') goto yy487; + } + } else { + if (yych <= '\'') { + if (yych <= '"') goto yy489; + if (yych <= '&') goto yy487; + goto yy489; + } else { + if (yych != ';') goto yy487; + } + } +yy486: +#line 212 "../src/parse/lex_conf.re" + { + s = std::string(tok, tok_len()); + goto end; + } +#line 2250 "src/parse/lex_conf.cc" +yy487: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy487; + } + goto yy486; +yy489: + ++cur; +#line 198 "../src/parse/lex_conf.re" + { + const char quote = tok[0]; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, 
end); + if (end) { + goto end; + } + if (c > 0xFF) { + fatalf ("multibyte character in configuration string: 0x%X", c); + } else { + s += static_cast(c); + } + } + } +#line 2276 "src/parse/lex_conf.cc" +} +#line 216 "../src/parse/lex_conf.re" + +end: + lex_conf_semicolon (); + return s; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/lex_conf.re b/tools/re2c/src/parse/lex_conf.re new file mode 100644 index 000000000..550fe1ab1 --- /dev/null +++ b/tools/re2c/src/parse/lex_conf.re @@ -0,0 +1,222 @@ +#include "src/util/c99_stdint.h" +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/parse/scanner.h" +#include "src/util/s_to_n32_unsafe.h" + +namespace re2c +{ + +// global re2c config (affects the whole file) +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = cur; + re2c:define:YYLIMIT = lim; + re2c:define:YYMARKER = ptr; + re2c:define:YYCTXMARKER = ctx; + re2c:define:YYFILL = fill; + + // source code is in ASCII: pointers have type 'char *' + // but re2c makes an implicit assumption that YYCTYPE is unsigned + // when it generates comparisons + re2c:yych:conversion = 1; + + space = [ \t]; + + conf_assign = space* "=" space*; + + naked_char = . \ (space | [;]); + naked = "" | (naked_char \ ['"]) naked_char*; + + number = "0" | ("-"? 
[1-9] [0-9]*); +*/ + +void Scanner::lex_conf () +{ + tok = cur; +/*!re2c + * { fatal ((tok - pos) - tchar, "unrecognized configuration"); } + + "flags:" [ewxu8] + { + Enc::type_t enc = Enc::ASCII; + switch (cur[-1]) + { + case 'e': enc = Enc::EBCDIC; break; + case 'w': enc = Enc::UCS2; break; + case 'x': enc = Enc::UTF16; break; + case 'u': enc = Enc::UTF32; break; + case '8': enc = Enc::UTF8; break; + } + const int32_t n = lex_conf_number (); + if (n == 0) + { + opts.unset_encoding (enc); + } + else if (!opts.set_encoding (enc)) + { + fatalf ("Cannot set %s encoding: please reset %s encoding first" + , Enc::name (enc) + , Enc::name (opts->encoding.type ())); + } + return; + } + + "define:YYCONDTYPE" { opts.set_yycondtype (lex_conf_string ()); return; } + "define:YYGETCONDITION" { opts.set_cond_get (lex_conf_string ()); return; } + "define:YYGETCONDITION:naked" { opts.set_cond_get_naked (lex_conf_number () != 0); return; } + "define:YYSETCONDITION" { opts.set_cond_set (lex_conf_string ()); return; } + "define:YYSETCONDITION@cond" { opts.set_cond_set_arg (lex_conf_string ()); return; } + "define:YYSETCONDITION:naked" { opts.set_cond_set_naked (lex_conf_number () != 0); return; } + "condprefix" { opts.set_condPrefix (lex_conf_string ()); return; } + "condenumprefix" { opts.set_condEnumPrefix (lex_conf_string ()); return; } + "cond:divider" { opts.set_condDivider (lex_conf_string ()); return; } + "cond:divider@cond" { opts.set_condDividerParam (lex_conf_string ()); return; } + "cond:goto" { opts.set_condGoto (lex_conf_string ()); return; } + "cond:goto@cond" { opts.set_condGotoParam (lex_conf_string ()); return; } + "variable:yyctable" { opts.set_yyctable (lex_conf_string ()); return; } + + "define:YYGETSTATE" { opts.set_state_get (lex_conf_string ()); return; } + "define:YYGETSTATE:naked" { opts.set_state_get_naked (lex_conf_number () != 0); return; } + "define:YYSETSTATE" { opts.set_state_set (lex_conf_string ()); return; } + "define:YYSETSTATE:naked" { 
opts.set_state_set_naked (lex_conf_number () != 0); return; } + "define:YYSETSTATE@state" { opts.set_state_set_arg (lex_conf_string ()); return; } + "label:yyFillLabel" { opts.set_yyfilllabel (lex_conf_string ()); return; } + "label:yyNext" { opts.set_yynext (lex_conf_string ()); return; } + "state:abort" { opts.set_bUseStateAbort (lex_conf_number () != 0); return; } + "state:nextlabel" { opts.set_bUseStateNext (lex_conf_number () != 0); return; } + "variable:yyaccept" { opts.set_yyaccept (lex_conf_string ()); return; } + + "variable:yybm" { opts.set_yybm (lex_conf_string ()); return; } + "yybm:hex" { opts.set_yybmHexTable (lex_conf_number () != 0); return; } + "cgoto:threshold" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'cgoto:threshold' must be nonnegative"); + } + opts.set_cGotoThreshold (static_cast (n)); + return; + } + "variable:yytarget" { opts.set_yytarget (lex_conf_string ()); return; } + + "define:YYCURSOR" { opts.set_yycursor (lex_conf_string ()); return; } + "define:YYMARKER" { opts.set_yymarker (lex_conf_string ()); return; } + "define:YYCTXMARKER" { opts.set_yyctxmarker (lex_conf_string ()); return; } + "define:YYLIMIT" { opts.set_yylimit (lex_conf_string ()); return; } + + "define:YYPEEK" { opts.set_yypeek (lex_conf_string ()); return; } + "define:YYSKIP" { opts.set_yyskip (lex_conf_string ()); return; } + "define:YYBACKUP" { opts.set_yybackup (lex_conf_string ()); return; } + "define:YYBACKUPCTX" { opts.set_yybackupctx (lex_conf_string ()); return; } + "define:YYRESTORE" { opts.set_yyrestore (lex_conf_string ()); return; } + "define:YYRESTORECTX" { opts.set_yyrestorectx (lex_conf_string ()); return; } + "define:YYLESSTHAN" { opts.set_yylessthan (lex_conf_string ()); return; } + + "indent:string" { opts.set_indString (lex_conf_string ()); return; } + "indent:top" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'indent:top' must be nonnegative"); + } + opts.set_topIndent 
(static_cast (n)); + return; + } + + "define:YYDEBUG" { opts.set_yydebug (lex_conf_string ()); return; } + + "define:YYCTYPE" { opts.set_yyctype (lex_conf_string ()); return; } + "variable:yych" { opts.set_yych (lex_conf_string ()); return; } + "yych:conversion" { opts.set_yychConversion (lex_conf_number () != 0); return; } + "yych:emit" { opts.set_bEmitYYCh (lex_conf_number () != 0); return; } + + "define:YYFILL" { opts.set_fill (lex_conf_string ()); return; } + "yyfill:enable" { opts.set_fill_use (lex_conf_number () != 0); return; } + "define:YYFILL@len" { opts.set_fill_arg (lex_conf_string ()); return; } + "yyfill:parameter" { opts.set_fill_arg_use (lex_conf_number () != 0); return; } + "define:YYFILL:naked" { opts.set_fill_naked (lex_conf_number () != 0); return; } + "yyfill:check" { opts.set_fill_check (lex_conf_number () != 0); return; } + + "labelprefix" { opts.set_labelPrefix (lex_conf_string ()); return; } + + // try to lex number first, otherwize it would be lexed as a naked string + "startlabel" / conf_assign number { out.set_force_start_label (lex_conf_number () != 0); return; } + "startlabel" { out.set_user_start_label (lex_conf_string ()); return; } + + // deprecated + "variable:yystable" { lex_conf_string (); return; } +*/ +} + +void Scanner::lex_conf_assign () +{ +/*!re2c + * { fatal ("missing '=' in configuration"); } + conf_assign { return; } +*/ +} + +void Scanner::lex_conf_semicolon () +{ +/*!re2c + * { fatal ("missing ending ';' in configuration"); } + space* ";" { return; } +*/ +} + +int32_t Scanner::lex_conf_number () +{ + lex_conf_assign (); + tok = cur; +/*!re2c + number + { + int32_t n = 0; + if (!s_to_i32_unsafe (tok, cur, n)) + { + fatal ("configuration value overflow"); + } + lex_conf_semicolon (); + return n; + } +*/ +} + +std::string Scanner::lex_conf_string () +{ + lex_conf_assign (); + std::string s; + tok = cur; +/*!re2c + ['"] { + const char quote = tok[0]; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, end); + if 
(end) { + goto end; + } + if (c > 0xFF) { + fatalf ("multibyte character in configuration string: 0x%X", c); + } else { + s += static_cast(c); + } + } + } + naked { + s = std::string(tok, tok_len()); + goto end; + } +*/ +end: + lex_conf_semicolon (); + return s; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/loc.h b/tools/re2c/src/parse/loc.h new file mode 100644 index 000000000..b3d4277a2 --- /dev/null +++ b/tools/re2c/src/parse/loc.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_PARSE_LOC_ +#define _RE2C_PARSE_LOC_ + +#include + +#include "src/util/c99_stdint.h" + +namespace re2c +{ + +struct Loc +{ + std::string filename; + uint32_t line; + + inline Loc (const std::string & f, uint32_t l) + : filename (f) + , line (l) + {} +}; + +} // namespace re2c + +#endif // _RE2C_PARSE_LOC_ diff --git a/tools/re2c/src/parse/parser.cc b/tools/re2c/src/parse/parser.cc new file mode 100644 index 000000000..a328bc2c9 --- /dev/null +++ b/tools/re2c/src/parse/parser.cc @@ -0,0 +1,2396 @@ +/* A Bison parser, made by GNU Bison 3.0.4. */ + +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "3.0.4" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 0 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + + + + +/* Copy the first part of user declarations. 
*/ +#line 1 "../src/parse/parser.ypp" /* yacc.c:339 */ + + +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/compile.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/loc.h" +#include "src/parse/parser.h" +#include "src/parse/scanner.h" +#include "src/parse/spec.h" +#include "src/util/counter.h" +#include "src/util/free_list.h" +#include "src/util/range.h" +#include "src/util/smart_ptr.h" + +#define YYMALLOC malloc +#define YYFREE free + +using namespace re2c; + +extern "C" +{ +int yylex(); +void yyerror(const char*); +} + +static counter_t rank_counter; +static std::vector condnames; +static re2c::SpecMap specMap; +static Spec spec; +static RuleOp *specNone = NULL; +static RuleOpList specStar; +static RuleOp * star_default = NULL; +static Scanner *in = NULL; +static Scanner::ParseMode parseMode; +static SetupMap ruleSetupMap; +static bool foundRules; +static symbol_table_t symbol_table; + +/* Bison version 1.875 emits a definition that is not working + * with several g++ version. Hence we disable it here. 
+ */ +#if defined(__GNUC__) +#define __attribute__(x) +#endif + +void context_check(CondList *clist) +{ + if (!opts->cFlag) + { + delete clist; + in->fatal("conditions are only allowed when using -c switch"); + } +} + +void context_none(CondList *clist) +{ + delete clist; + context_check(NULL); + in->fatal("no expression specified"); +} + +void context_rule + ( CondList * clist + , const Loc & loc + , RegExp * expr + , RegExp * look + , const Code * code + , const std::string * newcond + ) +{ + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (specMap.find(*it) == specMap.end()) + { + condnames.push_back (*it); + } + + RuleOp * rule = new RuleOp + ( loc + , expr + , look + , rank_counter.next () + , code + , newcond + ); + specMap[*it].add (rule); + } + delete clist; + delete newcond; +} + +void setup_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (ruleSetupMap.find(*it) != ruleSetupMap.end()) + { + in->fatalf_at(code->loc.line, "code to setup rule '%s' is already defined", it->c_str()); + } + ruleSetupMap[*it] = std::make_pair(code->loc.line, code->text); + } + delete clist; +} + +void default_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + RuleOp * def = new RuleOp + ( code->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , code + , NULL + ); + if (!specMap[*it].add_def (def)) + { + in->fatalf_at(code->loc.line, "code to default rule '%s' is already defined", it->c_str()); + } + } + delete clist; +} + + +#line 224 "src/parse/parser.cc" /* yacc.c:339 */ + +# ifndef YY_NULLPTR +# if defined __cplusplus && 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# endif + +/* 
Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +/* In a future release of Bison, this section will be replaced + by #include "y.tab.h". */ +#ifndef YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +# define YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + TOKEN_CLOSE = 258, + TOKEN_CLOSESIZE = 259, + TOKEN_CODE = 260, + TOKEN_CONF = 261, + TOKEN_ID = 262, + TOKEN_FID = 263, + TOKEN_FID_END = 264, + TOKEN_NOCOND = 265, + TOKEN_REGEXP = 266, + TOKEN_SETUP = 267, + TOKEN_STAR = 268 + }; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED + +union YYSTYPE +{ +#line 161 "../src/parse/parser.ypp" /* yacc.c:355 */ + + re2c::RegExp * regexp; + const re2c::Code * code; + char op; + re2c::ExtOp extop; + std::string * str; + re2c::CondList * clist; + +#line 287 "src/parse/parser.cc" /* yacc.c:355 */ +}; + +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + +extern YYSTYPE yylval; + +int yyparse (void); + +#endif /* !YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED */ + +/* Copy the second part of user declarations. 
*/ + +#line 304 "src/parse/parser.cc" /* yacc.c:358 */ + +#ifdef short +# undef short +#endif + +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; +#endif + +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif + +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! defined YYSIZE_T +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + +#ifndef YY_ATTRIBUTE +# if (defined __GNUC__ \ + && (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \ + || defined __SUNPRO_C && 0x5110 <= __SUNPRO_C +# define YY_ATTRIBUTE(Spec) __attribute__(Spec) +# else +# define YY_ATTRIBUTE(Spec) /* empty */ +# endif +#endif + +#ifndef YY_ATTRIBUTE_PURE +# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__)) +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__)) +#endif + +#if !defined _Noreturn \ + && (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112) +# if defined _MSC_VER && 1200 <= _MSC_VER +# define _Noreturn __declspec (noreturn) +# else +# define _Noreturn YY_ATTRIBUTE ((__noreturn__)) +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! 
defined lint || defined __GNUC__ +# define YYUSE(E) ((void) (E)) +#else +# define YYUSE(E) /* empty */ +#endif + +#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ +/* Suppress an incorrect diagnostic about yylval being uninitialized. */ +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. 
Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. 
*/ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 2 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 104 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 25 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 14 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 49 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 92 + +/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned + by yylex, with out-of-bounds checking. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 268 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, without out-of-bounds checking. 
*/ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 23, 24, 2, 2, 20, 2, 2, 16, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 19, 15, + 17, 14, 18, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 21, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13 +}; + +#if YYDEBUG + /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +static const yytype_uint16 yyrline[] = +{ + 0, 193, 193, 195, 199, 203, 211, 219, 223, 227, + 231, 247, 264, 268, 274, 279, 285, 289, 303, 319, + 324, 330, 345, 362, 381, 387, 395, 398, 405, 411, + 421, 424, 432, 435, 442, 446, 453, 457, 464, 468, + 475, 479, 494, 513, 517, 521, 525, 532, 542, 546 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE || 0 +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ /* The list is terminated by YY_NULLPTR. */ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "TOKEN_CLOSE", "TOKEN_CLOSESIZE", + "TOKEN_CODE", "TOKEN_CONF", "TOKEN_ID", "TOKEN_FID", "TOKEN_FID_END", + "TOKEN_NOCOND", "TOKEN_REGEXP", "TOKEN_SETUP", "TOKEN_STAR", "'='", + "';'", "'/'", "'<'", "'>'", "':'", "','", "'|'", "'\\\\'", "'('", "')'", + "$accept", "spec", "decl", "rule", "cond", "clist", "newcond", "look", + "expr", "diff", "term", "factor", "close", "primary", YY_NULLPTR +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[NUM] -- (External) token number corresponding to the + (internal) symbol number NUM (which must be that of a token). + It is the inverse of the non-default entries of yytranslate. */ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 61, 59, 47, 60, 62, 58, + 44, 124, 92, 40, 41 +}; +# endif + +#define YYPACT_NINF -43 + /* A yypact entry equal to YYPACT_NINF means the state's action does not depend on the lookahead: the parser jumps to the default action without reading a token. */ +#define yypact_value_is_default(Yystate) \ + (!!((Yystate) == (-43))) + +#define YYTABLE_NINF -1 + +#define yytable_value_is_error(Yytable_value) \ + 0 + /* The macro above expands to the constant 0, so no yytable entry is ever treated as an explicit error action by this parser. */ + /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const yytype_int8 yypact[] = +{ + -43, 11, -43, -43, -11, 30, 47, -43, 25, 10, + 33, 30, -43, -43, 48, 17, 30, -43, 1, 30, + -43, 4, 40, 60, 70, -43, 61, 63, 42, -43, + 64, 66, 59, 30, 30, 73, 30, -43, -43, -43, + -43, 32, -9, -43, -43, 78, -43, -43, 81, 82, + 83, 20, 44, -43, 67, 17, -43, 30, -43, -43, + -43, -43, -43, -43, -43, -43, 84, 51, 48, 86, + 54, 48, -43, 60, 87, 57, -43, 60, 88, 58, + -43, -43, 60, 89, -43, -43, 60, 90, -43, -43, + -43, -43 +}; + + /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. 
*/ +static const yytype_uint8 yydefact[] = +{ + 2, 0, 1, 9, 47, 0, 30, 48, 26, 0, + 26, 0, 4, 3, 32, 34, 36, 38, 40, 0, + 47, 0, 0, 30, 0, 28, 0, 0, 27, 11, + 0, 0, 0, 0, 0, 0, 0, 39, 43, 42, + 44, 41, 0, 6, 8, 0, 23, 22, 0, 0, + 0, 32, 32, 49, 33, 35, 10, 37, 45, 46, + 5, 7, 31, 24, 25, 29, 0, 30, 32, 0, + 30, 32, 21, 30, 0, 30, 16, 30, 0, 30, + 20, 19, 30, 0, 15, 14, 30, 0, 18, 17, + 13, 12 +}; + + /* YYPGOTO[NTERM-NUM]. Added to the exposed state after a reduction to form a candidate index into YYTABLE for the goto. */ +static const yytype_int8 yypgoto[] = +{ + -43, -43, -43, -43, 91, -43, -23, -42, -3, 62, + 68, -15, -43, -43 +}; + + /* YYDEFGOTO[NTERM-NUM]. State to enter after a reduction when the YYPGOTO candidate index is rejected by YYCHECK. */ +static const yytype_int8 yydefgoto[] = +{ + -1, 1, 12, 13, 27, 28, 24, 35, 14, 15, + 16, 17, 41, 18 +}; + + /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. */ +static const yytype_uint8 yytable[] = +{ + 46, 37, 21, 19, 38, 39, 60, 61, 32, 67, + 70, 2, 34, 43, 40, 29, 42, 3, 4, 5, + 44, 6, 7, 8, 9, 34, 75, 20, 10, 79, + 54, 7, 25, 66, 11, 58, 33, 20, 26, 36, + 25, 7, 37, 11, 74, 59, 30, 78, 68, 71, + 80, 20, 83, 11, 84, 7, 87, 69, 45, 88, + 33, 22, 50, 90, 33, 22, 23, 11, 22, 34, + 73, 22, 22, 77, 22, 47, 82, 86, 56, 48, + 34, 49, 51, 53, 52, 62, 63, 64, 34, 72, + 65, 76, 81, 85, 89, 91, 55, 0, 0, 0, + 0, 31, 0, 0, 57 +}; + /* YYCHECK[I] -- Guard for YYTABLE[I]: the entry is used only when YYCHECK[I] equals the lookahead symbol (for actions) or the exposed state (for gotos); otherwise the default action or default goto applies. */ +static const yytype_int8 yycheck[] = +{ + 23, 16, 5, 14, 3, 4, 15, 16, 11, 51, + 52, 0, 21, 9, 13, 5, 19, 6, 7, 8, + 16, 10, 11, 12, 13, 21, 68, 7, 17, 71, + 33, 11, 7, 13, 23, 3, 16, 7, 13, 22, + 7, 11, 57, 23, 67, 13, 13, 70, 51, 52, + 73, 7, 75, 23, 77, 11, 79, 13, 18, 82, + 16, 14, 20, 86, 16, 14, 19, 23, 14, 21, + 19, 14, 14, 19, 14, 5, 19, 19, 5, 18, + 21, 18, 18, 24, 18, 7, 5, 5, 21, 5, + 7, 5, 5, 5, 5, 5, 34, -1, -1, -1, + -1, 10, -1, -1, 36 +}; + + /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. 
*/ +static const yytype_uint8 yystos[] = +{ + 0, 26, 0, 6, 7, 8, 10, 11, 12, 13, + 17, 23, 27, 28, 33, 34, 35, 36, 38, 14, + 7, 33, 14, 19, 31, 7, 13, 29, 30, 5, + 13, 29, 33, 16, 21, 32, 22, 36, 3, 4, + 13, 37, 33, 9, 16, 18, 31, 5, 18, 18, + 20, 18, 18, 24, 33, 34, 5, 35, 3, 13, + 15, 16, 7, 5, 5, 7, 13, 32, 33, 13, + 32, 33, 5, 19, 31, 32, 5, 19, 31, 32, + 31, 5, 19, 31, 31, 5, 19, 31, 31, 5, + 31, 5 +}; + + /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +static const yytype_uint8 yyr1[] = +{ + 0, 25, 26, 26, 26, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 29, 29, 30, 30, + 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, + 36, 36, 36, 37, 37, 37, 37, 38, 38, 38 +}; + + /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 0, 2, 2, 4, 3, 4, 3, 1, + 3, 2, 7, 7, 6, 6, 5, 7, 7, 6, + 6, 5, 3, 3, 4, 4, 0, 1, 1, 3, + 0, 3, 0, 2, 1, 3, 1, 3, 1, 2, + 1, 2, 2, 1, 1, 2, 2, 1, 1, 3 +}; + + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (0) + +/* Error token number */ +#define YYTERROR 1 +#define YYERRCODE 256 + + + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + +/* This macro is provided for backward compatibility. 
*/ +#ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +#endif + + /* When yydebug is nonzero, print Title, the symbol and a newline on stderr. The Location argument is accepted but never expanded. */ +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*----------------------------------------. +| Print this symbol's value on YYOUTPUT. | +`----------------------------------------*/ + /* Does nothing when YYVALUEP is null. Output is produced only if the user defined YYPRINT and YYTYPE is a terminal (below YYNTOKENS); YYPRINT then receives the external token number from yytoknum. */ +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +{ + FILE *yyo = yyoutput; + YYUSE (yyo); + if (!yyvaluep) + return; +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# endif + YYUSE (yytype); +} + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + /* Writes "token NAME (" or "nterm NAME (" depending on whether YYTYPE is below YYNTOKENS, then the value via yy_symbol_value_print, then ")". NAME comes from yytname[YYTYPE]. */ +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +{ + YYFPRINTF (yyoutput, "%s %s (", + yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep); + YYFPRINTF (yyoutput, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + /* Output goes to stderr as "Stack now" followed by one state number per stack entry and a final newline. */ +static void +yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. 
| +`------------------------------------------------*/ + /* YYSSP and YYVSP point at the tops of the state and value stacks; the last yyr2[YYRULE] entries of each are the right-hand side being reduced. The header line shows the rule as YYRULE - 1 together with its line from yyrline; each right-hand-side symbol is then printed as $1, $2, ... on stderr. */ +static void +yy_reduce_print (yytype_int16 *yyssp, YYSTYPE *yyvsp, int yyrule) +{ + unsigned long int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + yystos[yyssp[yyi + 1 - yynrhs]], + &(yyvsp[(yyi + 1) - (yynrhs)]) + ); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ /* Without YYDEBUG the four trace macros below expand to nothing. */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. 
*/ +static YYSIZE_T +yystrlen (const char *yystr) +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. The terminator itself is copied; no bound is checked, so + YYDEST must be large enough for YYSRC. */ +static char * +yystpcpy (char *yydest, const char *yysrc) +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. + + The returned length does not count the terminating '\0'. A name + that does not start with a double quote, or for which stripping is + abandoned, is copied (or measured) unchanged. The quoted-name scan + has no case for '\0': it relies on YYSTR containing a closing + double quote. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. + + Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return 2 if the + required number of bytes is too large to store. 
*/ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, + yytype_int16 *yyssp, int yytoken) +{ + YYSIZE_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]); + YYSIZE_T yysize = yysize0; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + /* Internationalized format string. */ + const char *yyformat = YY_NULLPTR; + /* Arguments of yyformat. */ + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + /* Number of reported tokens (one for the "unexpected", one per + "expected"). */ + int yycount = 0; + + /* There are many possibilities here to consider: + - If this state is a consistent state with a default action, then + the only way this function was invoked is if the default action + is an error action. In that case, don't check for expected + tokens because there are none. + - The only way there can be no lookahead present (in yychar) is if + this state is a consistent state with a default action. Thus, + detecting the absence of a lookahead is sufficient to determine + that there is no unexpected or expected token to report. In that + case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this state is a + consistent state with a default action. There might have been a + previous inconsistent state, consistent state with a non-default + action, or user semantic action that manipulated yychar. + - Of course, the expected token list depends on states to have + correct lookahead information, and it depends on the parser not + to perform extra reductions after fetching a lookahead from the + scanner and before detecting a syntax error. Thus, state merging + (from LALR or IELR) and default reductions corrupt the expected + token list. However, the list is correct for canonical LR with + one exception: it will still contain any token that will not be + accepted due to an error action in a later state. 
+ */ + if (yytoken != YYEMPTY) + { + int yyn = yypact[*yyssp]; + yyarg[yycount++] = yytname[yytoken]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + break; + } + yyarg[yycount++] = yytname[yyx]; + { + YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]); + if (! (yysize <= yysize1 + && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + } + } + } + + switch (yycount) + { +# define YYCASE_(N, S) \ + case N: \ + yyformat = S; \ + break + YYCASE_(0, YY_("syntax error")); + YYCASE_(1, YY_("syntax error, unexpected %s")); + YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); + YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); + YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); + YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); +# undef YYCASE_ + } + + { + YYSIZE_T yysize1 = yysize + yystrlen (yyformat); + if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return 1; + } + + /* Avoid sprintf, as that infringes on the user's name space. 
+ Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + } + return 0; +} +#endif /* YYERROR_VERBOSE */ + +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) +{ + YYUSE (yyvaluep); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YYUSE (yytype); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + +/* The lookahead symbol. */ +int yychar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; +/* Number of syntax errors so far. */ +int yynerrs; + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (void) +{ + int yystate; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + + /* The stacks and their tools: + 'yyss': related to states. + 'yyvs': related to semantic values. + + Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss; + yytype_int16 *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs; + YYSTYPE *yyvsp; + + YYSIZE_T yystacksize; + + int yyn; + int yyresult; + /* Lookahead token as an internal (translated) token number. */ + int yytoken = 0; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. 
*/ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; +#endif + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + yyssp = yyss = yyssa; + yyvsp = yyvs = yyvsa; + yystacksize = YYINITDEPTH; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else + /* Extend the stack our own way. 
*/ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = yylex (); + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. 
*/ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + + /* Discard the shifted token. */ + yychar = YYEMPTY; + + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 2: +#line 193 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + } +#line 1438 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 3: +#line 196 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + foundRules = true; + } +#line 1446 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 5: +#line 204 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (!symbol_table.insert (std::make_pair (* (yyvsp[-3].str), (yyvsp[-1].regexp))).second) + { + in->fatal("sym already defined"); + } + delete (yyvsp[-3].str); + } +#line 1458 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 6: +#line 212 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (!symbol_table.insert (std::make_pair (* (yyvsp[-2].str), (yyvsp[-1].regexp))).second) + { + in->fatal("sym already defined"); + } + delete (yyvsp[-2].str); + } +#line 1470 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 7: +#line 220 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + in->fatal("trailing contexts are not allowed in named definitions"); + } +#line 1478 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 8: +#line 224 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + in->fatal("trailing contexts are not allowed in named definitions"); + } +#line 1486 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 9: +#line 227 "../src/parse/parser.ypp" /* yacc.c:1646 */ + {} +#line 1492 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 10: +#line 232 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (opts->cFlag) + { + in->fatal("condition or '<*>' required when using -c switch"); + } + RuleOp * rule = new RuleOp + ( (yyvsp[0].code)->loc + , (yyvsp[-2].regexp) + , (yyvsp[-1].regexp) + , rank_counter.next () + , (yyvsp[0].code) + , NULL + ); + spec.add (rule); + } +#line 1512 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 11: +#line 248 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if 
(opts->cFlag) + in->fatal("condition or '<*>' required when using -c switch"); + RuleOp * def = new RuleOp + ( (yyvsp[0].code)->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , (yyvsp[0].code) + , NULL + ); + if (!spec.add_def (def)) + { + in->fatal("code to default rule is already defined"); + } + } +#line 1533 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 12: +#line 265 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_rule ((yyvsp[-5].clist), (yyvsp[0].code)->loc, (yyvsp[-3].regexp), (yyvsp[-2].regexp), (yyvsp[0].code), (yyvsp[-1].str)); + } +#line 1541 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 13: +#line 269 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + Loc loc (in->get_fname (), in->get_cline ()); + context_rule ((yyvsp[-5].clist), loc, (yyvsp[-3].regexp), (yyvsp[-2].regexp), NULL, (yyvsp[0].str)); + } +#line 1551 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 14: +#line 275 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_none((yyvsp[-4].clist)); + delete (yyvsp[-1].str); + } +#line 1560 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 15: +#line 280 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_none((yyvsp[-4].clist)); + delete (yyvsp[0].str); + } +#line 1570 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 16: +#line 286 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + default_rule((yyvsp[-3].clist), (yyvsp[0].code)); + } +#line 1578 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 17: +#line 290 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_check(NULL); + RuleOp * rule = new RuleOp + ( (yyvsp[0].code)->loc + , (yyvsp[-3].regexp) + , (yyvsp[-2].regexp) + , rank_counter.next () + , (yyvsp[0].code) + , (yyvsp[-1].str) + ); + specStar.push_back (rule); + delete (yyvsp[-1].str); + } +#line 1596 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 18: +#line 304 
"../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_check(NULL); + Loc loc (in->get_fname (), in->get_cline ()); + RuleOp * rule = new RuleOp + ( loc + , (yyvsp[-3].regexp) + , (yyvsp[-2].regexp) + , rank_counter.next () + , NULL + , (yyvsp[0].str) + ); + specStar.push_back (rule); + delete (yyvsp[0].str); + } +#line 1616 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 19: +#line 320 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_none(NULL); + delete (yyvsp[-1].str); + } +#line 1625 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 20: +#line 325 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_none(NULL); + delete (yyvsp[0].str); + } +#line 1635 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 21: +#line 331 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (star_default) + { + in->fatal ("code to default rule '*' is already defined"); + } + star_default = new RuleOp + ( (yyvsp[0].code)->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , (yyvsp[0].code) + , NULL + ); + } +#line 1654 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 22: +#line 346 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + (yyval.regexp) = specNone = new RuleOp + ( (yyvsp[0].code)->loc + , new NullOp + , new NullOp + , rank_counter.next () + , (yyvsp[0].code) + , (yyvsp[-1].str) + ); + delete (yyvsp[-1].str); + } +#line 1675 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 23: +#line 363 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + Loc loc (in->get_fname (), in->get_cline ()); + (yyval.regexp) = specNone = new RuleOp + ( loc + , new NullOp + , new NullOp + , rank_counter.next () + , NULL + , 
(yyvsp[0].str) + ); + delete (yyvsp[0].str); + } +#line 1698 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 24: +#line 382 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + CondList *clist = new CondList(); + clist->insert("*"); + setup_rule(clist, (yyvsp[0].code)); + } +#line 1708 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 25: +#line 388 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + setup_rule((yyvsp[-2].clist), (yyvsp[0].code)); + } +#line 1716 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 26: +#line 395 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + in->fatal("unnamed condition not supported"); + } +#line 1724 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 27: +#line 399 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.clist) = (yyvsp[0].clist); + } +#line 1732 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 28: +#line 406 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.clist) = new CondList(); + (yyval.clist)->insert(* (yyvsp[0].str)); + delete (yyvsp[0].str); + } +#line 1742 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 29: +#line 412 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyvsp[-2].clist)->insert(* (yyvsp[0].str)); + delete (yyvsp[0].str); + (yyval.clist) = (yyvsp[-2].clist); + } +#line 1752 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 30: +#line 421 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.str) = NULL; + } +#line 1760 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 31: +#line 425 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.str) = (yyvsp[0].str); + } +#line 1768 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 32: +#line 432 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = new NullOp; + } +#line 1776 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 33: +#line 436 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1784 
"src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 34: +#line 443 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1792 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 35: +#line 447 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = mkAlt((yyvsp[-2].regexp), (yyvsp[0].regexp)); + } +#line 1800 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 36: +#line 454 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1808 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 37: +#line 458 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = in->mkDiff((yyvsp[-2].regexp), (yyvsp[0].regexp)); + } +#line 1816 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 38: +#line 465 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1824 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 39: +#line 469 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = new CatOp((yyvsp[-1].regexp), (yyvsp[0].regexp)); + } +#line 1832 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 40: +#line 476 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1840 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 41: +#line 480 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + switch((yyvsp[0].op)) + { + case '*': + (yyval.regexp) = new CloseOp((yyvsp[-1].regexp)); + break; + case '+': + (yyval.regexp) = new CatOp (new CloseOp((yyvsp[-1].regexp)), (yyvsp[-1].regexp)); + break; + case '?': + (yyval.regexp) = mkAlt((yyvsp[-1].regexp), new NullOp()); + break; + } + } +#line 1859 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 42: +#line 495 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if ((yyvsp[0].extop).max == std::numeric_limits::max()) + { + (yyval.regexp) = repeat_from ((yyvsp[-1].regexp), 
(yyvsp[0].extop).min); + } + else if ((yyvsp[0].extop).min == (yyvsp[0].extop).max) + { + (yyval.regexp) = repeat ((yyvsp[-1].regexp), (yyvsp[0].extop).min); + } + else + { + (yyval.regexp) = repeat_from_to ((yyvsp[-1].regexp), (yyvsp[0].extop).min, (yyvsp[0].extop).max); + } + (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : new NullOp; + } +#line 1879 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 43: +#line 514 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = (yyvsp[0].op); + } +#line 1887 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 44: +#line 518 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = (yyvsp[0].op); + } +#line 1895 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 45: +#line 522 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = ((yyvsp[-1].op) == (yyvsp[0].op)) ? (yyvsp[-1].op) : '*'; + } +#line 1903 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 46: +#line 526 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = ((yyvsp[-1].op) == (yyvsp[0].op)) ? 
(yyvsp[-1].op) : '*'; + } +#line 1911 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 47: +#line 533 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + symbol_table_t::iterator i = symbol_table.find (* (yyvsp[0].str)); + delete (yyvsp[0].str); + if (i == symbol_table.end ()) + { + in->fatal("can't find symbol"); + } + (yyval.regexp) = i->second; + } +#line 1925 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 48: +#line 543 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1933 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 49: +#line 547 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[-1].regexp); + } +#line 1941 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + +#line 1945 "src/parse/parser.cc" /* yacc.c:1646 */ + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. 
*/ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); + + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if ! YYERROR_VERBOSE + yyerror (YY_("syntax error")); +#else +# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ + yyssp, yytoken) + { + char const *yymsgp = YY_("syntax error"); + int yysyntax_error_status; + yysyntax_error_status = YYSYNTAX_ERROR; + if (yysyntax_error_status == 0) + yymsgp = yymsg; + else if (yysyntax_error_status == 1) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); + if (!yymsg) + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = 2; + } + else + { + yysyntax_error_status = YYSYNTAX_ERROR; + yymsgp = yymsg; + } + } + yyerror (yymsgp); + if (yysyntax_error_status == 2) + goto yyexhaustedlab; + } +# undef YYSYNTAX_ERROR +#endif + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. 
| +`---------------------------------------------------*/ +yyerrorlab: + + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. */ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + yystos[yystate], yyvsp); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#if !defined yyoverflow || YYERROR_VERBOSE +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. 
| +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + return yyresult; +} +#line 552 "../src/parse/parser.ypp" /* yacc.c:1906 */ + + +extern "C" { +void yyerror(const char* s) +{ + in->fatal(s); +} + +int yylex(){ + return in ? 
in->scan() : 0; +} +} // end extern "C" + +namespace re2c +{ + +void parse(Scanner& i, Output & o) +{ + std::map > dfa_map; + ScannerState rules_state; + + in = &i; + + o.source.wversion_time () + .wline_info (in->get_cline (), in->get_fname ().c_str ()); + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_prolog (o.source); + } + + Enc encodingOld = opts->encoding; + + while ((parseMode = i.echo()) != Scanner::Stop) + { + o.source.new_block (); + bool bPrologBrace = false; + ScannerState curr_state; + + i.save_state(curr_state); + foundRules = false; + + if (opts->rFlag && parseMode == Scanner::Rules && dfa_map.size()) + { + in->fatal("cannot have a second 'rules:re2c' block"); + } + if (parseMode == Scanner::Reuse) + { + if (dfa_map.empty()) + { + in->fatal("got 'use:re2c' without 'rules:re2c'"); + } + } + else if (parseMode == Scanner::Rules) + { + i.save_state(rules_state); + } + else + { + dfa_map.clear(); + } + rank_counter.reset (); + spec.clear (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + if (opts->rFlag && parseMode == Scanner::Reuse) + { + if (foundRules || opts->encoding != encodingOld) + { + // Re-parse rules + parseMode = Scanner::Parse; + i.restore_state(rules_state); + i.reuse(); + dfa_map.clear(); + parse_cleanup(); + spec.clear (); + rank_counter.reset (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + + // Now append potential new rules + i.restore_state(curr_state); + parseMode = Scanner::Parse; + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + } + encodingOld = opts->encoding; + } + o.source.set_block_line (in->get_cline ()); + uint32_t ind = opts->topIndent; + if (opts->cFlag) + { + SpecMap::iterator it; + SetupMap::const_iterator itRuleSetup; + + if (parseMode != Scanner::Reuse) + { + // <*> rules must have the lowest priority + // now that all rules have been parsed, we can fix it + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) 
+ { + (*itOp)->rank = rank_counter.next (); + } + // merge <*> rules to all conditions + for (it = specMap.begin(); it != specMap.end(); ++it) + { + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) + { + it->second.add (*itOp); + } + if (star_default) + { + it->second.add_def (star_default); + } + } + + if (specNone) + { + specMap["0"].add (specNone); + // Note that "0" inserts first, which is important. + condnames.insert (condnames.begin (), "0"); + } + o.types = condnames; + } + + size_t nCount = specMap.size(); + + for (it = specMap.begin(); it != specMap.end(); ++it) + { + if (parseMode != Scanner::Reuse) + { + itRuleSetup = ruleSetupMap.find(it->first); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + yySetupRule = ""; + } + } + + dfa_map[it->first] = compile(it->second, o, it->first, opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) + { + dfa_map[it->first]->emit(o, ind, !--nCount, bPrologBrace); + } + } + } + else + { + if (spec.re || !dfa_map.empty()) + { + if (parseMode != Scanner::Reuse) + { + dfa_map[""] = compile(spec, o, "", opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end()) + { + dfa_map[""]->emit(o, ind, 0, bPrologBrace); + } + } + } + o.source.wline_info (in->get_cline (), in->get_fname ().c_str ()); + /* restore original char handling mode*/ + opts.reset_encoding (encodingOld); + } + + if (opts->cFlag) + { + SetupMap::const_iterator itRuleSetup; + for (itRuleSetup = ruleSetupMap.begin(); itRuleSetup != ruleSetupMap.end(); ++itRuleSetup) + { + if (itRuleSetup->first != "*" && specMap.find(itRuleSetup->first) == specMap.end()) + { + in->fatalf_at(itRuleSetup->second.first, "setup for non 
existing rule '%s' found", itRuleSetup->first.c_str()); + } + } + if (specMap.size() < ruleSetupMap.size()) + { + uint32_t line = in->get_cline(); + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + line = itRuleSetup->second.first; + } + in->fatalf_at(line, "setup for all rules with '*' not possible when all rules are setup explicitly"); + } + } + + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_epilog (o.source, o.skeletons); + } + + parse_cleanup(); + in = NULL; +} + +void parse_cleanup() +{ + RegExp::vFreeList.clear(); + Range::vFreeList.clear(); + RangeSuffix::freeList.clear(); + Code::freelist.clear(); + symbol_table.clear (); + condnames.clear (); + specMap.clear(); + specStar.clear(); + star_default = NULL; + specNone = NULL; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/parser.h b/tools/re2c/src/parse/parser.h new file mode 100644 index 000000000..8901d7924 --- /dev/null +++ b/tools/re2c/src/parse/parser.h @@ -0,0 +1,28 @@ +#ifndef _RE2C_PARSE_PARSER_ +#define _RE2C_PARSE_PARSER_ + +#include +#include + +#include "src/codegen/output.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/parse/scanner.h" +#include "src/parse/spec.h" + +namespace re2c +{ + +extern void parse(Scanner &, Output &); +extern void parse_cleanup(); + +typedef std::set CondList; +typedef std::list RuleOpList; +typedef std::map SpecMap; +typedef std::map > SetupMap; +typedef std::map DefaultMap; +typedef std::map symbol_table_t; + +} // namespace re2c + +#endif // _RE2C_PARSE_PARSER_ diff --git a/tools/re2c/src/parse/parser.ypp b/tools/re2c/src/parse/parser.ypp new file mode 100644 index 000000000..76540ecc0 --- /dev/null +++ b/tools/re2c/src/parse/parser.ypp @@ -0,0 +1,775 @@ +%{ + +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" 
+#include "src/globals.h" +#include "src/ir/compile.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/loc.h" +#include "src/parse/parser.h" +#include "src/parse/scanner.h" +#include "src/parse/spec.h" +#include "src/util/counter.h" +#include "src/util/free_list.h" +#include "src/util/range.h" +#include "src/util/smart_ptr.h" + +#define YYMALLOC malloc +#define YYFREE free + +using namespace re2c; + +extern "C" +{ +int yylex(); +void yyerror(const char*); +} + +static counter_t rank_counter; +static std::vector condnames; +static re2c::SpecMap specMap; +static Spec spec; +static RuleOp *specNone = NULL; +static RuleOpList specStar; +static RuleOp * star_default = NULL; +static Scanner *in = NULL; +static Scanner::ParseMode parseMode; +static SetupMap ruleSetupMap; +static bool foundRules; +static symbol_table_t symbol_table; + +/* Bison version 1.875 emits a definition that is not working + * with several g++ version. Hence we disable it here. 
+ */ +#if defined(__GNUC__) +#define __attribute__(x) +#endif + +void context_check(CondList *clist) +{ + if (!opts->cFlag) + { + delete clist; + in->fatal("conditions are only allowed when using -c switch"); + } +} + +void context_none(CondList *clist) +{ + delete clist; + context_check(NULL); + in->fatal("no expression specified"); +} + +void context_rule + ( CondList * clist + , const Loc & loc + , RegExp * expr + , RegExp * look + , const Code * code + , const std::string * newcond + ) +{ + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (specMap.find(*it) == specMap.end()) + { + condnames.push_back (*it); + } + + RuleOp * rule = new RuleOp + ( loc + , expr + , look + , rank_counter.next () + , code + , newcond + ); + specMap[*it].add (rule); + } + delete clist; + delete newcond; +} + +void setup_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (ruleSetupMap.find(*it) != ruleSetupMap.end()) + { + in->fatalf_at(code->loc.line, "code to setup rule '%s' is already defined", it->c_str()); + } + ruleSetupMap[*it] = std::make_pair(code->loc.line, code->text); + } + delete clist; +} + +void default_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + RuleOp * def = new RuleOp + ( code->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , code + , NULL + ); + if (!specMap[*it].add_def (def)) + { + in->fatalf_at(code->loc.line, "code to default rule '%s' is already defined", it->c_str()); + } + } + delete clist; +} + +%} + +%start spec + +%union { + re2c::RegExp * regexp; + const re2c::Code * code; + char op; + re2c::ExtOp extop; + std::string * str; + re2c::CondList * clist; +}; + +%token TOKEN_CLOSE +%token TOKEN_CLOSESIZE 
+%token TOKEN_CODE +%token TOKEN_CONF +%token TOKEN_ID +%token TOKEN_FID +%token TOKEN_FID_END +%token TOKEN_NOCOND +%token TOKEN_REGEXP +%token TOKEN_SETUP +%token TOKEN_STAR + +%type TOKEN_CLOSE TOKEN_STAR TOKEN_SETUP close +%type TOKEN_CLOSESIZE +%type TOKEN_CODE +%type TOKEN_REGEXP rule look expr diff term factor primary +%type TOKEN_ID TOKEN_FID newcond +%type cond clist + +%% + +spec: + /* empty */ + { + } + | spec rule + { + foundRules = true; + } + | spec decl +; + +decl: + TOKEN_ID '=' expr ';' + { + if (!symbol_table.insert (std::make_pair (* $1, $3)).second) + { + in->fatal("sym already defined"); + } + delete $1; + } + | TOKEN_FID expr TOKEN_FID_END + { + if (!symbol_table.insert (std::make_pair (* $1, $2)).second) + { + in->fatal("sym already defined"); + } + delete $1; + } + | TOKEN_ID '=' expr '/' + { + in->fatal("trailing contexts are not allowed in named definitions"); + } + | TOKEN_FID expr '/' + { + in->fatal("trailing contexts are not allowed in named definitions"); + } + | TOKEN_CONF {} +; + +rule: + expr look TOKEN_CODE + { + if (opts->cFlag) + { + in->fatal("condition or '<*>' required when using -c switch"); + } + RuleOp * rule = new RuleOp + ( $3->loc + , $1 + , $2 + , rank_counter.next () + , $3 + , NULL + ); + spec.add (rule); + } + | TOKEN_STAR TOKEN_CODE /* default rule */ + { + if (opts->cFlag) + in->fatal("condition or '<*>' required when using -c switch"); + RuleOp * def = new RuleOp + ( $2->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , $2 + , NULL + ); + if (!spec.add_def (def)) + { + in->fatal("code to default rule is already defined"); + } + } + | '<' cond '>' expr look newcond TOKEN_CODE + { + context_rule ($2, $7->loc, $4, $5, $7, $6); + } + | '<' cond '>' expr look ':' newcond + { + assert($7); + Loc loc (in->get_fname (), in->get_cline ()); + context_rule ($2, loc, $4, $5, NULL, $7); + } + | '<' cond '>' look newcond TOKEN_CODE + { + context_none($2); + delete $5; + } + | '<' cond '>' look ':' newcond + { 
+ assert($6); + context_none($2); + delete $6; + } + | '<' cond '>' TOKEN_STAR TOKEN_CODE /* default rule for conditions */ + { + default_rule($2, $5); + } + | '<' TOKEN_STAR '>' expr look newcond TOKEN_CODE + { + context_check(NULL); + RuleOp * rule = new RuleOp + ( $7->loc + , $4 + , $5 + , rank_counter.next () + , $7 + , $6 + ); + specStar.push_back (rule); + delete $6; + } + | '<' TOKEN_STAR '>' expr look ':' newcond + { + assert($7); + context_check(NULL); + Loc loc (in->get_fname (), in->get_cline ()); + RuleOp * rule = new RuleOp + ( loc + , $4 + , $5 + , rank_counter.next () + , NULL + , $7 + ); + specStar.push_back (rule); + delete $7; + } + | '<' TOKEN_STAR '>' look newcond TOKEN_CODE + { + context_none(NULL); + delete $5; + } + | '<' TOKEN_STAR '>' look ':' newcond + { + assert($6); + context_none(NULL); + delete $6; + } + | '<' TOKEN_STAR '>' TOKEN_STAR TOKEN_CODE /* default rule for all conditions */ + { + if (star_default) + { + in->fatal ("code to default rule '*' is already defined"); + } + star_default = new RuleOp + ( $5->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , $5 + , NULL + ); + } + | TOKEN_NOCOND newcond TOKEN_CODE + { + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + $$ = specNone = new RuleOp + ( $3->loc + , new NullOp + , new NullOp + , rank_counter.next () + , $3 + , $2 + ); + delete $2; + } + | TOKEN_NOCOND ':' newcond + { + assert($3); + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + Loc loc (in->get_fname (), in->get_cline ()); + $$ = specNone = new RuleOp + ( loc + , new NullOp + , new NullOp + , rank_counter.next () + , NULL + , $3 + ); + delete $3; + } + | TOKEN_SETUP TOKEN_STAR '>' TOKEN_CODE + { + CondList *clist = new CondList(); + clist->insert("*"); + setup_rule(clist, $4); + } + | TOKEN_SETUP cond '>' TOKEN_CODE + { + setup_rule($2, $4); + } +; + +cond: + /* empty */ + { + 
in->fatal("unnamed condition not supported"); + } + | clist + { + $$ = $1; + } + ; + +clist: + TOKEN_ID + { + $$ = new CondList(); + $$->insert(* $1); + delete $1; + } + | clist ',' TOKEN_ID + { + $1->insert(* $3); + delete $3; + $$ = $1; + } +; + +newcond: + /* empty */ + { + $$ = NULL; + } + | '=' '>' TOKEN_ID + { + $$ = $3; + } +; + +look: + /* empty */ + { + $$ = new NullOp; + } + | '/' expr + { + $$ = $2; + } +; + +expr: + diff + { + $$ = $1; + } + | expr '|' diff + { + $$ = mkAlt($1, $3); + } +; + +diff: + term + { + $$ = $1; + } + | diff '\\' term + { + $$ = in->mkDiff($1, $3); + } +; + +term: + factor + { + $$ = $1; + } + | term factor + { + $$ = new CatOp($1, $2); + } +; + +factor: + primary + { + $$ = $1; + } + | primary close + { + switch($2) + { + case '*': + $$ = new CloseOp($1); + break; + case '+': + $$ = new CatOp (new CloseOp($1), $1); + break; + case '?': + $$ = mkAlt($1, new NullOp()); + break; + } + } + | primary TOKEN_CLOSESIZE + { + if ($2.max == std::numeric_limits::max()) + { + $$ = repeat_from ($1, $2.min); + } + else if ($2.min == $2.max) + { + $$ = repeat ($1, $2.min); + } + else + { + $$ = repeat_from_to ($1, $2.min, $2.max); + } + $$ = $$ ? $$ : new NullOp; + } +; + +close: + TOKEN_CLOSE + { + $$ = $1; + } + | TOKEN_STAR + { + $$ = $1; + } + | close TOKEN_CLOSE + { + $$ = ($1 == $2) ? $1 : '*'; + } + | close TOKEN_STAR + { + $$ = ($1 == $2) ? $1 : '*'; + } +; + +primary: + TOKEN_ID + { + symbol_table_t::iterator i = symbol_table.find (* $1); + delete $1; + if (i == symbol_table.end ()) + { + in->fatal("can't find symbol"); + } + $$ = i->second; + } + | TOKEN_REGEXP + { + $$ = $1; + } + | '(' expr ')' + { + $$ = $2; + } +; + +%% + +extern "C" { +void yyerror(const char* s) +{ + in->fatal(s); +} + +int yylex(){ + return in ? 
in->scan() : 0; +} +} // end extern "C" + +namespace re2c +{ + +void parse(Scanner& i, Output & o) +{ + std::map > dfa_map; + ScannerState rules_state; + + in = &i; + + o.source.wversion_time () + .wline_info (in->get_cline (), in->get_fname ().c_str ()); + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_prolog (o.source); + } + + Enc encodingOld = opts->encoding; + + while ((parseMode = i.echo()) != Scanner::Stop) + { + o.source.new_block (); + bool bPrologBrace = false; + ScannerState curr_state; + + i.save_state(curr_state); + foundRules = false; + + if (opts->rFlag && parseMode == Scanner::Rules && dfa_map.size()) + { + in->fatal("cannot have a second 'rules:re2c' block"); + } + if (parseMode == Scanner::Reuse) + { + if (dfa_map.empty()) + { + in->fatal("got 'use:re2c' without 'rules:re2c'"); + } + } + else if (parseMode == Scanner::Rules) + { + i.save_state(rules_state); + } + else + { + dfa_map.clear(); + } + rank_counter.reset (); + spec.clear (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + if (opts->rFlag && parseMode == Scanner::Reuse) + { + if (foundRules || opts->encoding != encodingOld) + { + // Re-parse rules + parseMode = Scanner::Parse; + i.restore_state(rules_state); + i.reuse(); + dfa_map.clear(); + parse_cleanup(); + spec.clear (); + rank_counter.reset (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + + // Now append potential new rules + i.restore_state(curr_state); + parseMode = Scanner::Parse; + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + } + encodingOld = opts->encoding; + } + o.source.set_block_line (in->get_cline ()); + uint32_t ind = opts->topIndent; + if (opts->cFlag) + { + SpecMap::iterator it; + SetupMap::const_iterator itRuleSetup; + + if (parseMode != Scanner::Reuse) + { + // <*> rules must have the lowest priority + // now that all rules have been parsed, we can fix it + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) 
+ { + (*itOp)->rank = rank_counter.next (); + } + // merge <*> rules to all conditions + for (it = specMap.begin(); it != specMap.end(); ++it) + { + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) + { + it->second.add (*itOp); + } + if (star_default) + { + it->second.add_def (star_default); + } + } + + if (specNone) + { + specMap["0"].add (specNone); + // Note that "0" inserts first, which is important. + condnames.insert (condnames.begin (), "0"); + } + o.types = condnames; + } + + size_t nCount = specMap.size(); + + for (it = specMap.begin(); it != specMap.end(); ++it) + { + if (parseMode != Scanner::Reuse) + { + itRuleSetup = ruleSetupMap.find(it->first); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + yySetupRule = ""; + } + } + + dfa_map[it->first] = compile(it->second, o, it->first, opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) + { + dfa_map[it->first]->emit(o, ind, !--nCount, bPrologBrace); + } + } + } + else + { + if (spec.re || !dfa_map.empty()) + { + if (parseMode != Scanner::Reuse) + { + dfa_map[""] = compile(spec, o, "", opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end()) + { + dfa_map[""]->emit(o, ind, 0, bPrologBrace); + } + } + } + o.source.wline_info (in->get_cline (), in->get_fname ().c_str ()); + /* restore original char handling mode*/ + opts.reset_encoding (encodingOld); + } + + if (opts->cFlag) + { + SetupMap::const_iterator itRuleSetup; + for (itRuleSetup = ruleSetupMap.begin(); itRuleSetup != ruleSetupMap.end(); ++itRuleSetup) + { + if (itRuleSetup->first != "*" && specMap.find(itRuleSetup->first) == specMap.end()) + { + in->fatalf_at(itRuleSetup->second.first, "setup for non 
existing rule '%s' found", itRuleSetup->first.c_str()); + } + } + if (specMap.size() < ruleSetupMap.size()) + { + uint32_t line = in->get_cline(); + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + line = itRuleSetup->second.first; + } + in->fatalf_at(line, "setup for all rules with '*' not possible when all rules are setup explicitly"); + } + } + + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_epilog (o.source, o.skeletons); + } + + parse_cleanup(); + in = NULL; +} + +void parse_cleanup() +{ + RegExp::vFreeList.clear(); + Range::vFreeList.clear(); + RangeSuffix::freeList.clear(); + Code::freelist.clear(); + symbol_table.clear (); + condnames.clear (); + specMap.clear(); + specStar.clear(); + star_default = NULL; + specNone = NULL; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/rules.h b/tools/re2c/src/parse/rules.h new file mode 100644 index 000000000..7815372e1 --- /dev/null +++ b/tools/re2c/src/parse/rules.h @@ -0,0 +1,29 @@ +#ifndef _RE2C_PARSE_RULES_ +#define _RE2C_PARSE_RULES_ + +#include +#include + +#include "src/ir/rule_rank.h" + +namespace re2c +{ + +struct rule_info_t +{ + uint32_t line; + std::set shadow; + bool reachable; + + rule_info_t () + : line (0) + , shadow () + , reachable (false) + {} +}; + +typedef std::map rules_t; + +} // namespace re2c + +#endif // _RE2C_PARSE_RULES_ diff --git a/tools/re2c/src/parse/scanner.cc b/tools/re2c/src/parse/scanner.cc new file mode 100644 index 000000000..09dd2bda4 --- /dev/null +++ b/tools/re2c/src/parse/scanner.cc @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include +#include + +#include "src/codegen/label.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/parse/scanner.h" +#include "src/util/counter.h" + +// used by Scanner::fatal_at and Scanner::fatalf +#if defined(_MSC_VER) && !defined(vsnprintf) +# define vsnprintf _vsnprintf +#endif + +namespace re2c { + +const uint32_t 
Scanner::BSIZE = 8192; + +ScannerState::ScannerState () + : tok (NULL) + , ptr (NULL) + , cur (NULL) + , pos (NULL) + , ctx (NULL) + , bot (NULL) + , lim (NULL) + , top (NULL) + , eof (NULL) + , tchar (0) + , tline (0) + , cline (1) + , in_parse (false) + , lexer_state (LEX_NORMAL) +{} + +ScannerState::ScannerState (const ScannerState & s) + : tok (s.tok) + , ptr (s.ptr) + , cur (s.cur) + , pos (s.pos) + , ctx (s.ctx) + , bot (s.bot) + , lim (s.lim) + , top (s.top) + , eof (s.eof) + , tchar (s.tchar) + , tline (s.tline) + , cline (s.cline) + , in_parse (s.in_parse) + , lexer_state (s.lexer_state) +{} + +ScannerState & ScannerState::operator = (const ScannerState & s) +{ + this->~ScannerState (); /* destroy, then copy-construct in place from s */ + new (this) ScannerState (s); + return * this; +} + +Scanner::Scanner (Input & i, OutputFile & o) + : ScannerState () + , in (i) + , out (o) +{} + +void Scanner::fill (uint32_t need) /* read more input from in.file into the buffer, growing it when the free tail is smaller than max(need, BSIZE); does nothing once eof is set */ +{ + if(!eof) + { + /* Do not get rid of anything when rFlag is active. Otherwise + * get rid of everything that was already handed out. */ + if (!opts->rFlag) + { + const ptrdiff_t diff = tok - bot; + if (diff > 0) + { + const size_t move = static_cast (top - tok); + memmove (bot, tok, move); + tok -= diff; + ptr -= diff; + cur -= diff; + pos -= diff; + lim -= diff; + ctx -= diff; + } + } + /* Increase buffer size. */ + if (BSIZE > need) + { + need = BSIZE; + } + if (static_cast (top - lim) < need) + { + const size_t copy = static_cast (lim - bot); + char * buf = new char[copy + need]; + if (!buf) + { + fatal("Out of memory"); + } + memcpy (buf, bot, copy); + tok = &buf[tok - bot]; + ptr = &buf[ptr - bot]; + cur = &buf[cur - bot]; + pos = &buf[pos - bot]; + lim = &buf[lim - bot]; + top = &lim[need]; + ctx = &buf[ctx - bot]; + delete [] bot; + bot = buf; + } + /* Append to buffer. 
*/ + const size_t have = fread (lim, 1, need, in.file); + if (have != need) /* short read: record the end of input and NUL-terminate it */ + { + eof = &lim[have]; + *eof++ = '\0'; + } + lim += have; + } +} + +void Scanner::set_in_parse(bool new_in_parse) +{ + in_parse = new_in_parse; +} + +void Scanner::fatal_at(uint32_t line, ptrdiff_t ofs, const char *msg) const /* print "re2c: error: line L, column C: msg" to std::cerr and exit(1) */ +{ + std::cerr << "re2c: error: " + << "line " << line << ", column " << (tchar + ofs + 1) << ": " + << msg << std::endl; + exit(1); +} + +void Scanner::fatal(ptrdiff_t ofs, const char *msg) const /* report at tline while parsing, otherwise at cline */ +{ + fatal_at(in_parse ? tline : cline, ofs, msg); +} + +void Scanner::fatalf_at(uint32_t line, const char* fmt, ...) const /* printf-style variant of fatal_at; the message is formatted into a 4096-byte stack buffer */ +{ + char szBuf[4096]; + + va_list args; + + va_start(args, fmt); + vsnprintf(szBuf, sizeof(szBuf), fmt, args); + va_end(args); + + szBuf[sizeof(szBuf)-1] = '\0'; /* guarantee NUL termination even when the message was truncated */ + + fatal_at(line, 0, szBuf); +} + +void Scanner::fatalf(const char *fmt, ...) const /* printf-style variant of fatal; the message is formatted into a 4096-byte stack buffer */ +{ + char szBuf[4096]; + + va_list args; + + va_start(args, fmt); + vsnprintf(szBuf, sizeof(szBuf), fmt, args); + va_end(args); + + szBuf[sizeof(szBuf)-1] = '\0'; /* guarantee NUL termination even when the message was truncated */ + + fatal(szBuf); +} + +Scanner::~Scanner() +{ + delete [] bot; +} + +void Scanner::reuse() +{ + out.label_counter.reset (); + last_fill_index = 0; + bWroteGetState = false; + bWroteCondCheck = false; + opts.reset_mapCodeName (); +} + +void Scanner::restore_state(const ScannerState& state) /* reload a saved state; when the buffer base differs from the saved one, keep the current buffer bounds and shift the position pointers */ +{ + ptrdiff_t diff = bot - state.bot; + char *old_bot = bot; + char *old_lim = lim; + char *old_top = top; + char *old_eof = eof; + *(ScannerState*)this = state; + if (diff) /* NOTE(review): diff is (current bot - saved bot), yet the restored pointers are moved by -diff; confirm the sign is intended */ + { + tok -= diff; + ptr -= diff; + cur -= diff; + pos -= diff; + ctx -= diff; + bot = old_bot; + lim = old_lim; + top = old_top; + eof = old_eof; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/parse/scanner.h b/tools/re2c/src/parse/scanner.h new file mode 100644 index 000000000..d13eeaa1d --- /dev/null +++ b/tools/re2c/src/parse/scanner.h @@ -0,0 +1,147 @@ +#ifndef _RE2C_PARSE_SCANNER_ +#define _RE2C_PARSE_SCANNER_ + +#include "src/util/c99_stdint.h" +#include +#include + +#include 
"src/parse/input.h" +#include "src/util/attribute.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +class Range; +class RegExp; +struct OutputFile; + +struct ScannerState +{ + enum lexer_state_t + { + LEX_NORMAL, + LEX_FLEX_NAME + }; + + // positioning + char * tok; + char * ptr; + char * cur; + char * pos; + char * ctx; + + // buffer + char * bot; + char * lim; + char * top; + char * eof; + + ptrdiff_t tchar; + uint32_t tline; + uint32_t cline; + + bool in_parse; + lexer_state_t lexer_state; + + ScannerState (); + ScannerState (const ScannerState &); + ScannerState & operator = (const ScannerState &); +}; + +class Scanner: private ScannerState +{ + static const uint32_t BSIZE; + + Input & in; +public: + OutputFile & out; + +private: + void fill (uint32_t); + void set_sourceline (); + uint32_t lex_cls_chr(); + uint32_t lex_str_chr(char quote, bool &end); + RegExp *lex_cls(bool neg); + RegExp *lex_str(char quote, bool casing); + RegExp *schr(uint32_t c) const; + RegExp *ichr(uint32_t c) const; + RegExp *cls(Range *r) const; + + void lex_conf (); + void lex_conf_assign (); + void lex_conf_semicolon (); + int32_t lex_conf_number (); + std::string lex_conf_string (); + + size_t tok_len () const; + +public: + Scanner(Input &, OutputFile &); + ~Scanner(); + + enum ParseMode { + Stop, + Parse, + Reuse, + Rules + }; + + ParseMode echo(); + int scan(); + void reuse(); + + void save_state(ScannerState&) const; + void restore_state(const ScannerState&); + + uint32_t get_cline() const; + uint32_t get_line() const; + const std::string & get_fname () const; + void set_in_parse(bool new_in_parse); + void fatal_at(uint32_t line, ptrdiff_t ofs, const char *msg) const; + void fatalf_at(uint32_t line, const char*, ...) const RE2C_GXX_ATTRIBUTE ((format (printf, 3, 4))); + void fatalf(const char*, ...) 
const RE2C_GXX_ATTRIBUTE ((format (printf, 2, 3))); + void fatal(const char*) const; + void fatal(ptrdiff_t, const char*) const; + + RegExp * mkDiff (RegExp * e1, RegExp * e2) const; + RegExp * mkDot () const; + RegExp * mkDefault () const; + + FORBID_COPY (Scanner); +}; + +inline size_t Scanner::tok_len () const +{ + // lexing and fill procedures must maintain: token pointer <= cursor pointer + return static_cast (cur - tok); +} + +inline const std::string & Scanner::get_fname () const +{ + return in.file_name; +} + +inline uint32_t Scanner::get_cline() const +{ + return cline; +} + +inline uint32_t Scanner::get_line() const +{ + return in_parse ? tline : cline; +} + +inline void Scanner::save_state(ScannerState& state) const +{ + state = *this; +} + +inline void Scanner::fatal(const char *msg) const +{ + fatal(0, msg); +} + +} // end namespace re2c + +#endif // _RE2C_PARSE_SCANNER_ diff --git a/tools/re2c/src/parse/spec.h b/tools/re2c/src/parse/spec.h new file mode 100644 index 000000000..0b68af74f --- /dev/null +++ b/tools/re2c/src/parse/spec.h @@ -0,0 +1,55 @@ +#ifndef _RE2C_PARSE_SPEC_ +#define _RE2C_PARSE_SPEC_ + +#include "src/ir/regexp/regexp_rule.h" +#include "src/parse/rules.h" + +namespace re2c +{ + +struct Spec +{ + RegExp * re; + rules_t rules; + + Spec () + : re (NULL) + , rules () + {} + Spec (const Spec & spec) + : re (spec.re) + , rules (spec.rules) + {} + Spec & operator = (const Spec & spec) + { + re = spec.re; + rules = spec.rules; + return *this; + } + bool add_def (RuleOp * r) + { + if (rules.find (rule_rank_t::def ()) != rules.end ()) + { + return false; + } + else + { + add (r); + return true; + } + } + void add (RuleOp * r) + { + rules[r->rank].line = r->loc.line; + re = mkAlt (re, r); + } + void clear () + { + re = NULL; + rules.clear (); + } +}; + +} // namespace re2c + +#endif // _RE2C_PARSE_SPEC_ diff --git a/tools/re2c/src/parse/unescape.cc b/tools/re2c/src/parse/unescape.cc new file mode 100644 index 000000000..0c17139fb --- /dev/null 
+++ b/tools/re2c/src/parse/unescape.cc @@ -0,0 +1,60 @@ +#include "src/parse/unescape.h" + +namespace re2c { + +// expected characters: [0-9a-zA-Z] +static inline uint32_t hex_digit (const char c) +{ + switch (c) + { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': + case 'A': return 0xA; + case 'b': + case 'B': return 0xB; + case 'c': + case 'C': return 0xC; + case 'd': + case 'D': return 0xD; + case 'e': + case 'E': return 0xE; + case 'f': + case 'F': return 0xF; + default: return ~0u; // unexpected + } +} + +// expected string format: "\" [xXuU] [0-9a-zA-Z]* +uint32_t unesc_hex (const char * s, const char * s_end) +{ + uint32_t n = 0; + for (s += 2; s != s_end; ++s) + { + n <<= 4; + n += hex_digit (*s); + } + return n; +} + +// expected string format: "\" [0-7]* +uint32_t unesc_oct (const char * s, const char * s_end) +{ + uint32_t n = 0; + for (++s; s != s_end; ++s) + { + n <<= 3; + n += static_cast (*s - '0'); + } + return n; +} + +} // namespace re2c diff --git a/tools/re2c/src/parse/unescape.h b/tools/re2c/src/parse/unescape.h new file mode 100644 index 000000000..000c378d7 --- /dev/null +++ b/tools/re2c/src/parse/unescape.h @@ -0,0 +1,13 @@ +#ifndef _RE2C_PARSE_UNESCAPE_ +#define _RE2C_PARSE_UNESCAPE_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +uint32_t unesc_hex (const char * s, const char * s_end); +uint32_t unesc_oct (const char * s, const char * s_end); + +} // namespace re2c + +#endif // _RE2C_PARSE_UNESCAPE_ diff --git a/tools/re2c/src/parse/y.tab.h b/tools/re2c/src/parse/y.tab.h new file mode 100644 index 000000000..52bd8d1b9 --- /dev/null +++ b/tools/re2c/src/parse/y.tab.h @@ -0,0 +1,89 @@ +/* A Bison parser, made by GNU Bison 3.0.4. 
*/ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +#ifndef YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +# define YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + TOKEN_CLOSE = 258, + TOKEN_CLOSESIZE = 259, + TOKEN_CODE = 260, + TOKEN_CONF = 261, + TOKEN_ID = 262, + TOKEN_FID = 263, + TOKEN_FID_END = 264, + TOKEN_NOCOND = 265, + TOKEN_REGEXP = 266, + TOKEN_SETUP = 267, + TOKEN_STAR = 268 + }; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED + +union YYSTYPE +{ +#line 161 "../src/parse/parser.ypp" /* yacc.c:1909 */ + + re2c::RegExp * regexp; + const re2c::Code * code; + char op; + re2c::ExtOp extop; + std::string * str; + re2c::CondList * clist; + +#line 77 "src/parse/y.tab.h" /* yacc.c:1909 */ +}; + +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + +extern YYSTYPE yylval; + +int yyparse (void); + +#endif /* !YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED */ diff --git a/tools/re2c/src/test/range/test-impl.h b/tools/re2c/src/test/range/test-impl.h new file mode 100644 index 000000000..3f1d78403 --- /dev/null +++ b/tools/re2c/src/test/range/test-impl.h @@ -0,0 +1,50 @@ +#ifndef _RE2C_TEST_RANGE_TEST_IMPL_ +#define _RE2C_TEST_RANGE_TEST_IMPL_ + +#include "src/test/range/test.h" +#include "src/util/range.h" +#include "src/util/static_assert.h" + +namespace re2c_test { + +static inline bool bit_set (uint32_t n, uint32_t bit) +{ + return n & (1u << bit); +} + +template +re2c::Range * range (uint32_t n) +{ + RE2C_STATIC_ASSERT (BITS <= 31); + + re2c::Range * r = NULL; + re2c::Range ** p = &r; + for (uint32_t i = 0; i < BITS; ++i) + { + for (; i < BITS && !bit_set (n, i); ++i); + if (i == BITS && !bit_set (n, BITS - 1)) + { + break; + } + const uint32_t lb = i; + for (; i < BITS && bit_set (n, i); ++i); + re2c::Range::append (p, lb, i); + } + return r; +} + +template +re2c::Range * add (uint32_t n1, uint32_t n2) +{ + return range (n1 | n2); +} + +template +re2c::Range * sub (uint32_t n1, uint32_t n2) +{ + return range (n1 & ~n2); +} + +} // namespace re2c_test + +#endif // _RE2C_TEST_RANGE_TEST_IMPL_ diff --git a/tools/re2c/src/test/range/test.cc b/tools/re2c/src/test/range/test.cc new file mode 100644 index 000000000..a602f79db --- /dev/null +++ b/tools/re2c/src/test/range/test.cc @@ -0,0 +1,94 @@ +#include + +#include "src/test/range/test.h" +#include "src/test/range/test-impl.h" + +namespace re2c_test { + +static bool equal (const 
re2c::Range * r1, const re2c::Range * r2) +{ + for (; r1 && r2; r1 = r1->next (), r2 = r2->next ()) + { + if (r1->lower () != r2->lower () + || r1->upper () != r2->upper ()) + { + return false; + } + } + return !r1 && !r2; +} + +static void show (const re2c::Range * r) +{ + if (!r) + { + fprintf (stderr, "[]"); + } + for (; r; r = r->next ()) + { + const uint32_t l = r->lower (); + const uint32_t u = r->upper () - 1; + if (l < u) + { + fprintf (stderr, "[%X-%X]", l, u); + } + else + { + fprintf (stderr, "[%X]", l); + } + } +} + +static int32_t diff + ( const re2c::Range * r1 + , const re2c::Range * r2 + , const re2c::Range * op1 + , const re2c::Range * op2 + , const char * op) +{ + if (equal (op1, op2)) + { + return 0; + } + else + { + fprintf (stderr, "%s error: ", op); + show (r1); + fprintf (stderr, " %s ", op); + show (r2); + fprintf (stderr, " ====> "); + show (op2); + fprintf (stderr, " =/= "); + show (op1); + fprintf (stderr, "\n"); + return 1; + } +} + +static int32_t test () +{ + int32_t ok = 0; + + static const uint32_t BITS = 8; + static const uint32_t N = 1u << BITS; + for (uint32_t i = 0; i <= N; ++i) + { + for (uint32_t j = 0; j <= N; ++j) + { + re2c::Range * r1 = range (i); + re2c::Range * r2 = range (j); + ok |= diff (r1, r2, add (i, j), re2c::Range::add (r1, r2), "U"); + ok |= diff (r1, r2, sub (i, j), re2c::Range::sub (r1, r2), "D"); + re2c::Range::vFreeList.clear (); + } + } + + return ok; +} + +} // namespace re2c_test + +int main () +{ + return re2c_test::test (); +} diff --git a/tools/re2c/src/test/range/test.h b/tools/re2c/src/test/range/test.h new file mode 100644 index 000000000..18aa1aca7 --- /dev/null +++ b/tools/re2c/src/test/range/test.h @@ -0,0 +1,26 @@ +#ifndef _RE2C_TEST_RANGE_TEST_ +#define _RE2C_TEST_RANGE_TEST_ + +#include "src/util/c99_stdint.h" + +namespace re2c { class Range; } + +namespace re2c_test { + +/* + * If encoding has N code units (characters), character class can be + * represented as an N-bit integer: k-th bit is 
set iff k-th character + * belongs to the class. + * + * Addition and subtraction can be implemented trivially for such + * integer representation of character classes: addition is simply + * bitwise OR of two classes, subtraction is bitwise AND of the first + * class and negated second class. + */ +template re2c::Range * range (uint32_t n); +template re2c::Range * add (uint32_t n1, uint32_t n2); +template re2c::Range * sub (uint32_t n1, uint32_t n2); + +} // namespace re2c_test + +#endif // _RE2C_TEST_RANGE_TEST_ diff --git a/tools/re2c/src/test/s_to_n32_unsafe/test.cc b/tools/re2c/src/test/s_to_n32_unsafe/test.cc new file mode 100644 index 000000000..47b3d47d7 --- /dev/null +++ b/tools/re2c/src/test/s_to_n32_unsafe/test.cc @@ -0,0 +1,102 @@ +#include +#include + +#include "src/util/s_to_n32_unsafe.h" + +namespace re2c_test { + +static const uint32_t DIGITS = 256; + +// writes string backwards and returns pointer to the start +// no terminating null as we don't need it +static char * u64_to_s_fastest_ever (uint64_t u, char * s) +{ + while (u > 0) + { + const uint64_t d = u % 10 + '0'; + *--s = static_cast (d); + u /= 10; + } + return s; +} + +static int32_t test_u (uint64_t i) +{ + char s [DIGITS]; + char * const s_end = s + DIGITS; + char * const s_start = u64_to_s_fastest_ever (i, s_end); + uint32_t u = i == 0; // not equal to i + if (s_to_u32_unsafe (s_start, s_end, u) && u != i) + { + fprintf (stderr, "unsigned: expected: %lu, got: %u\n", i, u); + return 1; + } + return 0; +} + +static int32_t test_i (int64_t i) +{ + char s [DIGITS]; + char * const s_end = s + DIGITS; + const uint64_t i_abs = i < 0 + ? 
static_cast (-i) + : static_cast (i); + char * s_start = u64_to_s_fastest_ever (i_abs, s_end); + if (i < 0) + { + *--s_start = '-'; + } + int32_t j = i == 0; // not equal to i + if (s_to_i32_unsafe (s_start, s_end, j) && j != i) + { + fprintf (stderr, "signed: expected: %ld, got: %d\n", i, j); + return 1; + } + return 0; +} + +static int32_t test () +{ + int32_t ok = 0; + + static const uint64_t UDELTA = 0xFFFF; + // zero neighbourhood + for (uint64_t i = 0; i <= UDELTA; ++i) + { + ok |= test_u (i); + } + // u32_max neighbourhood + static const uint64_t u32_max = std::numeric_limits::max(); + for (uint64_t i = u32_max - UDELTA; i <= u32_max + UDELTA; ++i) + { + ok |= test_u (i); + } + + static const int64_t IDELTA = 0xFFFF; + // i32_min neighbourhood + static const int64_t i32_min = std::numeric_limits::min(); + for (int64_t i = i32_min - IDELTA; i <= i32_min + IDELTA; ++i) + { + ok |= test_i (i); + } + // zero neighbourhood + for (int64_t i = -IDELTA; i <= IDELTA; ++i) + { + ok |= test_i (i); + } + // i32_max neighbourhood + static const int64_t i32_max = std::numeric_limits::max(); + for (int64_t i = i32_max - IDELTA; i <= i32_max + IDELTA; ++i) + { + ok |= test_i (i); + } + + return ok; +} + +} // namespace re2c_test + +int main () +{ + return re2c_test::test (); +} diff --git a/tools/re2c/src/util/allocate.h b/tools/re2c/src/util/allocate.h new file mode 100644 index 000000000..f664910c8 --- /dev/null +++ b/tools/re2c/src/util/allocate.h @@ -0,0 +1,19 @@ +#ifndef _RE2C_UTIL_ALLOCATE_ +#define _RE2C_UTIL_ALLOCATE_ + +#include // size_t + +namespace re2c { + +// useful for allocation of arrays of POD objects +// 'new []' invokes default constructor for each object +// this can be unacceptable for performance reasons +template T * allocate (size_t n) +{ + void * p = operator new (n * sizeof (T)); + return static_cast (p); +} + +} // namespace re2c + +#endif // _RE2C_UTIL_ALLOCATE_ diff --git a/tools/re2c/src/util/attribute.h b/tools/re2c/src/util/attribute.h new 
file mode 100644 index 000000000..027a6072d --- /dev/null +++ b/tools/re2c/src/util/attribute.h @@ -0,0 +1,10 @@ +#ifndef _RE2C_UTIL_ATTRIBUTE_ +#define _RE2C_UTIL_ATTRIBUTE_ + +#ifdef __GNUC__ +# define RE2C_GXX_ATTRIBUTE(x) __attribute__(x) +#else +# define RE2C_GXX_ATTRIBUTE(x) +#endif + +#endif // _RE2C_UTIL_ATTRIBUTE_ diff --git a/tools/re2c/src/util/c99_stdint.h b/tools/re2c/src/util/c99_stdint.h new file mode 100644 index 000000000..571860431 --- /dev/null +++ b/tools/re2c/src/util/c99_stdint.h @@ -0,0 +1,266 @@ +#ifndef _RE2C_UTIL_C99_STDINT_ +#define _RE2C_UTIL_C99_STDINT_ + +#if defined(_MSC_VER) && _MSC_VER < 1500 +#include "config.msc.h" +#else +#include "config.h" +#endif + +#if HAVE_STDINT_H +# include +#else // HAVE_STDINT_H + +// A humble attempt to provide C99 compliant +// for environments that don't have it (e.g., MSVC 2003). +// +// First, we try to define exact-width integer types. We don't +// rely on any particular environment: instead, we search for +// a type of certain width in the following list: +// char (C89) +// short (C89) +// int (C89) +// long (C89) +// long long (C99) +// __int64 (MSVC-specific) +// (we consider even insane possibilities for simplicity). +// The size of each type is defined by autoconf in the form +// of a macro SIZEOF_ (set to 0 for nonexistent types). +// If we don't find a type with the required width, we don't +// define the corresponding exact-width C99 type at all. +// +// We define other types and constants based on exact-width +// types and C99 standard. +// +// We use SIZEOF_VOID_P to determine size of pointers. +// +// We use SIZEOF_0 to find suitable 64-bit integer +// constant suffix. 
+ +// C99-7.18.1.1 Exact-width integer types + +// int8_t, uint8_t +#if SIZEOF_CHAR == 1 + typedef signed char int8_t; + typedef unsigned char uint8_t; +#elif SIZEOF_SHORT == 1 + typedef signed short int8_t; + typedef unsigned short uint8_t; +#elif SIZEOF_INT == 1 + typedef signed int int8_t; + typedef unsigned int uint8_t; +#elif SIZEOF_LONG == 1 + typedef signed long int8_t; + typedef unsigned long uint8_t; +#elif SIZEOF_LONG_LONG == 1 + typedef signed long long int8_t; + typedef unsigned long long uint8_t; +#elif SIZEOF___INT64 == 1 + typedef signed __int64 int8_t; + typedef unsigned __int64 uint8_t; +#endif + +// int16_t, uint16_t +#if SIZEOF_CHAR == 2 + typedef signed char int16_t; + typedef unsigned char uint16_t; +#elif SIZEOF_SHORT == 2 + typedef signed short int16_t; + typedef unsigned short uint16_t; +#elif SIZEOF_INT == 2 + typedef signed int int16_t; + typedef unsigned int uint16_t; +#elif SIZEOF_LONG == 2 + typedef signed long int16_t; + typedef unsigned long uint16_t; +#elif SIZEOF_LONG_LONG == 2 + typedef signed long long int16_t; + typedef unsigned long long uint16_t; +#elif SIZEOF___INT64 == 2 + typedef signed __int64 int16_t; + typedef unsigned __int64 uint16_t; +#endif + +// int32_t, uint32_t +#if SIZEOF_CHAR == 4 + typedef signed char int32_t; + typedef unsigned char uint32_t; +#elif SIZEOF_SHORT == 4 + typedef signed short int32_t; + typedef unsigned short uint32_t; +#elif SIZEOF_INT == 4 + typedef signed int int32_t; + typedef unsigned int uint32_t; +#elif SIZEOF_LONG == 4 + typedef signed long int32_t; + typedef unsigned long uint32_t; +#elif SIZEOF_LONG_LONG == 4 + typedef signed long long int32_t; + typedef unsigned long long uint32_t; +#elif SIZEOF___INT64 == 4 + typedef signed __int64 int32_t; + typedef unsigned __int64 uint32_t; +#endif + +// int64_t, uint64_t +#if SIZEOF_CHAR == 8 + typedef signed char int64_t; + typedef unsigned char uint64_t; +#elif SIZEOF_SHORT == 8 + typedef signed short int64_t; + typedef unsigned short uint64_t; 
+#elif SIZEOF_INT == 8 + typedef signed int int64_t; + typedef unsigned int uint64_t; +#elif SIZEOF_LONG == 8 + typedef signed long int64_t; + typedef unsigned long uint64_t; +#elif SIZEOF_LONG_LONG == 8 + typedef signed long long int64_t; + typedef unsigned long long uint64_t; +#elif SIZEOF___INT64 == 8 + typedef signed __int64 int64_t; + typedef unsigned __int64 uint64_t; +#endif + +// C99-7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// C99-7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// C99-7.18.1.4 Integer types capable of holding object pointers +#if SIZEOF_VOID_P == 8 + typedef int64_t intptr_t; + typedef uint64_t uintptr_t; +#else + typedef int intptr_t; + typedef unsigned int uintptr_t; +#endif + +// C99-7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // See footnote 220 at page 257 and footnote 221 at page 259 + +// C99-7.18.2.1 Limits of exact-width integer types +#define INT8_MIN (-128) // -2^(8 - 1) +#define INT8_MAX 127 // 2^(8 - 1) - 1 +#define INT16_MIN (-32768) // -2^(16 - 1) +#define INT16_MAX 32767 // 2^(16 - 1) - 1 +#define INT32_MIN (-2147483647 - 1) // -2^(32 - 1), spelled as -MAX - 1 so the literal stays signed +#define INT32_MAX 2147483647 // 2^(32 - 1) - 1 +#define INT64_MIN (-9223372036854775807 - 1) // -2^(64 - 1), spelled as -MAX - 1 so the literal stays signed +#define INT64_MAX 9223372036854775807 // 2^(64 - 1) - 1 +#define UINT8_MAX 0xFF // 2^8 - 1 +#define UINT16_MAX 0xFFFF // 2^16 - 1 +#define UINT32_MAX 0xFFFFffff // 2^32 - 1 
+#define UINT64_MAX 0xFFFFffffFFFFffff // 2^64 - 1 + +// C99-7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// C99-7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// C99-7.18.2.4 Limits of integer types capable of holding object pointers +#define INTPTR_MIN (-32767) // -(2^15 - 1) +#define INTPTR_MAX 32767 // 2^15 - 1 +#define UINTPTR_MAX 0xFFFF // 2^16 - 1 + +// C99-7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN (-9223372036854775807) // -(2^63 - 1) +#define INTMAX_MAX 9223372036854775807 // 2^63 - 1 +#define UINTMAX_MAX 0xFFFFffffFFFFffff // 2^64 - 1 + +// C99-7.18.3 Limits of other integer types: +// "An implementation shall define only the macros +// corresponding to those typedef names it actually +// provides" +// and footnote 222 at page 259: +// "A freestanding implementation need not provide +// all of these types." 
+// +// Since we don't define corresponding types, we don't +// define the following limits either: +// PTRDIFF_MIN +// PTRDIFF_MAX +// SIG_ATOMIC_MIN +// SIG_ATOMIC_MAX +// SIZE_MAX +// WCHAR_MIN +// WCHAR_MAX +// WINT_MIN +// WINT_MAX + +#endif // __STDC_LIMIT_MACROS + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // See footnote 224 at page 260 + +// C99-7.18.4.1 Macros for minimum-width integer constants +#define INT8_C(x) x +#define UINT8_C(x) x##u +#define INT16_C(x) x +#define UINT16_C(x) x##u +#define INT32_C(x) x +#define UINT32_C(x) x##u +#if SIZEOF_0L == 8 +# define INT64_C(x) x##l +# define UINT64_C(x) x##ul +#elif SIZEOF_0LL == 8 +# define INT64_C(x) x##ll +# define UINT64_C(x) x##ull +#elif SIZEOF_0I8 == 8 +# define INT64_C(x) x##i8 +# define UINT64_C(x) x##ui8 +#else +# define INT64_C(x) x +# define UINT64_C(x) x##u +#endif + +// C99-7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS + +#endif // HAVE_STDINT_H + +#endif // _RE2C_UTIL_C99_STDINT_ diff --git a/tools/re2c/src/util/counter.h b/tools/re2c/src/util/counter.h new file mode 100644 index 000000000..f4f583944 --- /dev/null +++ b/tools/re2c/src/util/counter.h @@ -0,0 +1,29 @@ +#ifndef _RE2C_UTIL_COUNTER_ +#define _RE2C_UTIL_COUNTER_ + +namespace re2c { + +template +class counter_t +{ + num_t num; + +public: + counter_t () + : num () + {} + num_t next () + { + num_t n = num; + num.inc (); + return n; + } + void reset () + { + num = num_t (); + } +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_COUNTER_ diff --git a/tools/re2c/src/util/forbid_copy.h b/tools/re2c/src/util/forbid_copy.h new file mode 100644 index 000000000..9c5701d7d --- /dev/null +++ b/tools/re2c/src/util/forbid_copy.h @@ -0,0 +1,11 @@ +#ifndef _RE2C_UTIL_FORBID_COPY_ +#define _RE2C_UTIL_FORBID_COPY_ + +// must be used at the end of class definition +// (since this macro changes scope to private) +#define FORBID_COPY(type) 
\ + private: \ + type (const type &); \ + type & operator = (const type &) + +#endif // _RE2C_UTIL_FORBID_COPY_ diff --git a/tools/re2c/src/util/free_list.h b/tools/re2c/src/util/free_list.h new file mode 100644 index 000000000..8d3ac6546 --- /dev/null +++ b/tools/re2c/src/util/free_list.h @@ -0,0 +1,56 @@ +#ifndef _RE2C_UTIL_FREE_LIST_ +#define _RE2C_UTIL_FREE_LIST_ + +#include + +namespace re2c +{ + +template +class free_list: protected std::set<_Ty> +{ +public: + typedef typename std::set<_Ty>::iterator iterator; + typedef typename std::set<_Ty>::size_type size_type; + typedef typename std::set<_Ty>::key_type key_type; + + free_list(): in_clear(false) + { + } + + using std::set<_Ty>::insert; + + size_type erase(const key_type& key) + { + if (!in_clear) + { + return std::set<_Ty>::erase(key); + } + return 0; + } + + void clear() + { + in_clear = true; + + for(iterator it = this->begin(); it != this->end(); ++it) + { + delete *it; + } + std::set<_Ty>::clear(); + + in_clear = false; + } + + ~free_list() + { + clear(); + } + +protected: + bool in_clear; +}; + +} // end namespace re2c + +#endif // _RE2C_UTIL_FREE_LIST_ diff --git a/tools/re2c/src/util/local_increment.h b/tools/re2c/src/util/local_increment.h new file mode 100644 index 000000000..799ced43b --- /dev/null +++ b/tools/re2c/src/util/local_increment.h @@ -0,0 +1,22 @@ +#ifndef _RE2C_UTIL_LOCAL_INCREMENT_ +#define _RE2C_UTIL_LOCAL_INCREMENT_ + +namespace re2c +{ + +template +struct local_increment_t +{ + counter_t & counter; + inline explicit local_increment_t (counter_t & c) + : counter (++c) + {} + inline ~local_increment_t () + { + --counter; + } +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_LOCAL_INCREMENT_ diff --git a/tools/re2c/src/util/ord_hash_set.h b/tools/re2c/src/util/ord_hash_set.h new file mode 100644 index 000000000..9a0e73eae --- /dev/null +++ b/tools/re2c/src/util/ord_hash_set.h @@ -0,0 +1,115 @@ +#ifndef _RE2C_UTIL_ORD_HASH_SET_ +#define _RE2C_UTIL_ORD_HASH_SET_ + +#include 
"src/util/c99_stdint.h" +#include // malloc, free +#include // memcpy +#include +#include + +namespace re2c +{ + +/* + * ordered hash set: + * - access element by index: O(1) + * - insert element (find existing or add new): O(log(n)) + * + */ +class ord_hash_set_t +{ + struct elem_t + { + elem_t *next; + size_t index; + size_t size; + char data[1]; // inlined array of variable length + }; + typedef size_t hash_t; + + std::vector elems; + std::map lookup; + + static hash_t hash(const void *data, size_t size); + elem_t *make_elem(elem_t *next, size_t index, size_t size, const void *data); + +public: + ord_hash_set_t(); + ~ord_hash_set_t(); + size_t size() const; + size_t insert(const void *data, size_t size); + template size_t deref(size_t i, data_t *&data); +}; + +ord_hash_set_t::hash_t ord_hash_set_t::hash(const void *data, size_t size) +{ + const uint8_t *bytes = static_cast(data); + hash_t h = size; // seed + for (size_t i = 0; i < size; ++i) + { + h = h ^ ((h << 5) + (h >> 2) + bytes[i]); + } + return h; +} + +ord_hash_set_t::elem_t* ord_hash_set_t::make_elem( + elem_t *next, + size_t index, + size_t size, + const void *data) +{ + elem_t *e = static_cast(malloc(offsetof(elem_t, data) + size)); + e->next = next; + e->index = index; + e->size = size; + memcpy(e->data, data, size); + return e; +} + +ord_hash_set_t::ord_hash_set_t() + : elems() + , lookup() +{} + +ord_hash_set_t::~ord_hash_set_t() +{ + std::for_each(elems.begin(), elems.end(), free); +} + +size_t ord_hash_set_t::size() const +{ + return elems.size(); +} + +size_t ord_hash_set_t::insert(const void *data, size_t size) +{ + const hash_t h = hash(data, size); + + std::map::const_iterator i = lookup.find(h); + if (i != lookup.end()) + { + for (elem_t *e = i->second; e; e = e->next) + { + if (e->size == size + && memcmp(e->data, data, size) == 0) + { + return e->index; + } + } + } + + const size_t index = elems.size(); + elems.push_back(lookup[h] = make_elem(lookup[h], index, size, data)); + return index; 
+} + +template size_t ord_hash_set_t::deref(size_t i, data_t *&data) +{ + elem_t *e = elems[i]; + data = reinterpret_cast(e->data); + return e->size / sizeof(data_t); +} + +} // namespace re2c + +#endif // _RE2C_UTIL_ORD_HASH_SET_ diff --git a/tools/re2c/src/util/range.cc b/tools/re2c/src/util/range.cc new file mode 100644 index 000000000..fa46ab332 --- /dev/null +++ b/tools/re2c/src/util/range.cc @@ -0,0 +1,97 @@ +#include "src/util/range.h" + +namespace re2c +{ + +free_list Range::vFreeList; + +void Range::append_overlapping (Range * & head, Range * & tail, const Range * r) +{ + if (!head) + { + head = Range::ran (r->lb, r->ub); + tail = head; + } + else if (tail->ub < r->lb) + { + tail->nx = Range::ran (r->lb, r->ub); + tail = tail->nx; + } + else if (tail->ub < r->ub) + { + tail->ub = r->ub; + } +} + +Range * Range::add (const Range * r1, const Range * r2) +{ + Range * head = NULL; + Range * tail = NULL; + for (; r1 && r2;) + { + if (r1->lb < r2->lb) + { + append_overlapping (head, tail, r1); + r1 = r1->nx; + } + else + { + append_overlapping (head, tail, r2); + r2 = r2->nx; + } + } + for (; r1; r1 = r1->nx) + { + append_overlapping (head, tail, r1); + } + for (; r2; r2 = r2->nx) + { + append_overlapping (head, tail, r2); + } + return head; +} + +void Range::append (Range ** & ptail, uint32_t l, uint32_t u) +{ + Range * & tail = * ptail; + tail = Range::ran (l, u); + ptail = &tail->nx; +} + +Range * Range::sub (const Range * r1, const Range * r2) +{ + Range * head = NULL; + Range ** ptail = &head; + while (r1) + { + if (!r2 || r2->lb >= r1->ub) + { + append (ptail, r1->lb, r1->ub); + r1 = r1->nx; + } + else if (r2->ub <= r1->lb) + { + r2 = r2->nx; + } + else + { + if (r1->lb < r2->lb) + { + append (ptail, r1->lb, r2->lb); + } + while (r2 && r2->ub < r1->ub) + { + const uint32_t lb = r2->ub; + r2 = r2->nx; + const uint32_t ub = r2 && r2->lb < r1->ub + ? 
r2->lb + : r1->ub; + append (ptail, lb, ub); + } + r1 = r1->nx; + } + } + return head; +} + +} // namespace re2c diff --git a/tools/re2c/src/util/range.h b/tools/re2c/src/util/range.h new file mode 100644 index 000000000..1ea7d9be4 --- /dev/null +++ b/tools/re2c/src/util/range.h @@ -0,0 +1,65 @@ +#ifndef _RE2C_UTIL_RANGE_ +#define _RE2C_UTIL_RANGE_ + +#include "src/util/c99_stdint.h" +#include +#include // NULL + +#include "src/test/range/test.h" +#include "src/util/forbid_copy.h" +#include "src/util/free_list.h" + +namespace re2c +{ + +class Range +{ +public: + static free_list vFreeList; + +private: + Range * nx; + // [lb,ub) + uint32_t lb; + uint32_t ub; + +public: + static Range * sym (uint32_t c) + { + return new Range (NULL, c, c + 1); + } + static Range * ran (uint32_t l, uint32_t u) + { + return new Range (NULL, l, u); + } + ~Range () + { + vFreeList.erase (this); + } + Range * next () const { return nx; } + uint32_t lower () const { return lb; } + uint32_t upper () const { return ub; } + static Range * add (const Range * r1, const Range * r2); + static Range * sub (const Range * r1, const Range * r2); + +private: + Range (Range * n, uint32_t l, uint32_t u) + : nx (n) + , lb (l) + , ub (u) + { + assert (lb < ub); + vFreeList.insert (this); + } + static void append_overlapping (Range * & head, Range * & tail, const Range * r); + static void append (Range ** & ptail, uint32_t l, uint32_t u); + + // test addition and subtraction + //template friend Range * re2c_test::range (uint32_t n); + + FORBID_COPY (Range); +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_RANGE_ diff --git a/tools/re2c/src/util/s_to_n32_unsafe.cc b/tools/re2c/src/util/s_to_n32_unsafe.cc new file mode 100644 index 000000000..fa7590b99 --- /dev/null +++ b/tools/re2c/src/util/s_to_n32_unsafe.cc @@ -0,0 +1,55 @@ +#include + +#include "src/util/s_to_n32_unsafe.h" + +// assumes that string matches regexp [0-9]+ +// returns false on overflow +bool s_to_u32_unsafe (const char * s, const char * 
s_end, uint32_t & number) +{ + uint64_t u = 0; + for (; s != s_end; ++s) + { + u *= 10; + u += static_cast (*s) - 0x30; + if (u >= std::numeric_limits::max()) + { + return false; + } + } + number = static_cast (u); + return true; +} + +// assumes that string matches regexp "-"? [0-9]+ +// returns false on underflow/overflow +bool s_to_i32_unsafe (const char * s, const char * s_end, int32_t & number) +{ + int64_t i = 0; + if (*s == '-') + { + ++s; + for (; s != s_end; ++s) + { + i *= 10; + i -= *s - 0x30; + if (i < std::numeric_limits::min()) + { + return false; + } + } + } + else + { + for (; s != s_end; ++s) + { + i *= 10; + i += *s - 0x30; + if (i > std::numeric_limits::max()) + { + return false; + } + } + } + number = static_cast (i); + return true; +} diff --git a/tools/re2c/src/util/s_to_n32_unsafe.h b/tools/re2c/src/util/s_to_n32_unsafe.h new file mode 100644 index 000000000..e733c1613 --- /dev/null +++ b/tools/re2c/src/util/s_to_n32_unsafe.h @@ -0,0 +1,10 @@ +#ifndef _RE2C_UTIL_S_TO_N32_UNSAFE_ +#define _RE2C_UTIL_S_TO_N32_UNSAFE_ + +#include "src/util/attribute.h" +#include "src/util/c99_stdint.h" + +bool s_to_u32_unsafe (const char * s, const char * s_end, uint32_t & number) RE2C_GXX_ATTRIBUTE ((warn_unused_result)); +bool s_to_i32_unsafe (const char * s, const char * s_end, int32_t & number) RE2C_GXX_ATTRIBUTE ((warn_unused_result)); + +#endif // _RE2C_UTIL_S_TO_N32_UNSAFE_ diff --git a/tools/re2c/src/util/smart_ptr.h b/tools/re2c/src/util/smart_ptr.h new file mode 100644 index 000000000..c138cf554 --- /dev/null +++ b/tools/re2c/src/util/smart_ptr.h @@ -0,0 +1,69 @@ +#ifndef _RE2C_UTIL_SMART_PTR_ +#define _RE2C_UTIL_SMART_PTR_ + +namespace re2c +{ + + template + class smart_ptr + { + private: + T* ptr; + long* count; // shared number of owners + + public: + explicit smart_ptr (T* p=0) + : ptr(p), count(new long(1)) {} + + smart_ptr (const smart_ptr& p) throw() + : ptr(p.ptr), count(p.count) + { + ++*count; + } + + ~smart_ptr () + { + dispose(); + } + + 
smart_ptr& operator= (const smart_ptr& p) + { + if (this != &p) + { + dispose(); + ptr = p.ptr; + count = p.count; + ++*count; + } + return *this; + } + + T& operator*() const + { + return *ptr; + } + + T* operator->() const + { + return ptr; + } + + private: + void dispose() + { + if (--*count == 0) + { + delete count; + delete ptr; + } + } + }; + + template + smart_ptr make_smart_ptr(T* p) + { + return smart_ptr(p); + } +} + +#endif // _RE2C_UTIL_SMART_PTR_ diff --git a/tools/re2c/src/util/static_assert.h b/tools/re2c/src/util/static_assert.h new file mode 100644 index 000000000..c2a1327db --- /dev/null +++ b/tools/re2c/src/util/static_assert.h @@ -0,0 +1,14 @@ +#ifndef _RE2C_UTIL_STATIC_ASSERT_ +#define _RE2C_UTIL_STATIC_ASSERT_ + +namespace re2c { + +template struct static_assert_t; +template<> struct static_assert_t {}; + +} // namespace re2c + +#define RE2C_STATIC_ASSERT(e) \ + { re2c::static_assert_t _; (void) _; } + +#endif // _RE2C_UTIL_STATIC_ASSERT_ diff --git a/tools/re2c/src/util/u32lim.h b/tools/re2c/src/util/u32lim.h new file mode 100644 index 000000000..d9c356ccf --- /dev/null +++ b/tools/re2c/src/util/u32lim.h @@ -0,0 +1,72 @@ +#ifndef _RE2C_UTIL_U32LIM_ +#define _RE2C_UTIL_U32LIM_ + +#include "src/util/c99_stdint.h" + +// uint32_t truncated to LIMIT +// any overflow (either result of a binary operation +// or conversion from another type) results in LIMIT +// LIMIT is a fixpoint +template +class u32lim_t +{ + uint32_t value; + explicit u32lim_t (uint32_t x) + : value (x < LIMIT ? x : LIMIT) + {} + explicit u32lim_t (uint64_t x) + : value (x < LIMIT ? static_cast (x) : LIMIT) + {} + +public: + // implicit conversion is forbidden, because + // operands should be converted before operation: + // uint32_t x, y; ... 
u32lim_t z = x + y; + // will result in 32-bit addition and may overflow + // Don't export overloaded constructors: it breaks OS X builds + // ('size_t' causes resolution ambiguity) + static u32lim_t from32 (uint32_t x) { return u32lim_t(x); } + static u32lim_t from64 (uint64_t x) { return u32lim_t(x); } + + static u32lim_t limit () + { + return u32lim_t (LIMIT); + } + + uint32_t uint32 () const + { + return value; + } + + bool overflow () const + { + return value == LIMIT; + } + + friend u32lim_t operator + (u32lim_t x, u32lim_t y) + { + const uint64_t z + = static_cast (x.value) + + static_cast (y.value); + return z < LIMIT + ? u32lim_t (z) + : u32lim_t (LIMIT); + } + + friend u32lim_t operator * (u32lim_t x, u32lim_t y) + { + const uint64_t z + = static_cast (x.value) + * static_cast (y.value); + return z < LIMIT + ? u32lim_t (z) + : u32lim_t (LIMIT); + } + + friend bool operator < (u32lim_t x, u32lim_t y) + { + return x.value < y.value; + } +}; + +#endif // _RE2C_UTIL_U32LIM_ diff --git a/tools/re2c/src/util/uniq_vector.h b/tools/re2c/src/util/uniq_vector.h new file mode 100644 index 000000000..76c0512f0 --- /dev/null +++ b/tools/re2c/src/util/uniq_vector.h @@ -0,0 +1,46 @@ +#ifndef _RE2C_UTIL_UNIQ_VECTOR_ +#define _RE2C_UTIL_UNIQ_VECTOR_ + +#include + +namespace re2c +{ + +// wrapper over std::vector +// O(n) lookup +// O(n) insertion +template +class uniq_vector_t +{ + typedef std::vector elems_t; + elems_t elems; +public: + uniq_vector_t () + : elems () + {} + size_t size () const + { + return elems.size (); + } + const value_t & operator [] (size_t i) const + { + return elems[i]; + } + size_t find_or_add (const value_t & v) + { + const size_t size = elems.size (); + for (size_t i = 0; i < size; ++i) + { + if (elems[i] == v) + { + return i; + } + } + elems.push_back (v); + return size; + } +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_UNIQ_VECTOR_ diff --git a/tools/updaterevision/CMakeLists.txt b/tools/updaterevision/CMakeLists.txt new file mode 100644 
index 000000000..db99ab812 --- /dev/null +++ b/tools/updaterevision/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required( VERSION 2.8.7 ) + +if( WIN32 ) + if( MSVC_VERSION GREATER 1399 ) + # VC 8+ adds a manifest automatically to the executable. We need to + # merge ours with it. + set( MT_MERGE ON ) + else() + set( TRUSTINFO trustinfo.rc ) + endif() +else( WIN32 ) + set( TRUSTINFO "" ) +endif() + +if( NOT CMAKE_CROSSCOMPILING ) + add_executable( updaterevision updaterevision.c ${TRUSTINFO} ) + set( CROSS_EXPORTS ${CROSS_EXPORTS} updaterevision PARENT_SCOPE ) +endif() + +if( MT_MERGE ) + add_custom_command(TARGET updaterevision POST_BUILD + COMMAND mt -inputresource:$ -manifest ${CMAKE_CURRENT_SOURCE_DIR}/trustinfo.txt -outputresource:$ -nologo + COMMENT "Embedding trustinfo into updaterevision" ) +endif() diff --git a/tools/updaterevision/trustinfo.rc b/tools/updaterevision/trustinfo.rc new file mode 100644 index 000000000..366f9b2fd --- /dev/null +++ b/tools/updaterevision/trustinfo.rc @@ -0,0 +1,6 @@ +// This resource script is for compiling with MinGW only. Visual C++ +// compilations use the manifest tool to insert the manifest instead. + +#include + +1 RT_MANIFEST "trustinfo.txt" diff --git a/tools/updaterevision/trustinfo.txt b/tools/updaterevision/trustinfo.txt new file mode 100644 index 000000000..2bbed9fb7 --- /dev/null +++ b/tools/updaterevision/trustinfo.txt @@ -0,0 +1,16 @@ + + + + + Update svnrevision.h for the ZDoom source build process. + + + + + + + + diff --git a/tools/updaterevision/updaterevision.c b/tools/updaterevision/updaterevision.c new file mode 100644 index 000000000..bbb6dddf3 --- /dev/null +++ b/tools/updaterevision/updaterevision.c @@ -0,0 +1,136 @@ +/* updaterevision.c + * + * Public domain. This program uses git commands command to get + * various bits of repository status for a particular directory + * and writes it into a header file so that it can be used for a + * project's versioning. 
+ */ + +#define _CRT_SECURE_NO_DEPRECATE + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define popen _popen +#define pclose _pclose +#endif + +// Used to strip newline characters from lines read by fgets. +void stripnl(char *str) +{ + if (*str != '\0') + { + size_t len = strlen(str); + if (str[len - 1] == '\n') + { + str[len - 1] = '\0'; + } + } +} + +int main(int argc, char **argv) +{ + char vertag[128], lastlog[128], lasthash[128], *hash = NULL; + FILE *stream = NULL; + int gotrev = 0, needupdate = 1; + + vertag[0] = '\0'; + lastlog[0] = '\0'; + + if (argc != 2) + { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + // Use git describe --tags to get a version string. If we are sitting directly + // on a tag, it returns that tag. Otherwise it returns --. + // Use git log to get the time of the latest commit in ISO 8601 format and its full hash. + stream = popen("git describe --tags --dirty=-m && git log -1 --format=%ai*%H", "r"); + + if (NULL != stream) + { + if (fgets(vertag, sizeof vertag, stream) == vertag && + fgets(lastlog, sizeof lastlog, stream) == lastlog) + { + stripnl(vertag); + stripnl(lastlog); + gotrev = 1; + } + + pclose(stream); + } + + if (gotrev) + { + hash = strchr(lastlog, '*'); + if (hash != NULL) + { + *hash = '\0'; + hash++; + } + } + if (hash == NULL) + { + fprintf(stderr, "Failed to get commit info: %s\n", strerror(errno)); + strcpy(vertag, ""); + lastlog[0] = '\0'; + lastlog[1] = '0'; + lastlog[2] = '\0'; + hash = lastlog + 1; + } + + stream = fopen (argv[1], "r"); + if (stream != NULL) + { + if (!gotrev) + { // If we didn't get a revision but the file does exist, leave it alone. + fclose (stream); + return 0; + } + // Read the revision that's in this file already. If it's the same as + // what we've got, then we don't need to modify it and can avoid rebuilding + // dependant files. 
+ if (fgets(lasthash, sizeof lasthash, stream) == lasthash) + { + stripnl(lasthash); + if (strcmp(hash, lasthash + 3) == 0) + { + needupdate = 0; + } + } + fclose (stream); + } + + if (needupdate) + { + stream = fopen (argv[1], "w"); + if (stream == NULL) + { + return 1; + } + fprintf(stream, +"// %s\n" +"//\n" +"// This file was automatically generated by the\n" +"// updaterevision tool. Do not edit by hand.\n" +"\n" +"#define GIT_DESCRIPTION \"%s\"\n" +"#define GIT_HASH \"%s\"\n" +"#define GIT_TIME \"%s\"\n", + hash, vertag, hash, lastlog); + fclose(stream); + fprintf(stderr, "%s updated to commit %s.\n", argv[1], vertag); + } + else + { + fprintf (stderr, "%s is up to date at commit %s.\n", argv[1], vertag); + } + + return 0; +} diff --git a/tools/zipdir/CMakeLists.txt b/tools/zipdir/CMakeLists.txt new file mode 100644 index 000000000..6a36b2cb5 --- /dev/null +++ b/tools/zipdir/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required( VERSION 2.8.7 ) + +if( NOT CMAKE_CROSSCOMPILING ) + include_directories( "${ZLIB_INCLUDE_DIR}" "${BZIP2_INCLUDE_DIR}" "${LZMA_INCLUDE_DIR}" ) + add_executable( zipdir + zipdir.c ) + target_link_libraries( zipdir ${ZLIB_LIBRARIES} ${BZIP2_LIBRARIES} lzma ) + set( CROSS_EXPORTS ${CROSS_EXPORTS} zipdir PARENT_SCOPE ) +endif() diff --git a/tools/zipdir/zipdir.c b/tools/zipdir/zipdir.c new file mode 100644 index 000000000..4cb98c138 --- /dev/null +++ b/tools/zipdir/zipdir.c @@ -0,0 +1,1695 @@ +/* +** zipdir.c +** Copyright (C) 2008-2009 Randy Heit +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +** +**************************************************************************** +** +** Usage: zipdir [-dfuq] ... +** +** Given one or more directories, their contents are scanned recursively. +** If any files are newer than the zip file or the zip file does not exist, +** then everything in the specified directories is stored in the zip. The +** base directory names are not stored in the zip file, but subdirectories +** recursed into are stored. +*/ + +// HEADER FILES ------------------------------------------------------------ + +#include +#include +#ifdef _WIN32 +#include +#define stat _stat +#else +#include +#if !defined(__sun) +#include +#endif +#endif +#include +#include +#include +#include +#include +#include +#include "zlib.h" +#include "bzlib.h" +#include "LzmaEnc.h" +#include "7zVersion.h" +#ifdef PPMD +#include "../../ppmd/PPMd.h" +#endif + +// MACROS ------------------------------------------------------------------ + +#ifdef __GNUC__ +// With versions of GCC newer than 4.2, it appears it was determined that the +// cost of an unaligned pointer on PPC was high enough to add padding to the +// end of packed structs. For whatever reason __packed__ and pragma pack are +// handled differently in this regard. Note that this only needs to be applied +// to types which are used in arrays. 
+#define FORCE_PACKED __attribute__((__packed__)) +#else +#define FORCE_PACKED +#endif + +#ifndef __BIG_ENDIAN__ +#define MAKE_ID(a,b,c,d) ((a)|((b)<<8)|((c)<<16)|((d)<<24)) +#define LittleShort(x) (x) +#define LittleLong(x) (x) +#else +#define MAKE_ID(a,b,c,d) ((d)|((c)<<8)|((b)<<16)|((a)<<24)) +static unsigned short LittleShort(unsigned short x) +{ + return (x>>8) | (x<<8); +} + +static unsigned int LittleLong(unsigned int x) +{ + return (x>>24) | ((x>>8) & 0xff00) | ((x<<8) & 0xff0000) | (x<<24); +} +#endif + +#define ZIP_LOCALFILE MAKE_ID('P','K',3,4) +#define ZIP_CENTRALFILE MAKE_ID('P','K',1,2) +#define ZIP_ENDOFDIR MAKE_ID('P','K',5,6) + +#define METHOD_STORED 0 +#define METHOD_DEFLATE 8 +#define METHOD_BZIP2 12 +#define METHOD_LZMA 14 +#define METHOD_PPMD 98 + +// Buffer size for central directory search +#define BUFREADCOMMENT (0x400) + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +// TYPES ------------------------------------------------------------------- + +typedef struct file_entry_s +{ + struct file_entry_s *next; + time_t time_write; + unsigned int uncompressed_size; + unsigned int compressed_size; + unsigned int crc32; + unsigned int zip_offset; + short date, time; + short method; + char path[]; +} file_entry_t; + +typedef struct dir_tree_s +{ + struct dir_tree_s *next; + file_entry_t *files; + size_t path_size; + char path[]; +} dir_tree_t; + +typedef struct file_sorted_s +{ + file_entry_t *file; + char *path_in_zip; +} file_sorted_t; + +typedef struct compressor_s +{ + int (*compress)(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen); + int method; +} compressor_t; + +typedef unsigned int UINT32; +typedef unsigned short WORD; +typedef unsigned char BYTE; + +// [BL] Solaris (well GCC on Solaris) doesn't seem to support pack(push/pop, 1) so we'll need to use use it +// on the whole file. 
+#pragma pack(1) +//#pragma pack(push,1) +typedef struct +{ + UINT32 Magic; // 0 + BYTE VersionToExtract[2]; // 4 + WORD Flags; // 6 + WORD Method; // 8 + WORD ModTime; // 10 + WORD ModDate; // 12 + UINT32 CRC32; // 14 + UINT32 CompressedSize; // 18 + UINT32 UncompressedSize; // 22 + WORD NameLength; // 26 + WORD ExtraLength; // 28 +} FORCE_PACKED LocalFileHeader; + +typedef struct +{ + UINT32 Magic; + BYTE VersionMadeBy[2]; + BYTE VersionToExtract[2]; + WORD Flags; + WORD Method; + WORD ModTime; + WORD ModDate; + UINT32 CRC32; + UINT32 CompressedSize; + UINT32 UncompressedSize; + WORD NameLength; + WORD ExtraLength; + WORD CommentLength; + WORD StartingDiskNumber; + WORD InternalAttributes; + UINT32 ExternalAttributes; + UINT32 LocalHeaderOffset; +} FORCE_PACKED CentralDirectoryEntry; + +typedef struct +{ + UINT32 Magic; + WORD DiskNumber; + WORD FirstDisk; + WORD NumEntries; + WORD NumEntriesOnAllDisks; + UINT32 DirectorySize; + UINT32 DirectoryOffset; + WORD ZipCommentLength; +} FORCE_PACKED EndOfCentralDirectory; +//#pragma pack(pop) + +// EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- + +// PUBLIC FUNCTION PROTOTYPES ---------------------------------------------- + +void print_usage(const char *cmdname); +dir_tree_t *alloc_dir_tree(const char *dir); +file_entry_t *alloc_file_entry(const char *prefix, const char *path, time_t last_written); +void free_dir_tree(dir_tree_t *tree); +void free_dir_trees(dir_tree_t *tree); +#ifdef _WIN32 +void recurse_dir(dir_tree_t *tree, const char *dirpath); +dir_tree_t *add_dir(const char *dirpath); +#endif +dir_tree_t *add_dirs(char **argv); +int count_files(dir_tree_t *trees); +int sort_cmp(const void *a, const void *b); +file_sorted_t *sort_files(dir_tree_t *trees, int num_files); +void write_zip(const char *zipname, dir_tree_t *trees, int update); +int append_to_zip(FILE *zip_file, file_sorted_t *file, FILE *ozip, BYTE *odir); +int write_central_dir(FILE *zip, file_sorted_t *file); +void 
time_to_dos(struct tm *time, short *dosdate, short *dostime); +int method_to_version(int method); +const char *method_name(int method); +int compress_lzma(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen); +int compress_bzip2(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen); +int compress_ppmd(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen); +int compress_deflate(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen); +BYTE *find_central_dir(FILE *fin); +CentralDirectoryEntry *find_file_in_zip(BYTE *dir, const char *path, unsigned int len, unsigned int crc, short date, short time); +int copy_zip_file(FILE *zip, file_entry_t *file, FILE *ozip, CentralDirectoryEntry *dirent); + +// PRIVATE FUNCTION PROTOTYPES --------------------------------------------- + +static void *SzAlloc(ISzAllocPtr p, size_t size) { p = p; return malloc(size); } +static void SzFree(ISzAllocPtr p, void *address) { p = p; free(address); } + +// EXTERNAL DATA DECLARATIONS ---------------------------------------------- + +// PUBLIC DATA DEFINITIONS ------------------------------------------------- + +int DeflateOnly; +int UpdateCount; +int Quiet; + +// PRIVATE DATA DEFINITIONS ------------------------------------------------ + +static const UINT32 centralfile = ZIP_CENTRALFILE; +static const UINT32 endofdir = ZIP_ENDOFDIR; + +static int no_mem; + +static ISzAlloc Alloc = { SzAlloc, SzFree }; +static compressor_t Compressors[] = +{ + { compress_lzma, METHOD_LZMA }, + { compress_bzip2, METHOD_BZIP2 }, +#ifdef PPMD + { compress_ppmd, METHOD_PPMD }, +#endif + { compress_deflate, METHOD_DEFLATE }, + { NULL, 0 } +}; + +// CODE -------------------------------------------------------------------- + +//========================================================================== +// +// print_usage +// +//========================================================================== + +void print_usage(const char *cmdname) +{ +#ifdef 
_WIN32 + const char *rchar = strrchr(cmdname, '\\'); + if (rchar != NULL) + { + cmdname = rchar+1; + } +#endif + fprintf(stderr, "Usage: %s [options] ...\n" + "Options: -d Use deflate compression only\n" + " -f Force creation of archive\n" + " -u Only update changed files\n" + " -q Do not list files\n", cmdname); +} + +//========================================================================== +// +// alloc_dir_tree +// +//========================================================================== + +dir_tree_t *alloc_dir_tree(const char *dir) +{ + dir_tree_t *tree; + size_t dirlen; + + dirlen = strlen(dir); + tree = malloc(sizeof(dir_tree_t) + dirlen + 2); + if (tree != NULL) + { + strcpy(tree->path, dir); + tree->path_size = dirlen; + if (dir[dirlen - 1] != '/') + { + tree->path_size++; + tree->path[dirlen] = '/'; + tree->path[dirlen + 1] = '\0'; + } + tree->files = NULL; + tree->next = NULL; + } + return tree; +} + +//========================================================================== +// +// alloc_file_entry +// +//========================================================================== + +file_entry_t *alloc_file_entry(const char *prefix, const char *path, time_t last_written) +{ + file_entry_t *entry; + + entry = malloc(sizeof(file_entry_t) + strlen(prefix) + strlen(path) + 1); + if (entry != NULL) + { + strcpy(entry->path, prefix); + strcat(entry->path, path); + entry->next = NULL; + entry->time_write = last_written; + } + return entry; +} + +//========================================================================== +// +// free_dir_tree +// +//========================================================================== + +void free_dir_tree(dir_tree_t *tree) +{ + file_entry_t *entry, *next; + + if (tree != NULL) + { + for (entry = tree->files; entry != NULL; entry = next) + { + next = entry->next; + free(entry); + } + free(tree); + } +} + +//========================================================================== +// +// free_dir_trees +// 
+//========================================================================== + +void free_dir_trees(dir_tree_t *tree) +{ + dir_tree_t *next; + + for (; tree != NULL; tree = next) + { + next = tree->next; + free_dir_tree(tree); + } +} + +#ifdef _WIN32 + +//========================================================================== +// +// recurse_dir +// +//========================================================================== + +void recurse_dir(dir_tree_t *tree, const char *dirpath) +{ + struct _finddata_t fileinfo; + intptr_t handle; + char *dirmatch; + + dirmatch = malloc(strlen(dirpath) + 2); + if (dirmatch == NULL) + { + no_mem = 1; + return; + } + strcpy(dirmatch, dirpath); + strcat(dirmatch, "*"); + if ((handle = _findfirst(dirmatch, &fileinfo)) == -1) + { + fprintf(stderr, "Could not scan '%s': %s\n", dirpath, strerror(errno)); + } + else + { + do + { + if (fileinfo.attrib & _A_HIDDEN) + { + // Skip hidden files and directories. (Prevents SVN bookkeeping + // info from being included.) + continue; + } + if (fileinfo.attrib & _A_SUBDIR) + { + char *newdir; + + if (fileinfo.name[0] == '.' && + (fileinfo.name[1] == '\0' || + (fileinfo.name[1] == '.' && fileinfo.name[2] == '\0'))) + { + // Do not record . and .. directories. 
+ continue; + } + newdir = malloc(strlen(dirpath) + strlen(fileinfo.name) + 2); + strcpy(newdir, dirpath); + strcat(newdir, fileinfo.name); + strcat(newdir, "/"); + recurse_dir(tree, newdir); + } + else + { + file_entry_t *entry; + + if (strstr(fileinfo.name, ".orig")) + { + // .orig files are left behind by patch.exe and should never be + // added to zdoom.pk3 + continue; + } + + entry = alloc_file_entry(dirpath, fileinfo.name, fileinfo.time_write); + if (entry == NULL) + { + no_mem = 1; + break; + } + entry->next = tree->files; + tree->files = entry; + } + } while (_findnext(handle, &fileinfo) == 0); + _findclose(handle); + } + free(dirmatch); +} + +//========================================================================== +// +// add_dir +// +//========================================================================== + +dir_tree_t *add_dir(const char *dirpath) +{ + dir_tree_t *tree = alloc_dir_tree(dirpath); + + if (tree != NULL) + { + recurse_dir(tree, tree->path); + } + return tree; +} + +//========================================================================== +// +// add_dirs +// Windows version +// +// Given NULL-terminated array of directory paths, create trees for them. +// +//========================================================================== + +dir_tree_t *add_dirs(char **argv) +{ + dir_tree_t *tree, *trees = NULL; + char *s; + + while (*argv != NULL) + { + for (s = *argv; *s != '\0'; ++s) + { + if (*s == '\\') + { + *s = '/'; + } + } + tree = add_dir(*argv); + tree->next = trees; + trees = tree; + if (no_mem) + { + break; + } + argv++; + } + return trees; +} + +#elif defined(__sun) + +//========================================================================== +// +// add_dirs +// Solaris version +// +// Given NULL-terminated array of directory paths, create trees for them. 
+// +//========================================================================== + +void add_dir(dir_tree_t *tree, char* dirpath) +{ + DIR *directory = opendir(dirpath); + if(directory == NULL) + return; + + struct dirent *file; + while((file = readdir(directory)) != NULL) + { + if(file->d_name[0] == '.') //File is hidden or ./.. directory so ignore it. + continue; + + int isDirectory = 0; + int time = 0; + + char* fullFileName = malloc(strlen(dirpath) + strlen(file->d_name) + 1); + strcpy(fullFileName, dirpath); + strcat(fullFileName, file->d_name); + + struct stat *fileStat; + fileStat = malloc(sizeof(struct stat)); + stat(fullFileName, fileStat); + isDirectory = S_ISDIR(fileStat->st_mode); + time = fileStat->st_mtime; + free(stat); + + free(fullFileName); + + if(isDirectory) + { + char* newdir; + newdir = malloc(strlen(dirpath) + strlen(file->d_name) + 2); + strcpy(newdir, dirpath); + strcat(newdir, file->d_name); + strcat(newdir, "/"); + add_dir(tree, newdir); + free(newdir); + continue; + } + + file_entry_t *entry; + entry = alloc_file_entry(dirpath, file->d_name, time); + if (entry == NULL) + { + //no_mem = 1; + break; + } + entry->next = tree->files; + tree->files = entry; + } + + closedir(directory); +} + +dir_tree_t *add_dirs(char **argv) +{ + dir_tree_t *tree, *trees = NULL; + + int i = 0; + while(argv[i] != NULL) + { + tree = alloc_dir_tree(argv[i]); + tree->next = trees; + trees = tree; + + if(tree != NULL) + { + char* dirpath = malloc(sizeof(argv[i]) + 2); + strcpy(dirpath, argv[i]); + if(dirpath[strlen(dirpath)] != '/') + strcat(dirpath, "/"); + add_dir(tree, dirpath); + free(dirpath); + } + + i++; + } + return trees; +} + +#else + +//========================================================================== +// +// add_dirs +// 4.4BSD version +// +// Given NULL-terminated array of directory paths, create trees for them. 
+// +//========================================================================== + +dir_tree_t *add_dirs(char **argv) +{ + FTS *fts; + FTSENT *ent; + dir_tree_t *tree, *trees = NULL; + file_entry_t *file; + + fts = fts_open(argv, FTS_LOGICAL, NULL); + if (fts == NULL) + { + fprintf(stderr, "Failed to start directory traversal: %s\n", strerror(errno)); + return NULL; + } + while ((ent = fts_read(fts)) != NULL) + { + if (ent->fts_info == FTS_D && ent->fts_name[0] == '.') + { + // Skip hidden directories. (Prevents SVN bookkeeping + // info from being included.) + // [BL] Also skip backup files. + fts_set(fts, ent, FTS_SKIP); + } + if (ent->fts_info == FTS_D && ent->fts_level == 0) + { + tree = alloc_dir_tree(ent->fts_path); + if (tree == NULL) + { + no_mem = 1; + break; + } + tree->next = trees; + trees = tree; + } + if (ent->fts_info != FTS_F) + { + // We're only interested in remembering files. + continue; + } + else if(ent->fts_name[strlen(ent->fts_name)-1] == '~') + { + // Don't remember backup files. + continue; + } + file = alloc_file_entry("", ent->fts_path, ent->fts_statp->st_mtime); + if (file == NULL) + { + no_mem = 1; + break; + } + file->next = tree->files; + tree->files = file; + } + fts_close(fts); + return trees; +} +#endif + +//========================================================================== +// +// count_files +// +//========================================================================== + +int count_files(dir_tree_t *trees) +{ + dir_tree_t *tree; + file_entry_t *file; + int count; + + for (count = 0, tree = trees; tree != NULL; tree = tree->next) + { + for (file = tree->files; file != NULL; file = file->next) + { + count++; + } + } + return count; +} + +//========================================================================== +// +// sort_cmp +// +// Arbitrarily-selected sorting for the zip files: Files in the root +// directory sort after files in subdirectories. Otherwise, everything +// sorts by name. 
+// +//========================================================================== + +int sort_cmp(const void *a, const void *b) +{ + const file_sorted_t *sort1 = (const file_sorted_t *)a; + const file_sorted_t *sort2 = (const file_sorted_t *)b; + int in_dir1, in_dir2; + + in_dir1 = (strchr(sort1->path_in_zip, '/') != NULL); + in_dir2 = (strchr(sort2->path_in_zip, '/') != NULL); + if (in_dir1 == 1 && in_dir2 == 0) + { + return -1; + } + if (in_dir1 == 0 && in_dir2 == 1) + { + return 1; + } + return strcmp(((const file_sorted_t *)a)->path_in_zip, + ((const file_sorted_t *)b)->path_in_zip); +} + +//========================================================================== +// +// sort_files +// +//========================================================================== + +file_sorted_t *sort_files(dir_tree_t *trees, int num_files) +{ + file_sorted_t *sorter; + dir_tree_t *tree; + file_entry_t *file; + int i; + + sorter = malloc(sizeof(*sorter) * num_files); + if (sorter != NULL) + { + for (i = 0, tree = trees; tree != NULL; tree = tree->next) + { + for (file = tree->files; file != NULL; file = file->next) + { + sorter[i].file = file; + sorter[i].path_in_zip = file->path + tree->path_size; + i++; + } + } + qsort(sorter, num_files, sizeof(*sorter), sort_cmp); + } + return sorter; +} + +//========================================================================== +// +// write_zip +// +//========================================================================== + +void write_zip(const char *zipname, dir_tree_t *trees, int update) +{ +#ifdef _WIN32 + char tempname[_MAX_PATH]; +#else + char tempname[PATH_MAX]; +#endif + EndOfCentralDirectory dirend; + int i, num_files; + file_sorted_t *sorted; + FILE *zip, *ozip = NULL; + void *central_dir = NULL; + + num_files = count_files(trees); + sorted = sort_files(trees, num_files); + if (sorted == NULL) + { + no_mem = 1; + return; + } + if (update) + { + sprintf(tempname, "%s.temp", zipname); + ozip = fopen(zipname, "rb"); + if 
(ozip == NULL) + { + fprintf(stderr, "Could not open %s for updating: %s\n", zipname, strerror(errno)); + update = 0; + } + else + { + central_dir = find_central_dir(ozip); + if (central_dir == NULL) + { + fprintf(stderr, "Could not read central directory from %s. (Is it a zipfile?)\n", zipname); + update = 0; + } + } + if (!update) + { + fprintf(stderr, "Will proceed as if -u had not been specified.\n"); + } + } + if (update) + { + zip = fopen(tempname, "wb"); + } + else + { + zip = fopen(zipname, "wb"); + } + if (zip == NULL) + { + fprintf(stderr, "Could not open %s: %s\n", zipname, strerror(errno)); + } + else + { + // Write each file. + for (i = 0; i < num_files; ++i) + { + if (append_to_zip(zip, sorted + i, ozip, central_dir)) + { + break; + } + } + if (i == num_files) + { + // Write central directory. + dirend.DirectoryOffset = LittleLong(ftell(zip)); + for (i = 0; i < num_files; ++i) + { + write_central_dir(zip, sorted + i); + } + // Write the directory terminator. + dirend.Magic = ZIP_ENDOFDIR; + dirend.DiskNumber = 0; + dirend.FirstDisk = 0; + dirend.NumEntriesOnAllDisks = dirend.NumEntries = LittleShort(i); + // In this case LittleLong(dirend.DirectoryOffset) is undoing the transformation done above. 
+ dirend.DirectorySize = LittleLong(ftell(zip) - LittleLong(dirend.DirectoryOffset)); + dirend.ZipCommentLength = 0; + if (fwrite(&dirend, sizeof(dirend), 1, zip) != 1) + { + fprintf(stderr, "Failed writing zip directory terminator: %s\n", strerror(errno)); + } + printf("%s contains %d files (updated %d)\n", zipname, num_files, UpdateCount); + fclose(zip); + + if (ozip != NULL) + { + // Delete original, and rename temp to take its place + fclose(ozip); + ozip = NULL; + if (remove(zipname)) + { + fprintf(stderr, "Could not delete old zip: %s\nUpdated zip can be found at %s\n", + strerror(errno), tempname); + } + else if (rename(tempname, zipname)) + { + fprintf(stderr, "Could not rename %s to %s: %s\n", + tempname, zipname, strerror(errno)); + } + } + } + } + free(sorted); + if (ozip != NULL) + { + fclose(ozip); + } + if (central_dir != NULL) + { + free(central_dir); + } +} + +//========================================================================== +// +// append_to_zip +// +// Write a given file to the zipFile. 
+// +// zipfile: zip object to be written to +// file: file to read data from +// +// returns: 0 = success, 1 = error +// +//========================================================================== + +int append_to_zip(FILE *zip_file, file_sorted_t *filep, FILE *ozip, BYTE *odir) +{ + LocalFileHeader local; + uLong crc; + file_entry_t *file; + Byte *readbuf; + Byte *compbuf[2]; + unsigned int comp_len[2]; + int offset[2]; + int method[2]; + int best; + int slot; + FILE *lumpfile; + unsigned int readlen; + unsigned int len; + int i; + struct tm *ltime; + + file = filep->file; + + // try to determine local time + ltime = localtime(&file->time_write); + time_to_dos(ltime, &file->date, &file->time); + + // lumpfile = source file + lumpfile = fopen(file->path, "rb"); + if (lumpfile == NULL) + { + fprintf(stderr, "Could not open %s: %s\n", file->path, strerror(errno)); + return 1; + } + // len = source size + fseek (lumpfile, 0, SEEK_END); + len = ftell(lumpfile); + fseek (lumpfile, 0, SEEK_SET); + + // allocate a buffer for the whole source file + readbuf = malloc(len); + if (readbuf == NULL) + { + fclose(lumpfile); + fprintf(stderr, "Could not allocate %u bytes\n", (int)len); + return 1; + } + // read the whole source file into buffer + readlen = (unsigned int)fread(readbuf, 1, len, lumpfile); + fclose(lumpfile); + + // if read less bytes than expected, + if (readlen != len) + { + // diagnose and return error + free(readbuf); + fprintf(stderr, "Unable to read %s\n", file->path); + return 1; + } + // file loaded + + file->uncompressed_size = len; + file->compressed_size = len; + file->method = METHOD_STORED; + + // Calculate CRC32 for file. + crc = crc32(0, NULL, 0); + crc = crc32(crc, readbuf, (uInt)len); + file->crc32 = LittleLong(crc); + + // Can we save time and just copy the file from the old zip? 
+ if (odir != NULL && ozip != NULL) + { + CentralDirectoryEntry *dirent; + + dirent = find_file_in_zip(odir, filep->path_in_zip, len, crc, file->date, file->time); + if (dirent != NULL) + { + i = copy_zip_file(zip_file, file, ozip, dirent); + if (i > 0) + { + free(readbuf); + return 0; + } + if (i < 0) + { + free(readbuf); + fprintf(stderr, "Unable to write %s to zip\n", file->path); + return 1; + } + } + } + + if (!Quiet) + { + if (ozip != NULL) + { + printf("Updating %-40s", filep->path_in_zip); + } + else + { + printf("Adding %-40s", filep->path_in_zip); + } + } + UpdateCount++; + + // Allocate a buffer for compression, one byte less than the source buffer. + // If it doesn't fit in that space, then skip compression and store it as-is. + compbuf[0] = malloc(len - 1); + compbuf[1] = malloc(len - 1); + best = -1; // best slot + slot = 0; // slot we are compressing to now + + // Find best compression method. We have two output buffers. One to hold the + // best compression method, and the other to hold the compression we are trying + // now. + for (i = 0; Compressors[i].compress != NULL; ++i) + { + if (DeflateOnly && Compressors[i].method != METHOD_DEFLATE) + { + continue; + } + comp_len[slot] = len - 1; + method[slot] = Compressors[i].method; + offset[slot] = Compressors[i].compress(compbuf[slot], &comp_len[slot], readbuf, len); + if (offset[slot] >= 0) + { + if (best < 0 || comp_len[slot] <= comp_len[best]) + { + best = slot; + slot ^= 1; + } + } + } + + if (best >= 0) + { + file->method = method[best]; + file->compressed_size = comp_len[best]; + } +// printf("%s -> method %d -> slot %d\n", filep->path_in_zip, file->method, best); + + // Fill in local directory header. + local.Magic = ZIP_LOCALFILE; + local.VersionToExtract[0] = method_to_version(file->method); + local.VersionToExtract[1] = 0; + local.Flags = file->method == METHOD_DEFLATE ? 
LittleShort(2) : 0; + local.Method = LittleShort(file->method); + local.ModTime = file->time; + local.ModDate = file->date; + local.CRC32 = file->crc32; + local.UncompressedSize = LittleLong(file->uncompressed_size); + local.CompressedSize = LittleLong(file->compressed_size); + local.NameLength = LittleShort((unsigned short)strlen(filep->path_in_zip)); + local.ExtraLength = 0; + + file->zip_offset = ftell(zip_file); + + // Write out the header, file name, and file data. + if (fwrite(&local, sizeof(local), 1, zip_file) != 1 || + fwrite(filep->path_in_zip, strlen(filep->path_in_zip), 1, zip_file) != 1 || + (file->method ? fwrite(compbuf[best] + offset[best], 1, comp_len[best], zip_file) != comp_len[best] : + fwrite(readbuf, 1, len, zip_file) != len)) + { + if (!Quiet) + { + printf("\n"); + } + fprintf(stderr, "Unable to write %s to zip\n", file->path); + free(readbuf); + if (compbuf[0] != NULL) + { + free(compbuf[0]); + } + if (compbuf[1] != NULL) + { + free(compbuf[1]); + } + return 1; + } + + // all done + free(readbuf); + if (compbuf[0] != NULL) + { + free(compbuf[0]); + } + if (compbuf[1] != NULL) + { + free(compbuf[1]); + } + if (!Quiet) + { + printf("%5.1f%% [%6u/%6u] %s\n", 100.0 - 100.0 * file->compressed_size / file->uncompressed_size, + file->compressed_size, file->uncompressed_size, method_name(file->method)); + } + return 0; +} + +//========================================================================== +// +// write_central_dir +// +// Writes the central directory entry for a file. +// +//========================================================================== + +int write_central_dir(FILE *zip, file_sorted_t *filep) +{ + CentralDirectoryEntry dir; + file_entry_t *file; + + file = filep->file; + dir.Magic = ZIP_CENTRALFILE; + dir.VersionMadeBy[0] = 20; + dir.VersionMadeBy[1] = 0; + dir.VersionToExtract[0] = method_to_version(file->method); + dir.VersionToExtract[1] = 0; + dir.Flags = file->method == METHOD_DEFLATE ? 
LittleShort(2) : 0; + dir.Method = LittleShort(file->method); + dir.ModTime = file->time; + dir.ModDate = file->date; + dir.CRC32 = file->crc32; + dir.CompressedSize = LittleLong(file->compressed_size); + dir.UncompressedSize = LittleLong(file->uncompressed_size); + dir.NameLength = LittleShort((unsigned short)strlen(filep->path_in_zip)); + dir.ExtraLength = 0; + dir.CommentLength = 0; + dir.StartingDiskNumber = 0; + dir.InternalAttributes = 0; + dir.ExternalAttributes = 0; + dir.LocalHeaderOffset = LittleLong(file->zip_offset); + + if (fwrite(&dir, sizeof(dir), 1, zip) != 1 || + fwrite(filep->path_in_zip, strlen(filep->path_in_zip), 1, zip) != 1) + { + fprintf(stderr, "Error writing central directory header for %s: %s\n", file->path, strerror(errno)); + return 1; + } + return 0; +} + +//========================================================================== +// +// time_to_dos +// +// Converts time from struct tm to the DOS format used by zip files. +// +//========================================================================== + +void time_to_dos(struct tm *time, short *dosdate, short *dostime) +{ + if (time == NULL || time->tm_year < 80) + { + *dosdate = *dostime = 0; + } + else + { + *dosdate = LittleShort((time->tm_year - 80) * 512 + (time->tm_mon + 1) * 32 + time->tm_mday); + *dostime = LittleShort(time->tm_hour * 2048 + time->tm_min * 32 + time->tm_sec / 2); + } +} + +//========================================================================== +// +// method_to_version +// +// Given a compression method, returns the version of the ZIP appnote +// required to decompress it, for filling in the directory information. +// +//========================================================================== + +int method_to_version(int method) +{ + // Apparently, real-world programs get confused by setting the version + // to extract field to something other than 2.0. 
+#if 0 + if (method == METHOD_LZMA || method == METHOD_PPMD) + return 63; + if (method == METHOD_BZIP2) + return 46; +#endif + // Default anything else to PKZIP 2.0. + return 20; +} + +//========================================================================== +// +// method_name +// +// Returns the name of the compression method. If the method is unknown, +// this will point to a static buffer. +// +//========================================================================== + +const char *method_name(int method) +{ + static char unkn[16]; + + if (method == METHOD_STORED) + { + return "Stored"; + } + if (method == METHOD_DEFLATE) + { + return "Deflate"; + } + if (method == METHOD_LZMA) + { + return "LZMA"; + } + if (method == METHOD_PPMD) + { + return "PPMd"; + } + if (method == METHOD_BZIP2) + { + return "BZip2"; + } + sprintf(unkn, "Unk:%03d", method); + return unkn; +} + +//========================================================================== +// +// compress_lzma +// +// Returns non-negative offset to start of data stream on success. Barring +// any strange changes to the LZMA library in the future, success should +// always return 0. +// +//========================================================================== + +int compress_lzma(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen) +{ + CLzmaEncProps lzma_props; + size_t props_size; + size_t comp_len; + int offset; + + if (*outlen < 1 + 4 + LZMA_PROPS_SIZE) + { + // Not enough room for LZMA properties header + compressed data. 
+ return -1; + } + if (out == NULL || in == NULL || inlen == 0) + { + return -1; + } + + LzmaEncProps_Init(&lzma_props); +// lzma_props.level = 9; + props_size = LZMA_PROPS_SIZE; + comp_len = *outlen - 4 - LZMA_PROPS_SIZE; + + if (SZ_OK != LzmaEncode(out + 4 + LZMA_PROPS_SIZE, &comp_len, in, inlen, &lzma_props, + out + 4, &props_size, 0, NULL, &Alloc, &Alloc)) + { + return -1; + } + // Fill in LZMA properties header + offset = 0; + if (props_size != LZMA_PROPS_SIZE) + { + // Move LZMA properties to be adjacent to the compressed data, because for + // some reaseon the library didn't use all the space provided. + int i; + + offset = (int)(LZMA_PROPS_SIZE - props_size); + for (i = 4 + LZMA_PROPS_SIZE - 1; i > 4 + offset; --i) + { + out[i] = out[i - offset]; + } + } + out[offset] = MY_VER_MAJOR; + out[offset+1] = MY_VER_MINOR; + out[offset+2] = (Byte)props_size; + out[offset+3] = 0; + // Add header length to outlen + *outlen = (unsigned int)(comp_len + 4 + props_size); + return offset; +} + +//========================================================================== +// +// compress_bzip2 +// +// Returns 0 on success, negative on failure. +// +//========================================================================== + +int compress_bzip2(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen) +{ + if (BZ_OK == BZ2_bzBuffToBuffCompress((char *)out, outlen, (char *)in, inlen, 9, 0, 0)) + { + return 0; + } + return -1; +} + +#ifdef PPMD +//========================================================================== +// +// compress_ppmd +// +// Returns 0 on success, negative on failure. +// +// Big problem here: The zip format only allows for PPMd I rev. 1. This +// version of the code is incompatible with 64-bit processors. PPMd J rev. 1 +// corrects this and also compresses slightly better, but it also changes +// the data format and is incompatible with I rev. 1. The PPMd source code +// is a tangled mass that I cannot comprehend, so fixing I rev. 
1 to work +// on 64-bit processors is well beyond my means. Hence, I cannot currently +// support PPMd in zips. If the zip spec gets updated to allow J rev. 1, +// then I can, but not any sooner. +// +//========================================================================== + +int compress_ppmd(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen) +{ + int maxorder = 8; + int sasize = 8; + int cutoff = 0; + + _PPMD_FILE ppsin = { (char *)in, inlen, 0 }; + _PPMD_FILE ppsout = { out + 2, *outlen - 2, 0}; + + if (!PPMd_StartSubAllocator(sasize)) + { + return -1; + } + PPMd_EncodeFile(&ppsout, &ppsin, maxorder, cutoff); + PPMd_StopSubAllocator(); + if (ppsout.eof) + { + return -1; + } + if (!ppsin.eof) + { + return -1; + } + + const short outval = LittleShort((maxorder - 1) + ((sasize - 1) << 4) + (cutoff << 12)); + memcpy(out, (const Byte *)&outval, sizeof(short)); + *outlen = *outlen - ppsout.buffersize; + return 0; +} +#endif + +//========================================================================== +// +// compress_deflate +// +// Returns 0 on success, negative on failure. +// +//========================================================================== + +int compress_deflate(Byte *out, unsigned int *outlen, const Byte *in, unsigned int inlen) +{ + z_stream stream; + int err; + + stream.next_in = (Bytef *)in; + stream.avail_in = inlen; + stream.next_out = out; + stream.avail_out = *outlen; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit2(&stream, 9, Z_DEFLATED, -15, 9, Z_DEFAULT_STRATEGY); + if (err != Z_OK) return -1; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return -1; + } + *outlen = stream.total_out; + + err = deflateEnd(&stream); + return err == Z_OK ? 
0 : -1; +} + +//========================================================================== +// +// find_central_dir +// +// Finds and loads the central directory records in the file. +// Taken from Quake3 source and modified. +// +//========================================================================== + +BYTE *find_central_dir(FILE *fin) +{ + unsigned char buf[BUFREADCOMMENT + 4]; + EndOfCentralDirectory eod; + BYTE *dir; + long file_size; + long back_read; + long max_back; // maximum size of global comment + long pos_found = 0; + + fseek(fin, 0, SEEK_END); + + file_size = ftell(fin); + max_back = 0xffff > file_size ? file_size : 0xffff; + + back_read = 4; + while (back_read < max_back) + { + UINT32 read_size, read_pos; + int i; + if (back_read + BUFREADCOMMENT > max_back) + back_read = max_back; + else + back_read += BUFREADCOMMENT; + read_pos = file_size - back_read; + + read_size = (BUFREADCOMMENT + 4) < (file_size - read_pos) ? + (BUFREADCOMMENT + 4) : (file_size - read_pos); + + if (fseek(fin, read_pos, SEEK_SET) != 0) + return NULL; + + if (fread(buf, 1, read_size, fin) != read_size) + return NULL; + + for (i = (int)read_size - 3; (i--) > 0;) + { + if (buf[i] == 'P' && buf[i+1] == 'K' && buf[i+2] == 5 && buf[i+3] == 6) + { + pos_found = read_pos + i; + break; + } + } + + if (pos_found != 0) + break; + } + if (pos_found == 0 || + fseek(fin, pos_found, SEEK_SET) != 0 || + fread(&eod, sizeof(eod), 1, fin) != 1 || + fseek(fin, LittleLong(eod.DirectoryOffset), SEEK_SET) != 0) + { + return NULL; + } + dir = malloc(LittleLong(eod.DirectorySize) + 4); + if (dir == NULL) + { + no_mem = 1; + return NULL; + } + if (fread(dir, 1, LittleLong(eod.DirectorySize), fin) != LittleLong(eod.DirectorySize)) + { + free(dir); + return NULL; + } + if (memcmp(dir, (const BYTE *)¢ralfile, sizeof(UINT32)) != 0) + { + free(dir); + return NULL; + } + memcpy(dir + LittleLong(eod.DirectorySize), (const BYTE *)&endofdir, sizeof(UINT32)); + return dir; +} + 
+//========================================================================== +// +// find_file_in_zip +// +// Returns a pointer to a central directory entry to a file, if it was +// found in the zip's directory. Data endianness is in zip order. +// +//========================================================================== + +CentralDirectoryEntry *find_file_in_zip(BYTE *dir, const char *path, unsigned int len, unsigned int crc, short date, short time) +{ + int pathlen = (int)strlen(path); + CentralDirectoryEntry *ent; + int flags; + + while (memcmp(dir, (const BYTE *)¢ralfile, sizeof(UINT32)) == 0) + { + ent = (CentralDirectoryEntry *)dir; + if (pathlen == LittleShort(ent->NameLength) && + strncmp((char *)(ent + 1), path, pathlen) == 0) + { + // Found something that matches by name. + break; + } + dir += sizeof(*ent) + LittleShort(ent->NameLength) + LittleShort(ent->ExtraLength) + LittleShort(ent->CommentLength); + } + if (memcmp(dir, (const BYTE *)¢ralfile, sizeof(UINT32)) != 0) + { + return NULL; + } + if (crc != LittleLong(ent->CRC32)) + { + return NULL; + } + if (len != LittleLong(ent->UncompressedSize)) + { + return NULL; + } + // Should I check modification date and time here? + flags = LittleShort(ent->Flags); + if (flags & 1) + { // Don't want to deal with encryption. + return NULL; + } + if (ent->ExtraLength != 0) + { // Don't want to deal with extra data. + return NULL; + } + // Okay, looks good. + return ent; +} + +//========================================================================== +// +// copy_zip_file +// +// Copies one file from ozip to zip. Returns positive on success, zero if +// the file could not be found, and negative if it failed while writing the +// file. 
+// +//========================================================================== + +int copy_zip_file(FILE *zip, file_entry_t *file, FILE *ozip, CentralDirectoryEntry *ent) +{ + LocalFileHeader lfh; + BYTE *buf; + UINT32 buf_size; + + if (fseek(ozip, LittleLong(ent->LocalHeaderOffset), SEEK_SET) != 0) + { + return 0; + } + if (fread(&lfh, sizeof(lfh), 1, ozip) != 1) + { + return 0; + } + // Check to make sure the local header matches the central directory. + if (lfh.Flags != ent->Flags || lfh.Method != ent->Method || + lfh.CRC32 != ent->CRC32 || lfh.CompressedSize != ent->CompressedSize || + lfh.UncompressedSize != ent->UncompressedSize || + lfh.NameLength != ent->NameLength || lfh.ExtraLength != ent->ExtraLength) + { + return 0; + } + buf_size = LittleShort(lfh.NameLength) + LittleLong(lfh.CompressedSize); + buf = malloc(buf_size); + if (buf == NULL) + { + return 0; + } + if (fread(buf, 1, buf_size, ozip) != buf_size) + { + free(buf); + return 0; + } + // Check to be sure name matches. + if (strncmp((char *)buf, (char *)(ent + 1), LittleShort(lfh.NameLength)) != 0) + { + free(buf); + return 0; + } + // Looks good. Let's write it in. + file->zip_offset = ftell(zip); + if (fwrite(&lfh, sizeof(lfh), 1, zip) != 1 || + fwrite(buf, 1, buf_size, zip) != buf_size) + { + free(buf); + return -1; + } + free(buf); + file->date = lfh.ModDate; + file->time = lfh.ModTime; + file->uncompressed_size = LittleLong(lfh.UncompressedSize); + file->compressed_size = LittleLong(lfh.CompressedSize); + file->method = LittleShort(lfh.Method); + file->crc32 = lfh.CRC32; + return 1; +} + +//========================================================================== +// +// main +// +//========================================================================== + +int main (int argc, char **argv) +{ + dir_tree_t *tree, *trees; + file_entry_t *file; + struct stat zipstat; + int needwrite; + int i, j, k; + int force = 0; + int update = 0; + + // Find options. Options are removed from the array. 
+ for (i = k = 1; i < argc; ++i) + { + if (argv[i][0] == '-') + { + if (argv[i][1] == '-') + { + if (argv[i][2] == '\0') + { // -- terminates option handling for the rest of the command line + break; + } + } + for (j = 1; argv[i][j] != '\0'; ++j) + { + if (argv[i][j] == 'f') + { + force = 1; + } + else if (argv[i][j] == 'd') + { + DeflateOnly = 1; + } + else if (argv[i][j] == 'u') + { + update = 1; + } + else if (argv[i][j] == 'q') + { + Quiet = 1; + } + else + { + fprintf(stderr, "Unknown option '%c'\n", argv[i][j]); + print_usage(argv[0]); + return 1; + } + } + } + else + { + argv[k++] = argv[i]; + } + } + for (; i <= argc; ++i) + { + argv[k++] = argv[i]; + } + argc -= i - k; + + if (argc < 3) + { + print_usage(argv[0]); + return 1; + } + + trees = add_dirs(&argv[2]); + if (no_mem) + { + free_dir_trees(trees); + fprintf(stderr, "Out of memory.\n"); + return 1; + } + + needwrite = force; + if (stat(argv[1], &zipstat) != 0) + { + if (errno == ENOENT) + { + needwrite = 1; + update = 0; // Can't update what's not there. + } + else + { + fprintf(stderr, "Could not stat %s: %s\n", argv[1], strerror(errno)); + } + } + else if (!needwrite) + { + // Check the files in each tree. If any one of them was modified more + // recently than the zip, then it needs to be recreated. + for (tree = trees; tree != NULL; tree = tree->next) + { + for (file = tree->files; file != NULL; file = file->next) + { + if (file->time_write > zipstat.st_mtime) + { + needwrite = 1; + break; + } + } + } + } + if (force || needwrite) + { + write_zip(argv[1], trees, update); + } + free_dir_trees(trees); + if (no_mem) + { + fprintf(stderr, "Out of memory.\n"); + return 1; + } + return 0; +} + +//========================================================================== +// +// bz_internal_error +// +// libbzip2 wants this, since we build it with BZ_NO_STDIO set. 
//
//==========================================================================

void bz_internal_error (int errcode)
{
	// Process exit status used when libbzip2 reports an internal error.
	enum { BZ_FATAL_EXIT_STATUS = 3 };

	// Report the error number on stderr, then terminate the process.
	fprintf(stderr, "libbzip2: internal error number %d\n", errcode);
	exit(BZ_FATAL_EXIT_STATUS);
}