From 43c6c9b5ee31b8c0aeb1fddd29e00651a1e7a899 Mon Sep 17 00:00:00 2001 From: Randy Heit Date: Sat, 6 Feb 2016 20:58:35 -0600 Subject: [PATCH] Update re2c to version 0.16 --- tools/re2c/CHANGELOG | 199 + tools/re2c/CMakeLists.txt | 84 +- tools/re2c/NO_WARRANTY | 2 + tools/re2c/README | 279 +- tools/re2c/actions.cc | 1062 ---- tools/re2c/basics.h | 57 - tools/re2c/code.cc | 1806 ------ tools/re2c/code.h | 53 - tools/re2c/code_names.h | 33 - tools/re2c/config.h.in | 41 +- tools/re2c/config_w32.h | 102 - tools/re2c/dfa.cc | 416 -- tools/re2c/dfa.h | 366 -- tools/re2c/doc/loplas.ps | 5249 ----------------- tools/re2c/doc/sample.bib | 48 - tools/re2c/examples/001_upn_calculator/README | 83 + .../examples/001_upn_calculator/calc_001.re | 84 + .../examples/001_upn_calculator/calc_002.re | 69 + .../examples/001_upn_calculator/calc_003.re | 61 + .../examples/001_upn_calculator/calc_004.re | 78 + .../examples/001_upn_calculator/calc_005.re | 144 + .../examples/001_upn_calculator/calc_006.s.re | 162 + .../examples/001_upn_calculator/calc_007.b.re | 135 + .../examples/001_upn_calculator/calc_008.b.re | 158 + .../001_upn_calculator/windows/HiResTimer.h | 54 + .../001_upn_calculator/windows/main.b.re | 291 + tools/re2c/examples/002_strip_comments/README | 21 + .../002_strip_comments/strip_001.s.re | 147 + .../002_strip_comments/strip_002.s.re | 162 + .../002_strip_comments/strip_003.b.re | 179 + tools/re2c/examples/basemmap.c | 26 - tools/re2c/examples/cmmap.re | 267 - tools/re2c/examples/cnokw.re | 239 - tools/re2c/examples/cunroll.re | 258 - tools/re2c/examples/input_custom/fixed.re | 35 + .../re2c/examples/input_custom/simple/README | 20 + .../examples/input_custom/simple/default.re | 24 + .../examples/input_custom/simple/fgetc.re | 43 + .../input_custom/simple/istringstream.re | 27 + tools/re2c/examples/{ => langs}/c.re | 0 tools/re2c/examples/{ => langs}/modula.re | 3 +- .../examples/{rexx/rexx.l => langs/rexx.re} | 0 tools/re2c/examples/{ => push_model}/push.re | 6 +- 
tools/re2c/examples/repeater.re | 44 - tools/re2c/examples/rexx/README | 1 - tools/re2c/examples/rexx/scanio.c | 41 - tools/re2c/examples/sample.re | 7 - tools/re2c/examples/simple.re | 13 - tools/re2c/globals.h | 73 - tools/re2c/ins.h | 56 - tools/re2c/main.cc | 351 -- tools/re2c/mbo_getopt.cc | 210 - tools/re2c/mbo_getopt.h | 34 - tools/re2c/parser.cc | 1807 ------ tools/re2c/parser.h | 56 - tools/re2c/parser.y | 220 - tools/re2c/re.h | 496 -- tools/re2c/re2c.1 | 597 -- tools/re2c/re2c.vcproj | 549 -- tools/re2c/scanner.cc | 1302 ---- tools/re2c/scanner.h | 76 - tools/re2c/scanner.re | 381 -- tools/re2c/src/codegen/bitmap.cc | 168 + tools/re2c/src/codegen/bitmap.h | 45 + tools/re2c/src/codegen/emit.h | 43 + tools/re2c/src/codegen/emit_action.cc | 388 ++ tools/re2c/src/codegen/emit_dfa.cc | 348 ++ tools/re2c/src/codegen/go.h | 216 + tools/re2c/src/codegen/go_construct.cc | 284 + tools/re2c/src/codegen/go_destruct.cc | 99 + tools/re2c/src/codegen/go_emit.cc | 271 + tools/re2c/src/codegen/go_used_labels.cc | 111 + tools/re2c/src/codegen/indent.h | 24 + tools/re2c/src/codegen/input_api.cc | 175 + tools/re2c/src/codegen/input_api.h | 43 + tools/re2c/src/codegen/label.cc | 42 + tools/re2c/src/codegen/label.h | 39 + tools/re2c/src/codegen/output.cc | 465 ++ tools/re2c/src/codegen/output.h | 158 + tools/re2c/src/codegen/print.cc | 156 + tools/re2c/src/codegen/print.h | 20 + tools/re2c/src/conf/msg.cc | 254 + tools/re2c/src/conf/msg.h | 24 + tools/re2c/src/conf/opt.cc | 331 ++ tools/re2c/src/conf/opt.h | 218 + tools/re2c/src/conf/parse_opts.cc | 2846 +++++++++ tools/re2c/src/conf/parse_opts.re | 261 + tools/re2c/src/conf/warn.cc | 200 + tools/re2c/src/conf/warn.h | 67 + tools/re2c/src/globals.h | 24 + tools/re2c/src/ir/adfa/action.h | 109 + tools/re2c/src/ir/adfa/adfa.cc | 135 + tools/re2c/src/ir/adfa/adfa.h | 101 + tools/re2c/src/ir/adfa/prepare.cc | 268 + tools/re2c/src/ir/compile.cc | 104 + tools/re2c/src/ir/compile.h | 20 + tools/re2c/src/ir/dfa/determinization.cc | 
197 + tools/re2c/src/ir/dfa/dfa.h | 58 + tools/re2c/src/ir/dfa/fillpoints.cc | 154 + tools/re2c/src/ir/dfa/minimization.cc | 252 + tools/re2c/src/ir/nfa/calc_size.cc | 50 + tools/re2c/src/ir/nfa/nfa.cc | 72 + tools/re2c/src/ir/nfa/nfa.h | 90 + tools/re2c/src/ir/nfa/split.cc | 49 + tools/re2c/src/ir/regexp/display.cc | 51 + tools/re2c/src/ir/regexp/empty_class_policy.h | 15 + tools/re2c/src/ir/regexp/encoding/case.h | 31 + .../ir/regexp/encoding/enc.cc} | 206 +- tools/re2c/src/ir/regexp/encoding/enc.h | 197 + .../src/ir/regexp/encoding/range_suffix.cc | 38 + .../src/ir/regexp/encoding/range_suffix.h | 39 + .../src/ir/regexp/encoding/utf16/utf16.cc | 10 + .../re2c/src/ir/regexp/encoding/utf16/utf16.h | 37 + .../ir/regexp/encoding/utf16/utf16_range.cc | 146 + .../ir/regexp/encoding/utf16/utf16_range.h | 19 + .../ir/regexp/encoding/utf16/utf16_regexp.cc | 38 + .../ir/regexp/encoding/utf16/utf16_regexp.h | 16 + .../re2c/src/ir/regexp/encoding/utf8/utf8.cc | 84 + tools/re2c/src/ir/regexp/encoding/utf8/utf8.h | 48 + .../src/ir/regexp/encoding/utf8/utf8_range.cc | 112 + .../src/ir/regexp/encoding/utf8/utf8_range.h | 18 + .../ir/regexp/encoding/utf8/utf8_regexp.cc | 36 + .../src/ir/regexp/encoding/utf8/utf8_regexp.h | 16 + tools/re2c/src/ir/regexp/fixed_length.cc | 55 + tools/re2c/src/ir/regexp/regexp.cc | 241 + tools/re2c/src/ir/regexp/regexp.h | 52 + tools/re2c/src/ir/regexp/regexp_alt.h | 31 + tools/re2c/src/ir/regexp/regexp_cat.h | 30 + tools/re2c/src/ir/regexp/regexp_close.h | 27 + tools/re2c/src/ir/regexp/regexp_match.h | 29 + tools/re2c/src/ir/regexp/regexp_null.h | 21 + tools/re2c/src/ir/regexp/regexp_rule.h | 52 + tools/re2c/src/ir/rule_rank.cc | 68 + tools/re2c/src/ir/rule_rank.h | 44 + tools/re2c/src/ir/skeleton/control_flow.cc | 61 + tools/re2c/src/ir/skeleton/generate_code.cc | 323 + tools/re2c/src/ir/skeleton/generate_data.cc | 215 + tools/re2c/src/ir/skeleton/match_empty.cc | 49 + tools/re2c/src/ir/skeleton/maxlen.cc | 50 + tools/re2c/src/ir/skeleton/path.h | 
103 + tools/re2c/src/ir/skeleton/skeleton.cc | 163 + tools/re2c/src/ir/skeleton/skeleton.h | 174 + tools/re2c/src/ir/skeleton/unreachable.cc | 73 + tools/re2c/src/ir/skeleton/way.cc | 74 + tools/re2c/src/ir/skeleton/way.h | 20 + tools/re2c/src/main.cc | 60 + tools/re2c/src/parse/code.cc | 8 + tools/re2c/src/parse/code.h | 31 + tools/re2c/src/parse/extop.h | 17 + tools/re2c/src/parse/input.cc | 31 + tools/re2c/src/parse/input.h | 25 + tools/re2c/src/parse/lex.cc | 2861 +++++++++ tools/re2c/src/parse/lex.re | 701 +++ tools/re2c/src/parse/lex_conf.cc | 2284 +++++++ tools/re2c/src/parse/lex_conf.re | 222 + tools/re2c/src/parse/loc.h | 24 + tools/re2c/src/parse/parser.cc | 2396 ++++++++ tools/re2c/src/parse/parser.h | 28 + tools/re2c/src/parse/parser.ypp | 775 +++ tools/re2c/src/parse/rules.h | 29 + tools/re2c/src/parse/scanner.cc | 211 + tools/re2c/src/parse/scanner.h | 147 + tools/re2c/src/parse/spec.h | 55 + tools/re2c/src/parse/unescape.cc | 60 + tools/re2c/src/parse/unescape.h | 13 + tools/re2c/src/parse/y.tab.h | 89 + tools/re2c/src/test/range/test-impl.h | 50 + tools/re2c/src/test/range/test.cc | 94 + tools/re2c/src/test/range/test.h | 26 + tools/re2c/src/test/s_to_n32_unsafe/test.cc | 102 + tools/re2c/src/util/allocate.h | 19 + tools/re2c/src/util/attribute.h | 10 + tools/re2c/src/util/c99_stdint.h | 262 + tools/re2c/src/util/counter.h | 29 + tools/re2c/src/util/forbid_copy.h | 11 + tools/re2c/src/util/free_list.h | 56 + tools/re2c/src/util/local_increment.h | 22 + tools/re2c/src/util/ord_hash_set.h | 115 + tools/re2c/src/util/range.cc | 97 + tools/re2c/src/util/range.h | 65 + tools/re2c/src/util/s_to_n32_unsafe.cc | 55 + tools/re2c/src/util/s_to_n32_unsafe.h | 10 + tools/re2c/src/util/smart_ptr.h | 69 + tools/re2c/src/util/static_assert.h | 14 + tools/re2c/src/util/u32lim.h | 72 + tools/re2c/src/util/uniq_vector.h | 46 + tools/re2c/stream_lc.h | 433 -- tools/re2c/substr.cc | 62 - tools/re2c/substr.h | 101 - tools/re2c/token.h | 28 - tools/re2c/y.tab.h | 88 - 
191 files changed, 25750 insertions(+), 17231 deletions(-) create mode 100644 tools/re2c/NO_WARRANTY delete mode 100644 tools/re2c/actions.cc delete mode 100644 tools/re2c/basics.h delete mode 100644 tools/re2c/code.cc delete mode 100644 tools/re2c/code.h delete mode 100644 tools/re2c/code_names.h delete mode 100644 tools/re2c/config_w32.h delete mode 100644 tools/re2c/dfa.cc delete mode 100644 tools/re2c/dfa.h delete mode 100644 tools/re2c/doc/loplas.ps delete mode 100644 tools/re2c/doc/sample.bib create mode 100644 tools/re2c/examples/001_upn_calculator/README create mode 100644 tools/re2c/examples/001_upn_calculator/calc_001.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_002.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_003.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_004.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_005.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_006.s.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_007.b.re create mode 100644 tools/re2c/examples/001_upn_calculator/calc_008.b.re create mode 100644 tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h create mode 100644 tools/re2c/examples/001_upn_calculator/windows/main.b.re create mode 100644 tools/re2c/examples/002_strip_comments/README create mode 100644 tools/re2c/examples/002_strip_comments/strip_001.s.re create mode 100644 tools/re2c/examples/002_strip_comments/strip_002.s.re create mode 100644 tools/re2c/examples/002_strip_comments/strip_003.b.re delete mode 100644 tools/re2c/examples/basemmap.c delete mode 100644 tools/re2c/examples/cmmap.re delete mode 100644 tools/re2c/examples/cnokw.re delete mode 100644 tools/re2c/examples/cunroll.re create mode 100644 tools/re2c/examples/input_custom/fixed.re create mode 100644 tools/re2c/examples/input_custom/simple/README create mode 100644 tools/re2c/examples/input_custom/simple/default.re create mode 100644 
tools/re2c/examples/input_custom/simple/fgetc.re create mode 100644 tools/re2c/examples/input_custom/simple/istringstream.re rename tools/re2c/examples/{ => langs}/c.re (100%) rename tools/re2c/examples/{ => langs}/modula.re (98%) rename tools/re2c/examples/{rexx/rexx.l => langs/rexx.re} (100%) rename tools/re2c/examples/{ => push_model}/push.re (99%) delete mode 100644 tools/re2c/examples/repeater.re delete mode 100644 tools/re2c/examples/rexx/README delete mode 100644 tools/re2c/examples/rexx/scanio.c delete mode 100644 tools/re2c/examples/sample.re delete mode 100644 tools/re2c/examples/simple.re delete mode 100644 tools/re2c/globals.h delete mode 100644 tools/re2c/ins.h delete mode 100644 tools/re2c/main.cc delete mode 100644 tools/re2c/mbo_getopt.cc delete mode 100644 tools/re2c/mbo_getopt.h delete mode 100644 tools/re2c/parser.cc delete mode 100644 tools/re2c/parser.h delete mode 100644 tools/re2c/parser.y delete mode 100644 tools/re2c/re.h delete mode 100644 tools/re2c/re2c.1 delete mode 100644 tools/re2c/re2c.vcproj delete mode 100644 tools/re2c/scanner.cc delete mode 100644 tools/re2c/scanner.h delete mode 100644 tools/re2c/scanner.re create mode 100644 tools/re2c/src/codegen/bitmap.cc create mode 100644 tools/re2c/src/codegen/bitmap.h create mode 100644 tools/re2c/src/codegen/emit.h create mode 100644 tools/re2c/src/codegen/emit_action.cc create mode 100644 tools/re2c/src/codegen/emit_dfa.cc create mode 100644 tools/re2c/src/codegen/go.h create mode 100644 tools/re2c/src/codegen/go_construct.cc create mode 100644 tools/re2c/src/codegen/go_destruct.cc create mode 100644 tools/re2c/src/codegen/go_emit.cc create mode 100644 tools/re2c/src/codegen/go_used_labels.cc create mode 100644 tools/re2c/src/codegen/indent.h create mode 100644 tools/re2c/src/codegen/input_api.cc create mode 100644 tools/re2c/src/codegen/input_api.h create mode 100644 tools/re2c/src/codegen/label.cc create mode 100644 tools/re2c/src/codegen/label.h create mode 100644 
tools/re2c/src/codegen/output.cc create mode 100644 tools/re2c/src/codegen/output.h create mode 100644 tools/re2c/src/codegen/print.cc create mode 100644 tools/re2c/src/codegen/print.h create mode 100644 tools/re2c/src/conf/msg.cc create mode 100644 tools/re2c/src/conf/msg.h create mode 100644 tools/re2c/src/conf/opt.cc create mode 100644 tools/re2c/src/conf/opt.h create mode 100644 tools/re2c/src/conf/parse_opts.cc create mode 100644 tools/re2c/src/conf/parse_opts.re create mode 100644 tools/re2c/src/conf/warn.cc create mode 100644 tools/re2c/src/conf/warn.h create mode 100644 tools/re2c/src/globals.h create mode 100644 tools/re2c/src/ir/adfa/action.h create mode 100644 tools/re2c/src/ir/adfa/adfa.cc create mode 100644 tools/re2c/src/ir/adfa/adfa.h create mode 100644 tools/re2c/src/ir/adfa/prepare.cc create mode 100644 tools/re2c/src/ir/compile.cc create mode 100644 tools/re2c/src/ir/compile.h create mode 100644 tools/re2c/src/ir/dfa/determinization.cc create mode 100644 tools/re2c/src/ir/dfa/dfa.h create mode 100644 tools/re2c/src/ir/dfa/fillpoints.cc create mode 100644 tools/re2c/src/ir/dfa/minimization.cc create mode 100644 tools/re2c/src/ir/nfa/calc_size.cc create mode 100644 tools/re2c/src/ir/nfa/nfa.cc create mode 100644 tools/re2c/src/ir/nfa/nfa.h create mode 100644 tools/re2c/src/ir/nfa/split.cc create mode 100644 tools/re2c/src/ir/regexp/display.cc create mode 100644 tools/re2c/src/ir/regexp/empty_class_policy.h create mode 100644 tools/re2c/src/ir/regexp/encoding/case.h rename tools/re2c/{translate.cc => src/ir/regexp/encoding/enc.cc} (50%) create mode 100644 tools/re2c/src/ir/regexp/encoding/enc.h create mode 100644 tools/re2c/src/ir/regexp/encoding/range_suffix.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/range_suffix.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc create mode 
100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc create mode 100644 tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h create mode 100644 tools/re2c/src/ir/regexp/fixed_length.cc create mode 100644 tools/re2c/src/ir/regexp/regexp.cc create mode 100644 tools/re2c/src/ir/regexp/regexp.h create mode 100644 tools/re2c/src/ir/regexp/regexp_alt.h create mode 100644 tools/re2c/src/ir/regexp/regexp_cat.h create mode 100644 tools/re2c/src/ir/regexp/regexp_close.h create mode 100644 tools/re2c/src/ir/regexp/regexp_match.h create mode 100644 tools/re2c/src/ir/regexp/regexp_null.h create mode 100644 tools/re2c/src/ir/regexp/regexp_rule.h create mode 100644 tools/re2c/src/ir/rule_rank.cc create mode 100644 tools/re2c/src/ir/rule_rank.h create mode 100644 tools/re2c/src/ir/skeleton/control_flow.cc create mode 100644 tools/re2c/src/ir/skeleton/generate_code.cc create mode 100644 tools/re2c/src/ir/skeleton/generate_data.cc create mode 100644 tools/re2c/src/ir/skeleton/match_empty.cc create mode 100644 tools/re2c/src/ir/skeleton/maxlen.cc create mode 100644 tools/re2c/src/ir/skeleton/path.h create mode 100644 tools/re2c/src/ir/skeleton/skeleton.cc create mode 100644 tools/re2c/src/ir/skeleton/skeleton.h create mode 100644 tools/re2c/src/ir/skeleton/unreachable.cc create mode 100644 tools/re2c/src/ir/skeleton/way.cc create mode 100644 tools/re2c/src/ir/skeleton/way.h create mode 100644 tools/re2c/src/main.cc create mode 100644 tools/re2c/src/parse/code.cc create mode 100644 
tools/re2c/src/parse/code.h create mode 100644 tools/re2c/src/parse/extop.h create mode 100644 tools/re2c/src/parse/input.cc create mode 100644 tools/re2c/src/parse/input.h create mode 100644 tools/re2c/src/parse/lex.cc create mode 100644 tools/re2c/src/parse/lex.re create mode 100644 tools/re2c/src/parse/lex_conf.cc create mode 100644 tools/re2c/src/parse/lex_conf.re create mode 100644 tools/re2c/src/parse/loc.h create mode 100644 tools/re2c/src/parse/parser.cc create mode 100644 tools/re2c/src/parse/parser.h create mode 100644 tools/re2c/src/parse/parser.ypp create mode 100644 tools/re2c/src/parse/rules.h create mode 100644 tools/re2c/src/parse/scanner.cc create mode 100644 tools/re2c/src/parse/scanner.h create mode 100644 tools/re2c/src/parse/spec.h create mode 100644 tools/re2c/src/parse/unescape.cc create mode 100644 tools/re2c/src/parse/unescape.h create mode 100644 tools/re2c/src/parse/y.tab.h create mode 100644 tools/re2c/src/test/range/test-impl.h create mode 100644 tools/re2c/src/test/range/test.cc create mode 100644 tools/re2c/src/test/range/test.h create mode 100644 tools/re2c/src/test/s_to_n32_unsafe/test.cc create mode 100644 tools/re2c/src/util/allocate.h create mode 100644 tools/re2c/src/util/attribute.h create mode 100644 tools/re2c/src/util/c99_stdint.h create mode 100644 tools/re2c/src/util/counter.h create mode 100644 tools/re2c/src/util/forbid_copy.h create mode 100644 tools/re2c/src/util/free_list.h create mode 100644 tools/re2c/src/util/local_increment.h create mode 100644 tools/re2c/src/util/ord_hash_set.h create mode 100644 tools/re2c/src/util/range.cc create mode 100644 tools/re2c/src/util/range.h create mode 100644 tools/re2c/src/util/s_to_n32_unsafe.cc create mode 100644 tools/re2c/src/util/s_to_n32_unsafe.h create mode 100644 tools/re2c/src/util/smart_ptr.h create mode 100644 tools/re2c/src/util/static_assert.h create mode 100644 tools/re2c/src/util/u32lim.h create mode 100644 tools/re2c/src/util/uniq_vector.h delete mode 100644 
tools/re2c/stream_lc.h delete mode 100644 tools/re2c/substr.cc delete mode 100644 tools/re2c/substr.h delete mode 100644 tools/re2c/token.h delete mode 100644 tools/re2c/y.tab.h diff --git a/tools/re2c/CHANGELOG b/tools/re2c/CHANGELOG index c371edbe1..02a523c27 100644 --- a/tools/re2c/CHANGELOG +++ b/tools/re2c/CHANGELOG @@ -1,3 +1,202 @@ +Version 0.16 (2016-01-21) +--------------------------- +- Fixed bug #127 "code generation error with wide chars and bitmaps (omitted 'goto' statement)" +- Added DFA minimization and option '--dfa-minimization ' +- Fixed bug #128 "very slow DFA construction (resulting in a very large DFA)" +- Fixed bug #132 "test failure on big endian archs with 0.15.3" + +Version 0.15.3 (2015-12-02) +--------------------------- +- Fixed bugs and applied patches: + #122 "clang does not compile re2c 0.15.x" (reported and fixed by Oleksii Taran). + #124 "Get rid of UINT32_MAX and friends" (patch by Sergei Trofimovich, fixes FreeBSD builds). + #125 "[OS X] git reports changes not staged for commit in newly cloned repository" (by Oleksii Taran, this fix also applies to Windows). +- Added option --no-version that allows to omit version information. +- Reduced memory and time consumed with -Wundefined-control-flow. +- Improved coverage of input data generated with -S --skeleton. + +Version 0.15.2 (2015-11-23) +--------------------------- +- Fixed build system: lexer depends on bison-generated parser + (Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=566620) + +Version 0.15.1 (2015-11-22) +--------------------------- +- Fixed test failures caused by locale-sensitive 'sort'. 
+ +Version 0.15 (2015-11-22) +------------------------- +- Updated website http://re2c.org: + added examples + updated docs + added news + added web feed (Atom 1.0) +- Added options: + -S, --skeleton + --empty-class +- Added warnings: + -W + -Werror + -W<warning> + -Wno-<warning> + -Werror-<warning> + -Wno-error-<warning> +- Added individual warnings: + -Wundefined-control-flow + -Wunreachable-rules + -Wcondition-order + -Wuseless-escape + -Wempty-character-class + -Wswapped-range + -Wmatch-empty-string +- Fixed options: + -- (interpret remaining arguments as non-options) +- Deprecated options: + -1 --single-pass (single pass is by default now) +- Reduced size of the generated .dot files. +- Fixed bugs: + #27 re2c crashes reading files containing %{ %} (patch by Rui) + #51 default rule doesn't work in reuse mode + #52 eliminate multiple passes + #59 bogus yyaccept in -c mode + #60 redundant use of YYMARKER + #61 empty character class [] matches empty string + #115 flex-style named definitions cause ambiguity in re2c grammar + #119 -f with -b/-g generates incorrect dispatch on fill labels + #116 empty string with non-empty trailing context consumes code units +- Added test options: + -j, -j <N> (run tests in N threads, defaults to the number of CPUs) + --wine (test windows builds using wine) + --skeleton (generate skeleton programs, compile and execute them) + --keep-tmp-files (don't delete intermediate files for successful tests) +- Updated build system: + support out of source builds + support `make distcheck` + added `make bootstrap` (rebuild re2c after building with precompiled .re files) + added `make tests` (run tests with -j) + added `make vtests` (run tests with --valgrind -j) + added `make wtests` (run tests with --wine -j 1) + added Autoconf tests for CXXFLAGS. 
By default try the following options: + -W -Wall -Wextra -Weffc++ -pedantic -Wformat=2 -Wredundant-decls + -Wsuggest-attribute=format -Wconversion -Wsign-conversion -O2 -Weverything), + respect user-defined CXXFLAGS + support Mingw builds: `configure -host i686-w64-mingw32` + structured source files + removed old MSVC files +- Moved development to github (https://github.com/skvadrik/re2c), keep a mirror on sourceforge. + +Version 0.14.3 (2015-05-20) +----------------------------- +- applied patch '#27 re2c crashes reading files containing %{ %}' by Rui +- dropped distfiles for MSVC (they are broken anyway) + +Version 0.14.2 (2015-03-25) +----------------------------- +- fixed #57 Wrong result only if another rule is present + +Version 0.14.1 (2015-02-27) +----------------------------- +- fixed #55 re2c-0.14: re2c -V outputs null byte + +Version 0.14 (2015-02-23) +----------------------------- +- Added generic input API 21 (#21 Support to configure how re2c code interfaced with the symbol buffer?) +- fixed #46 re2c generates an infinite loop, depends on existence of previous parser +- fixed #47 Dot output label escaped characters + +Version 0.13.7.5 (2014-08-22) +----------------------------- +- Fixed Gentoo bug: https://bugs.gentoo.org/show_bug.cgi?id=518904 (PHP lexer) + +Version 0.13.7.4 (2014-07-29) +----------------------------- +- Enabled 'make docs' only if configured with '--enable-docs' +- Disallowed to use yacc/byacc instead of bison to build parser +- Removed non-portable sed feature in script that runs tests + +Version 0.13.7.3 (2014-07-28) +----------------------------- +- Fixed CXX warning +- Got rid of asciidoc build-time dependency + +Version 0.13.7.2 (2014-07-27) +----------------------------- +- Included man page into dist, respect users CXXFLAGS. 
+ +Version 0.13.7.1 (2014-07-26) +----------------------------- +- Added missing files to tarball + +Version 0.13.7 (2014-07-25) +--------------------------- +- Added UTF-8 support +- Added UTF-16 support +- Added default rule +- Added option to control ill-formed Unicode + +Version 0.13.6 (2013-07-04) +--------------------------- +- Fixed #2535084 uint problem with Sun C 5.8 +- #3308400: allow Yacc-style %{code brackets}% +- #2506253: allow C++ // comments +- Fixed inplace configuration in -e mode. +- Applied #2482572 Typos in error messages. +- Applied #2482561 Error in manual section on -r mode. +- Fixed #2478216 Wrong start_label in -c mode. +- Fixed #2186718 Unescaped backslash in file name of #line directive. +- Fixed #2102138 Duplicate case labels on EBCDIC. +- Fixed #2088583 Compile problem on AIX. +- Fixed #2038610 Ebcdic problem. +- improve dot support: make char intervals (e.g. [A-Z]) instead of one edge per char + +Version 0.13.5 (2008-05-25) +--------------------------- +- Fixed #1952896 Segfault in re2c::Scanner::scan. +- Fixed #1952842 Regression. + +Version 0.13.4 (2008-04-05) +--------------------------- +- Added transparent handling of #line directives in input files. +- Added re2c:yyfill:check inplace configuration. +- Added re2c:define:YYSETSTATE:naked inplace configuration. +- Added re2c:flags:w and re2c:flags:u inplace configurations. +- Added the ability to add rules in 'use:re2c' blocks. +- Changed -r flag to accept only 'rules:re2c' and 'use:re2c' blocks. + +Version 0.13.3 (2008-03-14) +--------------------------- +- Added -r flag to allow reuse of scanner definitions. +- Added -F flag to support flex syntax in rules. +- Fixed SEGV in scanner that occurs with very large blocks. +- Fixed issue with unused yybm. +- Partial support for flex syntax. +- Changed to allow /* comments with -c switch. +- Added flag -D/--emit-dot. + +Version 0.13.2 (2008-02-14) +--------------------------- +- Added flag --case-inverted. 
+- Added flag --case-insensitive. +- Added support for '<!...>' to enable rule setup. +- Added support for '=>' style rules. +- Added support for ':=' style rules. +- Added support for ':=>' style rules. +- Added re2c:cond:divider and re2c:cond:goto inplace configuration. +- Fixed code generation to emit space after 'if'. + +Version 0.13.1 (2007-08-24) +--------------------------- +- Added custom build rules for Visual Studio 2005 (re2c.rules). (William Swanson) +- Fixed issue with some compilers. +- Fixed #1776177 Build on AIX. +- Fixed #1743180 fwrite with 0 length crashes on OS X. + +Version 0.13.0 (2007-06-24) +--------------------------- +- Added -c and -t to generate scanners with (f)lex-like condition support. +- Fixed issue with short form of switches and parameter if not first switch. +- Fixed #1708378 segfault in actions.cc. + Version 0.12.3 (2007-08-24) --------------------------- - Fixed issue with some compilers. diff --git a/tools/re2c/CMakeLists.txt b/tools/re2c/CMakeLists.txt index ffc7a56e9..78742ef99 100644 --- a/tools/re2c/CMakeLists.txt +++ b/tools/re2c/CMakeLists.txt @@ -7,32 +7,92 @@ include( CheckTypeSize ) set( PACKAGE_NAME re2c ) set( PACKAGE_TARNAME re2c ) -set( PACKAGE_VERSION 0.12.3 ) -set( PACKAGE_STRING "re2c 0.12.3" ) +set( PACKAGE_VERSION 0.16 ) +set( PACKAGE_STRING "re2c 0.16" ) set( PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net" ) CHECK_FUNCTION_EXISTS( strdup HAVE_STRDUP ) CHECK_FUNCTION_EXISTS( strndup HAVE_STRNDUP ) +CHECK_TYPE_SIZE( "0i8" SIZEOF_0I8 ) +CHECK_TYPE_SIZE( "0l" SIZEOF_0L ) +CHECK_TYPE_SIZE( "0ll" SIZEOF_0LL ) CHECK_TYPE_SIZE( char SIZEOF_CHAR ) CHECK_TYPE_SIZE( short SIZEOF_SHORT ) CHECK_TYPE_SIZE( int SIZEOF_INT ) CHECK_TYPE_SIZE( long SIZEOF_LONG ) +CHECK_TYPE_SIZE( "long long" SIZEOF_LONG_LONG ) +CHECK_TYPE_SIZE( "void *" SIZEOF_VOID_P ) +CHECK_TYPE_SIZE( __int64 SIZEOF___INT_64 ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h ) -include_directories( 
${CMAKE_CURRENT_BINARY_DIR} ) +include_directories( ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ) add_definitions( -DHAVE_CONFIG_H ) +file( GLOB SRC_HDR + src/codegen/*.h + src/conf/*.h + src/ir/*.h + src/*.h + src/parse/*.h + src/util/*.h ) + add_executable( re2c - actions.cc - code.cc - dfa.cc - main.cc - mbo_getopt.cc - parser.cc - scanner.cc - substr.cc - translate.cc ) + ${SRC_HDR} + src/codegen/bitmap.cc + src/codegen/emit_action.cc + src/codegen/emit_dfa.cc + src/codegen/label.cc + src/codegen/go_construct.cc + src/codegen/go_destruct.cc + src/codegen/go_emit.cc + src/codegen/go_used_labels.cc + src/codegen/input_api.cc + src/codegen/output.cc + src/codegen/print.cc + src/conf/msg.cc + src/conf/opt.cc + src/conf/parse_opts.cc + src/conf/warn.cc + src/ir/nfa/calc_size.cc + src/ir/nfa/nfa.cc + src/ir/nfa/split.cc + src/ir/adfa/adfa.cc + src/ir/adfa/prepare.cc + src/ir/dfa/determinization.cc + src/ir/dfa/fillpoints.cc + src/ir/dfa/minimization.cc + src/ir/regexp/display.cc + src/ir/regexp/encoding/enc.cc + src/ir/regexp/encoding/range_suffix.cc + src/ir/regexp/encoding/utf8/utf8_regexp.cc + src/ir/regexp/encoding/utf8/utf8_range.cc + src/ir/regexp/encoding/utf8/utf8.cc + src/ir/regexp/encoding/utf16/utf16_regexp.cc + src/ir/regexp/encoding/utf16/utf16.cc + src/ir/regexp/encoding/utf16/utf16_range.cc + src/ir/regexp/fixed_length.cc + src/ir/regexp/regexp.cc + src/ir/compile.cc + src/ir/rule_rank.cc + src/ir/skeleton/control_flow.cc + src/ir/skeleton/generate_code.cc + src/ir/skeleton/generate_data.cc + src/ir/skeleton/match_empty.cc + src/ir/skeleton/maxlen.cc + src/ir/skeleton/skeleton.cc + src/ir/skeleton/unreachable.cc + src/ir/skeleton/way.cc + src/main.cc + src/parse/code.cc + src/parse/input.cc + src/parse/lex.cc + src/parse/lex_conf.cc + src/parse/parser.cc + src/parse/scanner.cc + src/parse/unescape.cc + src/util/s_to_n32_unsafe.cc + src/util/range.cc ) set( CROSS_EXPORTS ${CROSS_EXPORTS} re2c PARENT_SCOPE ) diff --git 
a/tools/re2c/NO_WARRANTY b/tools/re2c/NO_WARRANTY new file mode 100644 index 000000000..885a13d06 --- /dev/null +++ b/tools/re2c/NO_WARRANTY @@ -0,0 +1,2 @@ +re2c is distributed with no warranty whatever. The author and any other +contributors take no responsibility for the consequences of its use. diff --git a/tools/re2c/README b/tools/re2c/README index 84c2f4fdb..29d97b674 100644 --- a/tools/re2c/README +++ b/tools/re2c/README @@ -1,188 +1,159 @@ -re2c Version 0.12.3 ------------------- +re2c +-------------------------------------------------------------------------------- -Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca) -Currently maintained by: - Dan Nuffer - Marcus Boerger - Hartmut Kaiser +DESCRIPTION +-------------------------------------------------------------------------------- +re2c is a tool for generating C-based recognizers from regular expressions. +re2c-based scanners are efficient: for programming languages, given similar +specifications, a re2c-based scanner is typically almost twice as fast as a +flex-based scanner with little or no increase in size (possibly a decrease +on cisc architectures). Indeed, re2c-based scanners are quite competitive with +hand-crafted ones. +Unlike flex, re2c does not generate complete scanners: the user must supply some +interface code. While this code is not bulky (about 50-100 lines for a +flex-like scanner; see the man page and examples in the distribution) careful +coding is required for efficiency (and correctness). One advantage of this +arrangement is that the generated code is not tied to any particular input +model. 
+-------------------------------------------------------------------------------- + + +DOWNLOAD +-------------------------------------------------------------------------------- The re2c distribution can be found at: + https://sourceforge.net/projects/re2c/ - http://sourceforge.net/projects/re2c/ +Download the latest tarball: + https://sourceforge.net/projects/re2c/files/latest/download -re2c has been developed and tested with the following compilers on various -platforms in 32 bit and 64 bit mode: -- GCC 3.3 ... 4.1 -- Microsoft VC 7, 7.1, 8 -- Intel 9.0 -- Sun C++ 5.8 (CXXFLAGS='-library=stlport4') -- MIPSpro Compilers: Version 7.4.4m +Clone git repo: + git clone git://git.code.sf.net/p/re2c/code-git +-------------------------------------------------------------------------------- -GCC 2.x and Microsoft VC 6 are not capable of compiling re2c. -Building re2c on unix like platforms requires autoconf 2.57 and bison (tested -with 1.875 and later). Under windows you don't need autoconf or bison -and can use the pregenerated files. +BUILD +-------------------------------------------------------------------------------- +Contents: + 1. simple build + 2. bootstrap + 3. out-of-source build + 4. testing + 5. rebuild documentation + 6. build for windows with mingw + 7. build from git -You can build this software by simply typing the following commands: - ./configure - make +1. Simplest possible build: + $ ./configure [--prefix=<prefix>] + $ make + $ make install +This will build re2c and install it (binary and man page) to <prefix> (defaults +to /usr/local). -The above version will be based on the pregenerated scanner.cc file. -If you want to build that file yourself (recommended when installing -re2c) you need the following steps: - ./configure - make - rm -f scanner.cc - make install +2. Bootstrap and rebuild: + $ ./configure [--prefix=<prefix>] + $ make bootstrap + $ make install +Usual bootstrap procedure: re2c uses re2c to compile its lexer. +1. 
build lexer (if make finds re2c binary in build directory, it will build lexer + from source, otherwise it will use prebuilt lexer) +2. build re2c +3. build lexer from source using re2c binary in build directory +4. rebuild re2c -Or you can create a rpm package and install it by the following commands: - ./configure - make rpm - rpm -Uhv /re2c-0.12.3-1.rpm +3. Out-of-source build: + $ mkdir <builddir> + $ cd <builddir> + $ <srcdir>/configure [--prefix=<prefix>] + $ make + $ make install -If you want to build from CVS then the first thing you should do is -regenerating all build files using the following command: - ./autogen.sh -and then continue with one of the above described build methods. Or if you -need to generate RPM packages for cvs builds use these commands: - ./autogen.sh - ./configure - ./makerpm - rpm -Uhv /re2c-0.12.3-.rpm +4. Testing: + $ make check +This will redirect test script output to file. If you want to see progress: + $ make tests +Testing under valgrind (takes a long time): + $ make vtests -Here should be a number like 1. And must equal -the directory where the makerpm step has written the generated rpm to. +5. Rebuild documentation (requires rst2man.py): + $ ./configure --enable-docs [--prefix=<prefix>] + $ make docs + $ make install -If you are on a debian system you can use the tool 'alien' to convert rpms -to debian packages. +6. Build for windows using mingw: + $ ../configure --host i686-w64-mingw32 [--prefix=<prefix>] + $ make +This will result in an executable re2c.exe, which can be tested with wine: + $ make wtests -When building with native SUN compilers you need to set the following compiler -flags: CXXFLAGS='-g -compat5 -library=stlport4'. +7. 
If you want to build from git, you'll first need to generate autotools files: + $ ./autogen.sh +-------------------------------------------------------------------------------- -If you want to build re2c on a windows system you can either use cygwin and one -of the methods described above or use Microsoft Visual C .NET 2002 or later -with the solution files provided (re2c.sln for 2002/2003 and re2c-2005.sln for -version 2005). re2c cannot be built with Microsoft Visual C 6.0 or earlier. -re2c is a great tool for writing fast and flexible lexers. It has -served many people well for many years. re2c is on the order of 2-3 -times faster than a flex based scanner, and its input model is much -more flexible. +INFO +-------------------------------------------------------------------------------- + $ man re2c -For an introduction to re2c refer to the lessons sub directory. +re2c home page: + re2c.org -Peter's original version 0.5 ANNOUNCE and README follows. +re2c manual: + re2c.org/manual.html --- +Ulya Trofimovich's blog on re2c: + skvadrik.github.io/aleph_null/re2c.html -re2c is a tool for generating C-based recognizers from regular -expressions. re2c-based scanners are efficient: for programming -languages, given similar specifications, an re2c-based scanner is -typically almost twice as fast as a flex-based scanner with little or no -increase in size (possibly a decrease on cisc architectures). Indeed, -re2c-based scanners are quite competitive with hand-crafted ones. +Original paper on re2c: "RE2C: a More Versatile Parser Generator" (1994, Peter +Bumbulis and Donald D. Cowan). -Unlike flex, re2c does not generate complete scanners: the user must -supply some interface code. While this code is not bulky (about 50-100 -lines for a flex-like scanner; see the man page and examples in the -distribution) careful coding is required for efficiency (and -correctness). One advantage of this arrangement is that the generated -code is not tied to any particular input model. 
For example, re2c -generated code can be used to scan data from a null-byte terminated -buffer as illustrated below. +Examples can be found in 'examples' directory. +-------------------------------------------------------------------------------- -Given the following source - #define NULL ((char*) 0) - char *scan(char *p) - { - #define YYCTYPE char - #define YYCURSOR p - #define YYLIMIT p - #define YYFILL(n) - /*!re2c - [0-9]+ {return YYCURSOR;} - [\000-\377] {return NULL;} - */ - } +MAILING LISTS +-------------------------------------------------------------------------------- +re2c-general: + re2c-general@lists.sourceforge.net +re2c-devel: + re2c-devel@lists.sourceforge.net -re2c will generate +You are welcome to ask for help or share your thoughts and ideas about re2c :) +-------------------------------------------------------------------------------- - /* Generated by re2c on Sat Apr 16 11:40:58 1994 */ - #line 1 "simple.re" - #define NULL ((char*) 0) - char *scan(char *p) - { - #define YYCTYPE char - #define YYCURSOR p - #define YYLIMIT p - #define YYFILL(n) - { - YYCTYPE yych; - unsigned int yyaccept; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; - if(yych <= '/') goto yy4; - if(yych >= ':') goto yy4; - yy2: yych = *++YYCURSOR; - goto yy7; - yy3: - #line 9 - {return YYCURSOR;} - yy4: yych = *++YYCURSOR; - yy5: - #line 10 - {return NULL;} - yy6: ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - yy7: if(yych <= '/') goto yy3; - if(yych <= '9') goto yy6; - goto yy3; - } - #line 11 +BUGS +-------------------------------------------------------------------------------- +Please report any bugs and send feature requests to: + https://sourceforge.net/p/re2c/_list/tickets +-------------------------------------------------------------------------------- - } -Note that most compilers will perform dead-code elimination to remove -all YYCURSOR, YYLIMIT comparisions. 
+AUTHORS +-------------------------------------------------------------------------------- +Originally written by Peter Bumbulis (peter@csg.uwaterloo.ca) +Currently maintained by: + Ulya Trofimovich + Dan Nuffer + Marcus Boerger + Hartmut Kaiser +-------------------------------------------------------------------------------- -re2c was developed for a particular project (constructing a fast REXX -scanner of all things!) and so while it has some rough edges, it should -be quite usable. More information about re2c can be found in the -(admittedly skimpy) man page; the algorithms and heuristics used are -described in an upcoming LOPLAS article (included in the distribution). -Probably the best way to find out more about re2c is to try the supplied -examples. re2c is written in C++, and is currently being developed -under Linux using gcc 2.5.8. -Peter +LICENSE +-------------------------------------------------------------------------------- +re2c is distributed with no warranty whatever. The code is certain to contain +errors. Neither the author nor any contributor takes responsibility for any +consequences of its use. --- - -re2c is distributed with no warranty whatever. The code is certain to -contain errors. Neither the author nor any contributor takes -responsibility for any consequences of its use. - -re2c is in the public domain. The data structures and algorithms used -in re2c are all either taken from documents available to the general -public or are inventions of the author. Programs generated by re2c may -be distributed freely. re2c itself may be distributed freely, in source -or binary, unchanged or modified. Distributors may charge whatever fees -they can obtain for re2c. +re2c is in the public domain. The data structures and algorithms used in re2c +are all either taken from documents available to the general public or are +inventions of the authors. Programs generated by re2c may be distributed freely. 
+re2c itself may be distributed freely, in source or binary, unchanged or +modified. Distributors may charge whatever fees they can obtain for re2c. If you do make use of re2c, or incorporate it into a larger project an -acknowledgement somewhere (documentation, research report, etc.) would -be appreciated. - -Please send bug reports and feedback (including suggestions for -improving the distribution) to - - peter@csg.uwaterloo.ca - -Include a small example and the banner from parser.y with bug reports. - +acknowledgement somewhere (documentation, research report, etc.) would be +appreciated. +-------------------------------------------------------------------------------- diff --git a/tools/re2c/actions.cc b/tools/re2c/actions.cc deleted file mode 100644 index b90584827..000000000 --- a/tools/re2c/actions.cc +++ /dev/null @@ -1,1062 +0,0 @@ -/* $Id: actions.cc 608 2006-11-05 00:48:30Z helly $ */ -#include -#include -#include -#include -#include - -#include "globals.h" -#include "parser.h" -#include "dfa.h" - -namespace re2c -{ - -void Symbol::ClearTable() -{ - for (SymbolTable::iterator it = symbol_table.begin(); it != symbol_table.end(); ++it) - { - delete it->second; - } - - symbol_table.clear(); -} - -Symbol::SymbolTable Symbol::symbol_table; - -Symbol *Symbol::find(const SubStr &str) -{ - const std::string ss(str.to_string()); - SymbolTable::const_iterator it = symbol_table.find(ss); - - if (it == symbol_table.end()) - { - return (*symbol_table.insert(SymbolTable::value_type(ss, new Symbol(str))).first).second; - } - - return (*it).second; -} - -void showIns(std::ostream &o, const Ins &i, const Ins &base) -{ - o.width(3); - o << &i - &base << ": "; - - switch (i.i.tag) - { - - case CHAR: - { - o << "match "; - - for (const Ins *j = &(&i)[1]; j < (Ins*) i.i.link; ++j) - prtCh(o, j->c.value); - - break; - } - - case GOTO: - o << "goto " << ((Ins*) i.i.link - &base); - break; - - case FORK: - o << "fork " << ((Ins*) i.i.link - &base); - break; - - case CTXT: - o 
<< "ctxt"; - break; - - case TERM: - o << "term " << ((RuleOp*) i.i.link)->accept; - break; - } - - o << "\n"; -} - -uint RegExp::fixedLength() -{ - return ~0; -} - -const char *NullOp::type = "NullOp"; - -void NullOp::calcSize(Char*) -{ - size = 0; -} - -uint NullOp::fixedLength() -{ - return 0; -} - -void NullOp::compile(Char*, Ins*) -{ - ; -} - -void NullOp::split(CharSet&) -{ - ; -} - -std::ostream& operator<<(std::ostream &o, const Range &r) -{ - if ((r.ub - r.lb) == 1) - { - prtCh(o, r.lb); - } - else - { - prtCh(o, r.lb); - o << "-"; - prtCh(o, r.ub - 1); - } - - return o << r.next; -} - -Range *doUnion(Range *r1, Range *r2) -{ - Range *r, **rP = &r; - - for (;;) - { - Range *s; - - if (r1->lb <= r2->lb) - { - s = new Range(*r1); - } - else - { - s = new Range(*r2); - } - - *rP = s; - rP = &s->next; - - for (;;) - { - if (r1->lb <= r2->lb) - { - if (r1->lb > s->ub) - break; - - if (r1->ub > s->ub) - s->ub = r1->ub; - - if (!(r1 = r1->next)) - { - uint ub = 0; - - for (; r2 && r2->lb <= s->ub; r2 = r2->next) - ub = r2->ub; - - if (ub > s->ub) - s->ub = ub; - - *rP = r2; - - return r; - } - } - else - { - if (r2->lb > s->ub) - break; - - if (r2->ub > s->ub) - s->ub = r2->ub; - - if (!(r2 = r2->next)) - { - uint ub = 0; - - for (; r1 && r1->lb <= s->ub; r1 = r1->next) - ub = r1->ub; - - if (ub > s->ub) - s->ub = ub; - - *rP = r1; - - return r; - } - } - } - } - - *rP = NULL; - return r; -} - -Range *doDiff(Range *r1, Range *r2) -{ - Range *r, *s, **rP = &r; - - for (; r1; r1 = r1->next) - { - uint lb = r1->lb; - - for (; r2 && r2->ub <= r1->lb; r2 = r2->next) - - ; - for (; r2 && r2->lb < r1->ub; r2 = r2->next) - { - if (lb < r2->lb) - { - *rP = s = new Range(lb, r2->lb); - rP = &s->next; - } - - if ((lb = r2->ub) >= r1->ub) - goto noMore; - } - - *rP = s = new Range(lb, r1->ub); - rP = &s->next; - -noMore: - ; - } - - *rP = NULL; - return r; -} - -MatchOp *merge(MatchOp *m1, MatchOp *m2) -{ - if (!m1) - return m2; - - if (!m2) - return m1; - - return new 
MatchOp(doUnion(m1->match, m2->match)); -} - -const char *MatchOp::type = "MatchOp"; - -void MatchOp::display(std::ostream &o) const -{ - o << match; -} - -void MatchOp::calcSize(Char *rep) -{ - size = 1; - - for (Range *r = match; r; r = r->next) - for (uint c = r->lb; c < r->ub; ++c) - if (rep[c] == c) - ++size; -} - -uint MatchOp::fixedLength() -{ - return 1; -} - -void MatchOp::compile(Char *rep, Ins *i) -{ - i->i.tag = CHAR; - i->i.link = &i[size]; - Ins *j = &i[1]; - uint bump = size; - - for (Range *r = match; r; r = r->next) - { - for (uint c = r->lb; c < r->ub; ++c) - { - if (rep[c] == c) - { - j->c.value = c; - j->c.bump = --bump; - j++; - } - } - } -} - -void MatchOp::split(CharSet &s) -{ - for (Range *r = match; r; r = r->next) - { - for (uint c = r->lb; c < r->ub; ++c) - { - CharPtn *x = s.rep[c], *a = x->nxt; - - if (!a) - { - if (x->card == 1) - continue; - - x->nxt = a = s.freeHead; - - if (!(s.freeHead = s.freeHead->nxt)) - s.freeTail = &s.freeHead; - - a->nxt = NULL; - - x->fix = s.fix; - - s.fix = x; - } - - if (--(x->card) == 0) - { - *s.freeTail = x; - *(s.freeTail = &x->nxt) = NULL; - } - - s.rep[c] = a; - ++(a->card); - } - } - - for (; s.fix; s.fix = s.fix->fix) - if (s.fix->card) - s.fix->nxt = NULL; -} - -RegExp * mkDiff(RegExp *e1, RegExp *e2) -{ - MatchOp *m1, *m2; - - if (!(m1 = (MatchOp*) e1->isA(MatchOp::type))) - return NULL; - - if (!(m2 = (MatchOp*) e2->isA(MatchOp::type))) - return NULL; - - Range *r = doDiff(m1->match, m2->match); - - return r ? 
(RegExp*) new MatchOp(r) : (RegExp*) new NullOp; -} - -RegExp *doAlt(RegExp *e1, RegExp *e2) -{ - if (!e1) - return e2; - - if (!e2) - return e1; - - return new AltOp(e1, e2); -} - -RegExp *mkAlt(RegExp *e1, RegExp *e2) -{ - AltOp *a; - MatchOp *m1, *m2; - - if ((a = (AltOp*) e1->isA(AltOp::type))) - { - if ((m1 = (MatchOp*) a->exp1->isA(MatchOp::type))) - e1 = a->exp2; - } - else if ((m1 = (MatchOp*) e1->isA(MatchOp::type))) - { - e1 = NULL; - } - - if ((a = (AltOp*) e2->isA(AltOp::type))) - { - if ((m2 = (MatchOp*) a->exp1->isA(MatchOp::type))) - e2 = a->exp2; - } - else if ((m2 = (MatchOp*) e2->isA(MatchOp::type))) - { - e2 = NULL; - } - - return doAlt(merge(m1, m2), doAlt(e1, e2)); -} - -const char *AltOp::type = "AltOp"; - -void AltOp::calcSize(Char *rep) -{ - exp1->calcSize(rep); - exp2->calcSize(rep); - size = exp1->size + exp2->size + 2; -} - -uint AltOp::fixedLength() -{ - uint l1 = exp1->fixedLength(); - uint l2 = exp1->fixedLength(); - - if (l1 != l2 || l1 == ~0u) - return ~0; - - return l1; -} - -void AltOp::compile(Char *rep, Ins *i) -{ - i->i.tag = FORK; - Ins *j = &i[exp1->size + 1]; - i->i.link = &j[1]; - exp1->compile(rep, &i[1]); - j->i.tag = GOTO; - j->i.link = &j[exp2->size + 1]; - exp2->compile(rep, &j[1]); -} - -void AltOp::split(CharSet &s) -{ - exp1->split(s); - exp2->split(s); -} - -const char *CatOp::type = "CatOp"; - -void CatOp::calcSize(Char *rep) -{ - exp1->calcSize(rep); - exp2->calcSize(rep); - size = exp1->size + exp2->size; -} - -uint CatOp::fixedLength() -{ - uint l1, l2; - - if ((l1 = exp1->fixedLength()) != ~0u ) - if ((l2 = exp2->fixedLength()) != ~0u) - return l1 + l2; - - return ~0u; -} - -void CatOp::compile(Char *rep, Ins *i) -{ - exp1->compile(rep, &i[0]); - exp2->compile(rep, &i[exp1->size]); -} - -void CatOp::split(CharSet &s) -{ - exp1->split(s); - exp2->split(s); -} - -const char *CloseOp::type = "CloseOp"; - -void CloseOp::calcSize(Char *rep) -{ - exp->calcSize(rep); - size = exp->size + 1; -} - -void 
CloseOp::compile(Char *rep, Ins *i) -{ - exp->compile(rep, &i[0]); - i += exp->size; - i->i.tag = FORK; - i->i.link = i - exp->size; -} - -void CloseOp::split(CharSet &s) -{ - exp->split(s); -} - -const char *CloseVOp::type = "CloseVOp"; - -void CloseVOp::calcSize(Char *rep) -{ - exp->calcSize(rep); - - if (max >= 0) - { - size = (exp->size * min) + ((1 + exp->size) * (max - min)); - } - else - { - size = (exp->size * min) + 1; - } -} - -void CloseVOp::compile(Char *rep, Ins *i) -{ - Ins *jumppoint; - int st; - jumppoint = i + ((1 + exp->size) * (max - min)); - - for (st = min; st < max; st++) - { - i->i.tag = FORK; - i->i.link = jumppoint; - i++; - exp->compile(rep, &i[0]); - i += exp->size; - } - - for (st = 0; st < min; st++) - { - exp->compile(rep, &i[0]); - i += exp->size; - - if (max < 0 && st == 0) - { - i->i.tag = FORK; - i->i.link = i - exp->size; - i++; - } - } -} - -void CloseVOp::split(CharSet &s) -{ - exp->split(s); -} - -RegExp *expr(Scanner &); - -uint Scanner::unescape(SubStr &s) const -{ - static const char * hex = "0123456789abcdef"; - static const char * oct = "01234567"; - - s.len--; - uint c, ucb = 0; - - if ((c = *s.str++) != '\\' || s.len == 0) - { - return xlat(c); - } - - s.len--; - - switch (c = *s.str++) - { - case 'n': return xlat('\n'); - case 't': return xlat('\t'); - case 'v': return xlat('\v'); - case 'b': return xlat('\b'); - case 'r': return xlat('\r'); - case 'f': return xlat('\f'); - case 'a': return xlat('\a'); - - case 'x': - { - if (s.len < 2) - { - fatal(s.ofs()+s.len, "Illegal hexadecimal character code, two hexadecimal digits are required"); - return ~0; - } - - const char *p1 = strchr(hex, tolower(s.str[0])); - const char *p2 = strchr(hex, tolower(s.str[1])); - - if (!p1 || !p2) - { - fatal(s.ofs()+(p1?1:0), "Illegal hexadecimal character code"); - return ~0; - } - else - { - s.len -= 2; - s.str += 2; - - uint v = (uint)((p1 - hex) << 4) - + (uint)((p2 - hex)); - - return v; - } - } - - case 'U': - { - if (s.len < 8) - { - 
fatal(s.ofs()+s.len, "Illegal unicode character, eight hexadecimal digits are required"); - return ~0; - } - - uint l = 0; - - if (s.str[0] == '0') - { - l++; - if (s.str[1] == '0') - { - l++; - if (s.str[2] == '0' || (s.str[2] == '1' && uFlag)) - { - l++; - if (uFlag) { - const char *u3 = strchr(hex, tolower(s.str[2])); - const char *u4 = strchr(hex, tolower(s.str[3])); - if (u3 && u4) - { - ucb = (uint)((u3 - hex) << 20) - + (uint)((u4 - hex) << 16); - l++; - } - } - else if (s.str[3] == '0') - { - l++; - } - } - } - } - - if (l != 4) - { - fatal(s.ofs()+l, "Illegal unicode character, eight hexadecimal digits are required"); - } - - s.len -= 4; - s.str += 4; - - // no break; - } - case 'X': - case 'u': - { - if (s.len < 4) - { - fatal(s.ofs()+s.len, - c == 'X' - ? "Illegal hexadecimal character code, four hexadecimal digits are required" - : "Illegal unicode character, four hexadecimal digits are required"); - return ~0; - } - - const char *p1 = strchr(hex, tolower(s.str[0])); - const char *p2 = strchr(hex, tolower(s.str[1])); - const char *p3 = strchr(hex, tolower(s.str[2])); - const char *p4 = strchr(hex, tolower(s.str[3])); - - if (!p1 || !p2 || !p3 || !p4) - { - fatal(s.ofs()+(p1?1:0)+(p2?1:0)+(p3?1:0), - c == 'X' - ? "Illegal hexadecimal character code, non hexxdecimal digit found" - : "Illegal unicode character, non hexadecimal digit found"); - return ~0; - } - else - { - s.len -= 4; - s.str += 4; - - uint v = (uint)((p1 - hex) << 12) - + (uint)((p2 - hex) << 8) - + (uint)((p3 - hex) << 4) - + (uint)((p4 - hex)) - + ucb; - - if (v >= nRealChars) - { - fatal(s.ofs(), - c == 'X' - ? 
"Illegal hexadecimal character code, out of range" - : "Illegal unicode character, out of range"); - } - - return v; - } - } - - case '4': - case '5': - case '6': - case '7': - { - fatal(s.ofs()-1, "Illegal octal character code, first digit must be 0 thru 3"); - return ~0; - } - - case '0': - case '1': - case '2': - case '3': - { - if (s.len < 2) - { - fatal(s.ofs()+s.len, "Illegal octal character code, three octal digits are required"); - return ~0; - } - - const char *p0 = strchr(oct, c); - const char *p1 = strchr(oct, s.str[0]); - const char *p2 = strchr(oct, s.str[1]); - - if (!p0 || !p1 || !p2) - { - fatal(s.ofs()+(p1?1:0), "Illegal octal character code, non octal digit found"); - return ~0; - } - else - { - s.len -= 2; - s.str += 2; - - uint v = (uint)((p0 - oct) << 6) + (uint)((p1 - oct) << 3) + (uint)(p2 - oct); - - return v; - } - } - - default: - return xlat(c); - } -} - -std::string& Scanner::unescape(SubStr& str_in, std::string& str_out) const -{ - str_out.clear(); - - while(str_in.len) - { - uint c = unescape(str_in); - - if (c > 0xFF) - { - fatal(str_in.ofs(), "Illegal character"); - } - - str_out += static_cast(c); - } - - return str_out; -} - -Range * Scanner::getRange(SubStr &s) const -{ - uint lb = unescape(s), ub, xlb, xub, c; - - if (s.len < 2 || *s.str != '-') - { - ub = lb; - } - else - { - s.len--; - s.str++; - ub = unescape(s); - - if (ub < lb) - { - uint tmp = lb; - lb = ub; - ub = tmp; - } - - xlb = xlat(lb); - xub = xlat(ub); - - for(c = lb; c <= ub; c++) - { - if (!(xlb <= xlat(c) && xlat(c) <= ub)) - { - /* range doesn't work */ - Range * r = new Range(xlb, xlb + 1); - for (c = lb + 1; c <= ub; c++) - { - r = doUnion(r, new Range(xlat(c), xlat(c) + 1)); - } - return r; - } - } - - lb = xlb; - ub = xub; - } - - return new Range(lb, ub + 1); -} - -RegExp * Scanner::matchChar(uint c) const -{ - return new MatchOp(new Range(c, c + 1)); -} - -RegExp * Scanner::strToRE(SubStr s) const -{ - s.len -= 2; - s.str += 1; - - if (s.len == 0) - 
return new NullOp; - - RegExp *re = matchChar(unescape(s)); - - while (s.len > 0) - re = new CatOp(re, matchChar(unescape(s))); - - return re; -} - -RegExp * Scanner::strToCaseInsensitiveRE(SubStr s) const -{ - s.len -= 2; - s.str += 1; - - if (s.len == 0) - return new NullOp; - - uint c = unescape(s); - - RegExp *re, *reL, *reU; - - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) - { - reL = matchChar(xlat(tolower(c))); - reU = matchChar(xlat(toupper(c))); - re = mkAlt(reL, reU); - } - else - { - re = matchChar(c); - } - - while (s.len > 0) - { - uint c = unescape(s); - - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) - { - reL = matchChar(xlat(tolower(c))); - reU = matchChar(xlat(toupper(c))); - re = new CatOp(re, mkAlt(reL, reU)); - } - else - { - re = new CatOp(re, matchChar(c)); - } - } - - return re; -} - -RegExp * Scanner::ranToRE(SubStr s) const -{ - s.len -= 2; - s.str += 1; - - if (s.len == 0) - return new NullOp; - - Range *r = getRange(s); - - while (s.len > 0) - r = doUnion(r, getRange(s)); - - return new MatchOp(r); -} - -RegExp * Scanner::invToRE(SubStr s) const -{ - s.len--; - s.str++; - - RegExp * any = ranToRE(SubStr(wFlag ? "[\\X0000-\\XFFFF]" : "[\\000-\\377]")); - - if (s.len <= 2) - { - return any; - } - - RegExp * ran = ranToRE(s); - RegExp * inv = mkDiff(any, ran); - - delete ran; - delete any; - - return inv; -} - -RegExp * Scanner::mkDot() const -{ - RegExp * any = ranToRE(SubStr(wFlag ? "[\\X0000-\\XFFFF]" : "[\\000-\\377]")); - RegExp * ran = matchChar(xlat('\n')); - RegExp * inv = mkDiff(any, ran); - - delete ran; - delete any; - - return inv; -} - -const char *RuleOp::type = "RuleOp"; - -RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint a) - : exp(e) - , ctx(c) - , ins(NULL) - , accept(a) - , code(t) - , line(0) -{ - ; -} - -void RuleOp::calcSize(Char *rep) -{ - exp->calcSize(rep); - ctx->calcSize(rep); - size = exp->size + (ctx->size ? 
ctx->size + 2 : 1); -} - -void RuleOp::compile(Char *rep, Ins *i) -{ - ins = i; - exp->compile(rep, &i[0]); - i += exp->size; - if (ctx->size) - { - i->i.tag = CTXT; - i->i.link = &i[1]; - i++; - ctx->compile(rep, &i[0]); - i += ctx->size; - } - i->i.tag = TERM; - i->i.link = this; -} - -void RuleOp::split(CharSet &s) -{ - exp->split(s); - ctx->split(s); -} - -void optimize(Ins *i) -{ - while (!isMarked(i)) - { - mark(i); - - if (i->i.tag == CHAR) - { - i = (Ins*) i->i.link; - } - else if (i->i.tag == GOTO || i->i.tag == FORK) - { - Ins *target = (Ins*) i->i.link; - optimize(target); - - if (target->i.tag == GOTO) - i->i.link = target->i.link == target ? i : target; - - if (i->i.tag == FORK) - { - Ins *follow = (Ins*) & i[1]; - optimize(follow); - - if (follow->i.tag == GOTO && follow->i.link == follow) - { - i->i.tag = GOTO; - } - else if (i->i.link == i) - { - i->i.tag = GOTO; - i->i.link = follow; - } - } - - return ; - } - else - { - ++i; - } - } -} - -void genCode(std::ostream& o, RegExp *re) -{ - genCode(o, 0, re); -} - -CharSet::CharSet() - : fix(0) - , freeHead(0) - , freeTail(0) - , rep(new CharPtr[nRealChars]) - , ptn(new CharPtn[nRealChars]) -{ - for (uint j = 0; j < nRealChars; ++j) - { - rep[j] = &ptn[0]; - ptn[j].nxt = &ptn[j + 1]; /* wrong for j=nRealChars but will be corrected below */ - ptn[j].card = 0; - } - - freeHead = &ptn[1]; - *(freeTail = &ptn[nRealChars - 1].nxt) = NULL; - ptn[0].card = nRealChars; - ptn[0].nxt = NULL; -} - -CharSet::~CharSet() -{ - delete[] rep; - delete[] ptn; -} - -void genCode(std::ostream& o, uint ind, RegExp *re) -{ - CharSet cs; - uint j; - - re->split(cs); - /* - for(uint k = 0; k < nChars;){ - for(j = k; ++k < nRealChars && cs.rep[k] == cs.rep[j];); - printSpan(cerr, j, k); - cerr << "\t" << cs.rep[j] - &cs.ptn[0] << endl; - } - */ - Char *rep = new Char[nRealChars]; - - for (j = 0; j < nRealChars; ++j) - { - if (!cs.rep[j]->nxt) - cs.rep[j]->nxt = &cs.ptn[j]; - - rep[j] = (Char) (cs.rep[j]->nxt - &cs.ptn[0]); - } 
- - re->calcSize(rep); - Ins *ins = new Ins[re->size + 1]; - memset(ins, 0, (re->size + 1)*sizeof(Ins)); - re->compile(rep, ins); - Ins *eoi = &ins[re->size]; - eoi->i.tag = GOTO; - eoi->i.link = eoi; - - optimize(ins); - - for (j = 0; j < re->size;) - { - unmark(&ins[j]); - - if (ins[j].i.tag == CHAR) - { - j = (Ins*) ins[j].i.link - ins; - } - else - { - j++; - } - } - - DFA *dfa = new DFA(ins, re->size, 0, nRealChars, rep); - dfa->emit(o, ind); - delete dfa; - delete [] ins; - delete [] rep; -} - -} // end namespace re2c - diff --git a/tools/re2c/basics.h b/tools/re2c/basics.h deleted file mode 100644 index c9ddb6276..000000000 --- a/tools/re2c/basics.h +++ /dev/null @@ -1,57 +0,0 @@ -/* $Id: basics.h 520 2006-05-25 13:31:06Z helly $ */ -#ifndef _basics_h -#define _basics_h - -#ifdef HAVE_CONFIG_H -#include "config.h" -#elif defined(_WIN32) -#include "config_w32.h" -#endif - -namespace re2c -{ - -#if SIZEOF_CHAR == 1 -typedef unsigned char byte; -#elif SIZEOF_SHORT == 1 -typedef unsigned short byte; -#elif SIZEOF_INT == 1 -typedef unsigned int byte; -#elif SIZEOF_LONG == 1 -typedef unsigned long byte; -#else -typedef unsigned char byte; -#endif - -#if SIZEOF_CHAR == 2 -typedef unsigned char word; -#elif SIZEOF_SHORT == 2 -typedef unsigned short word; -#elif SIZEOF_INT == 2 -typedef unsigned int word; -#elif SIZEOF_LONG == 2 -typedef unsigned long word; -#else -typedef unsigned short word; -#endif - -#if SIZEOF_CHAR == 4 -typedef unsigned char dword; -#elif SIZEOF_SHORT == 4 -typedef unsigned short dword; -#elif SIZEOF_INT == 4 -typedef unsigned int dword; -#elif SIZEOF_LONG == 4 -typedef unsigned long dword; -#else -typedef unsigned long dword; -#endif - -typedef unsigned int uint; -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned long ulong; - -} // end namespace re2c - -#endif diff --git a/tools/re2c/code.cc b/tools/re2c/code.cc deleted file mode 100644 index f6eea4d0b..000000000 --- a/tools/re2c/code.cc +++ /dev/null @@ -1,1806 
+0,0 @@ -/* $Id: code.cc 717 2007-04-29 22:29:59Z helly $ */ -#include -#include -#include -#include -#include -#include "substr.h" -#include "globals.h" -#include "dfa.h" -#include "parser.h" -#include "code.h" - -namespace re2c -{ - -// there must be at least one span in list; all spans must cover -// same range - -std::string indent(uint ind) -{ - std::string str; - - while (ind-- > 0) - { - str += indString; - } - return str; -} - -static std::string space(uint this_label) -{ - int nl = next_label > 999999 ? 6 : next_label > 99999 ? 5 : next_label > 9999 ? 4 : next_label > 999 ? 3 : next_label > 99 ? 2 : next_label > 9 ? 1 : 0; - int tl = this_label > 999999 ? 6 : this_label > 99999 ? 5 : this_label > 9999 ? 4 : this_label > 999 ? 3 : this_label > 99 ? 2 : this_label > 9 ? 1 : 0; - - return std::string(std::max(1, nl - tl + 1), ' '); -} - -void Go::compact() -{ - // arrange so that adjacent spans have different targets - uint i = 0; - - for (uint j = 1; j < nSpans; ++j) - { - if (span[j].to != span[i].to) - { - ++i; - span[i].to = span[j].to; - } - - span[i].ub = span[j].ub; - } - - nSpans = i + 1; -} - -void Go::unmap(Go *base, const State *x) -{ - Span *s = span, *b = base->span, *e = &b[base->nSpans]; - uint lb = 0; - s->ub = 0; - s->to = NULL; - - for (; b != e; ++b) - { - if (b->to == x) - { - if ((s->ub - lb) > 1) - { - s->ub = b->ub; - } - } - else - { - if (b->to != s->to) - { - if (s->ub) - { - lb = s->ub; - ++s; - } - - s->to = b->to; - } - - s->ub = b->ub; - } - } - - s->ub = e[ -1].ub; - ++s; - nSpans = s - span; -} - -void doGen(const Go *g, const State *s, uint *bm, uint f, uint m) -{ - Span *b = g->span, *e = &b[g->nSpans]; - uint lb = 0; - - for (; b < e; ++b) - { - if (b->to == s) - { - for (; lb < b->ub && lb < 256; ++lb) - { - bm[lb-f] |= m; - } - } - - lb = b->ub; - } -} - -void prt(std::ostream& o, const Go *g, const State *s) -{ - Span *b = g->span, *e = &b[g->nSpans]; - uint lb = 0; - - for (; b < e; ++b) - { - if (b->to == s) - { - 
printSpan(o, lb, b->ub); - } - - lb = b->ub; - } -} - -bool matches(const Go *g1, const State *s1, const Go *g2, const State *s2) -{ - Span *b1 = g1->span, *e1 = &b1[g1->nSpans]; - uint lb1 = 0; - Span *b2 = g2->span, *e2 = &b2[g2->nSpans]; - uint lb2 = 0; - - for (;;) - { - for (; b1 < e1 && b1->to != s1; ++b1) - { - lb1 = b1->ub; - } - - for (; b2 < e2 && b2->to != s2; ++b2) - { - lb2 = b2->ub; - } - - if (b1 == e1) - { - return b2 == e2; - } - - if (b2 == e2) - { - return false; - } - - if (lb1 != lb2 || b1->ub != b2->ub) - { - return false; - } - - ++b1; - ++b2; - } -} - -BitMap *BitMap::first = NULL; - -BitMap::BitMap(const Go *g, const State *x) - : go(g) - , on(x) - , next(first) - , i(0) - , m(0) -{ - first = this; -} - -BitMap::~BitMap() -{ - delete next; -} - -const BitMap *BitMap::find(const Go *g, const State *x) -{ - for (const BitMap *b = first; b; b = b->next) - { - if (matches(b->go, b->on, g, x)) - { - return b; - } - } - - return new BitMap(g, x); -} - -const BitMap *BitMap::find(const State *x) -{ - for (const BitMap *b = first; b; b = b->next) - { - if (b->on == x) - { - return b; - } - } - - return NULL; -} - -void BitMap::gen(std::ostream &o, uint ind, uint lb, uint ub) -{ - if (first && bLastPass) - { - o << indent(ind) << "static const unsigned char " << mapCodeName["yybm"] << "[] = {"; - - uint c = 1, n = ub - lb; - const BitMap *cb = first; - - while((cb = cb->next) != NULL) { - ++c; - } - BitMap *b = first; - - uint *bm = new uint[n]; - - for (uint i = 0, t = 1; b; i += n, t += 8) - { - memset(bm, 0, n * sizeof(uint)); - - for (uint m = 0x80; b && m; m >>= 1) - { - b->i = i; - b->m = m; - doGen(b->go, b->on, bm, lb, m); - b = const_cast(b->next); - } - - if (c > 8) - { - o << "\n" << indent(ind+1) << "/* table " << t << " .. 
" << std::min(c, t+7) << ": " << i << " */"; - } - - for (uint j = 0; j < n; ++j) - { - if (j % 8 == 0) - { - o << "\n" << indent(ind+1); - } - - if (yybmHexTable) - { - prtHex(o, bm[j], false); - } - else - { - o << std::setw(3) << (uint)bm[j]; - } - o << ", "; - } - } - - o << "\n" << indent(ind) << "};\n"; - /* stats(); */ - - delete[] bm; - } -} - -void BitMap::stats() -{ - uint n = 0; - - for (const BitMap *b = first; b; b = b->next) - { - prt(std::cerr, b->go, b->on); - std::cerr << std::endl; - ++n; - } - - std::cerr << n << " bitmaps\n"; - first = NULL; -} - -void genGoTo(std::ostream &o, uint ind, const State *from, const State *to, bool & readCh) -{ - if (readCh && from->label + 1 != to->label) - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ";\n"; - readCh = false; - } - - o << indent(ind) << "goto " << labelPrefix << to->label << ";\n"; - vUsedLabels.insert(to->label); -} - -void genIf(std::ostream &o, uint ind, const char *cmp, uint v, bool &readCh) -{ - o << indent(ind) << "if("; - if (readCh) - { - o << "(" << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ")"; - readCh = false; - } - else - { - o << mapCodeName["yych"]; - } - - o << " " << cmp << " "; - prtChOrHex(o, v); - o << ") "; -} - -static void need(std::ostream &o, uint ind, uint n, bool & readCh, bool bSetMarker) -{ - uint fillIndex = next_fill_index; - - if (fFlag) - { - next_fill_index++; - o << indent(ind) << mapCodeName["YYSETSTATE"] << "(" << fillIndex << ");\n"; - } - - if (bUseYYFill) - { - if (n == 1) - { - o << indent(ind) << "if(" << mapCodeName["YYLIMIT"] << " == " << mapCodeName["YYCURSOR"] << ") " << mapCodeName["YYFILL"]; - } - else - { - o << indent(ind) << "if((" << mapCodeName["YYLIMIT"] << " - " << mapCodeName["YYCURSOR"] << ") < " << n << ") " << mapCodeName["YYFILL"]; - } - if (bUseYYFillParam) - { - o << "(" << n << ")"; - } - o << ";\n"; - } - - if (fFlag) - { - o << 
mapCodeName["yyFillLabel"] << fillIndex << ":\n"; - } - - if (bSetMarker) - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*(" << mapCodeName["YYMARKER"] << " = " << mapCodeName["YYCURSOR"] << ");\n"; - } - else - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ";\n"; - } - readCh = false; -} - -void Match::emit(std::ostream &o, uint ind, bool &readCh) const -{ - if (state->link) - { - o << indent(ind) << "++" << mapCodeName["YYCURSOR"] << ";\n"; - } - else if (!readAhead()) - { - /* do not read next char if match */ - o << indent(ind) << "++" << mapCodeName["YYCURSOR"] << ";\n"; - readCh = true; - } - else - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; - readCh = false; - } - - if (state->link) - { - need(o, ind, state->depth, readCh, false); - } -} - -void Enter::emit(std::ostream &o, uint ind, bool &readCh) const -{ - if (state->link) - { - o << indent(ind) << "++" << mapCodeName["YYCURSOR"] << ";\n"; - if (vUsedLabels.count(label)) - { - o << labelPrefix << label << ":\n"; - } - need(o, ind, state->depth, readCh, false); - } - else - { - /* we shouldn't need 'rule-following' protection here */ - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; - if (vUsedLabels.count(label)) - { - o << labelPrefix << label << ":\n"; - } - readCh = false; - } -} - -void Initial::emit(std::ostream &o, uint ind, bool &readCh) const -{ - if (!startLabelName.empty()) - { - o << startLabelName << ":\n"; - } - - if (vUsedLabels.count(1)) - { - if (state->link) - { - o << indent(ind) << "++" << mapCodeName["YYCURSOR"] << ";\n"; - } - else - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; - } - } - - if (vUsedLabels.count(label)) - { - o << labelPrefix << label << ":\n"; - } - else if (!label) - { - 
o << "\n"; - } - - if (dFlag) - { - o << indent(ind) << mapCodeName["YYDEBUG"] << "(" << label << ", *" << mapCodeName["YYCURSOR"] << ");\n"; - } - - if (state->link) - { - need(o, ind, state->depth, readCh, setMarker && bUsedYYMarker); - } - else - { - if (setMarker && bUsedYYMarker) - { - o << indent(ind) << mapCodeName["YYMARKER"] << " = " << mapCodeName["YYCURSOR"] << ";\n"; - } - readCh = false; - } -} - -void Save::emit(std::ostream &o, uint ind, bool &readCh) const -{ - if (bUsedYYAccept) - { - o << indent(ind) << mapCodeName["yyaccept"] << " = " << selector << ";\n"; - } - - if (state->link) - { - if (bUsedYYMarker) - { - o << indent(ind) << mapCodeName["YYMARKER"] << " = ++" << mapCodeName["YYCURSOR"] << ";\n"; - } - need(o, ind, state->depth, readCh, false); - } - else - { - if (bUsedYYMarker) - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*(" << mapCodeName["YYMARKER"] << " = ++" << mapCodeName["YYCURSOR"] << ");\n"; - } - else - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*++" << mapCodeName["YYCURSOR"] << ";\n"; - } - readCh = false; - } -} - -Move::Move(State *s) : Action(s) -{ - ; -} - -void Move::emit(std::ostream &, uint, bool &) const -{ - ; -} - -Accept::Accept(State *x, uint n, uint *s, State **r) - : Action(x), nRules(n), saves(s), rules(r) -{ - ; -} - -void Accept::genRuleMap() -{ - for (uint i = 0; i < nRules; ++i) - { - if (saves[i] != ~0u) - { - mapRules[saves[i]] = rules[i]; - } - } -} - -void Accept::emitBinary(std::ostream &o, uint ind, uint l, uint r, bool &readCh) const -{ - if (l < r) - { - uint m = (l + r) >> 1; - - o << indent(ind) << "if(" << mapCodeName["yyaccept"] << " <= " << m << ") {\n"; - emitBinary(o, ++ind, l, m, readCh); - o << indent(--ind) << "} else {\n"; - emitBinary(o, ++ind, m + 1, r, readCh); - o << indent(--ind) << "}\n"; - } - else - { - genGoTo(o, ind, state, mapRules.find(l)->second, readCh); - } -} - -void Accept::emit(std::ostream &o, uint ind, bool 
&readCh) const -{ - if (mapRules.size() > 0) - { - bUsedYYMarker = true; - o << indent(ind) << mapCodeName["YYCURSOR"] << " = " << mapCodeName["YYMARKER"] << ";\n"; - - if (readCh) // shouldn't be necessary, but might become at some point - { - o << indent(ind) << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ";\n"; - readCh = false; - } - - if (mapRules.size() > 1) - { - bUsedYYAccept = true; - - if (gFlag && mapRules.size() >= cGotoThreshold) - { - o << indent(ind++) << "{\n"; - o << indent(ind++) << "static void *" << mapCodeName["yytarget"] << "[" << mapRules.size() << "] = {\n"; - for (RuleMap::const_iterator it = mapRules.begin(); it != mapRules.end(); ++it) - { - o << indent(ind) << "&&" << labelPrefix << it->second->label << ",\n"; - vUsedLabels.insert(it->second->label); - } - o << indent(--ind) << "};\n"; - o << indent(ind) << "goto *" << mapCodeName["yytarget"] << "[" << mapCodeName["yyaccept"] << "];\n"; - o << indent(--ind) << "}\n"; - } - else if (sFlag) - { - emitBinary(o, ind, 0, mapRules.size() - 1, readCh); - } - else - { - o << indent(ind) << "switch(" << mapCodeName["yyaccept"] << ") {\n"; - - for (RuleMap::const_iterator it = mapRules.begin(); it != mapRules.end(); ++it) - { - o << indent(ind) << "case " << it->first << ": \t"; - genGoTo(o, 0, state, it->second, readCh); - } - - o << indent(ind) << "}\n"; - } - } - else - { - // no need to write if statement here since there is only case 0. 
- genGoTo(o, ind, state, mapRules.find(0)->second, readCh); - } - } -} - -Rule::Rule(State *s, RuleOp *r) : Action(s), rule(r) -{ - ; -} - -void Rule::emit(std::ostream &o, uint ind, bool &) const -{ - uint back = rule->ctx->fixedLength(); - - if (back != 0u) - { - o << indent(ind) << mapCodeName["YYCURSOR"] << " = " << mapCodeName["YYCTXMARKER"] << ";\n"; - } - - RuleLine rl(*rule); - - o << file_info(sourceFileInfo, &rl); - o << indent(ind); - o << rule->code->text; - o << "\n"; - o << outputFileInfo; -} - -void doLinear(std::ostream &o, uint ind, Span *s, uint n, const State *from, const State *next, bool &readCh, uint mask) -{ - for (;;) - { - State *bg = s[0].to; - - while (n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1) - { - if (s[1].to == next && n == 3) - { - if (!mask || (s[0].ub > 0x00FF)) - { - genIf(o, ind, "!=", s[0].ub, readCh); - genGoTo(o, 0, from, bg, readCh); - } - if (next->label != from->label + 1) - { - genGoTo(o, ind, from, next, readCh); - } - return ; - } - else - { - if (!mask || (s[0].ub > 0x00FF)) - { - genIf(o, ind, "==", s[0].ub, readCh); - genGoTo(o, 0, from, s[1].to, readCh); - } - } - - n -= 2; - s += 2; - } - - if (n == 1) - { - // if(bg != next){ - if (s[0].to->label != from->label + 1) - { - genGoTo(o, ind, from, s[0].to, readCh); - } - // } - return ; - } - else if (n == 2 && bg == next) - { - if (!mask || (s[0].ub > 0x00FF)) - { - genIf(o, ind, ">=", s[0].ub, readCh); - genGoTo(o, 0, from, s[1].to, readCh); - } - if (next->label != from->label + 1) - { - genGoTo(o, ind, from, next, readCh); - } - return ; - } - else - { - if (!mask || ((s[0].ub - 1) > 0x00FF)) - { - genIf(o, ind, "<=", s[0].ub - 1, readCh); - genGoTo(o, 0, from, bg, readCh); - } - n -= 1; - s += 1; - } - } - - if (next->label != from->label + 1) - { - genGoTo(o, ind, from, next, readCh); - } -} - -void Go::genLinear(std::ostream &o, uint ind, const State *from, const State *next, bool &readCh, uint mask) const -{ - doLinear(o, ind, span, nSpans, from, 
next, readCh, mask); -} - -bool genCases(std::ostream &o, uint ind, uint lb, Span *s, bool &newLine, uint mask) -{ - bool used = false; - - if (!newLine) - { - o << "\n"; - } - newLine = true; - if (lb < s->ub) - { - for (;;) - { - if (!mask || lb > 0x00FF) - { - o << indent(ind) << "case "; - prtChOrHex(o, lb); - o << ":"; - newLine = false; - used = true; - } - - if (++lb == s->ub) - { - break; - } - - o << "\n"; - newLine = true; - } - } - return used; -} - -void Go::genSwitch(std::ostream &o, uint ind, const State *from, const State *next, bool &readCh, uint mask) const -{ - bool newLine = true; - - if ((mask ? wSpans : nSpans) <= 2) - { - genLinear(o, ind, from, next, readCh, mask); - } - else - { - State *def = span[nSpans - 1].to; - Span **sP = new Span * [nSpans - 1], **r, **s, **t; - - t = &sP[0]; - - for (uint i = 0; i < nSpans; ++i) - { - if (span[i].to != def) - { - *(t++) = &span[i]; - } - } - - if (dFlag) - { - o << indent(ind) << mapCodeName["YYDEBUG"] << "(-1, " << mapCodeName["yych"] << ");\n"; - } - - if (readCh) - { - o << indent(ind) << "switch((" << mapCodeName["yych"] << " = " << yychConversion << "*" << mapCodeName["YYCURSOR"] << ")) {\n"; - readCh = false; - } - else - { - o << indent(ind) << "switch(" << mapCodeName["yych"] << ") {\n"; - } - - while (t != &sP[0]) - { - bool used = false; - - r = s = &sP[0]; - - if (*s == &span[0]) - { - used |= genCases(o, ind, 0, *s, newLine, mask); - } - else - { - used |= genCases(o, ind, (*s)[ -1].ub, *s, newLine, mask); - } - - State *to = (*s)->to; - - while (++s < t) - { - if ((*s)->to == to) - { - used |= genCases(o, ind, (*s)[ -1].ub, *s, newLine, mask); - } - else - { - *(r++) = *s; - } - } - - if (used) - { - genGoTo(o, newLine ? 
ind+1 : 1, from, to, readCh); - newLine = true; - } - t = r; - } - - o << indent(ind) << "default:"; - genGoTo(o, 1, from, def, readCh); - o << indent(ind) << "}\n"; - - delete [] sP; - } -} - -void doBinary(std::ostream &o, uint ind, Span *s, uint n, const State *from, const State *next, bool &readCh, uint mask) -{ - if (n <= 4) - { - doLinear(o, ind, s, n, from, next, readCh, mask); - } - else - { - uint h = n / 2; - - genIf(o, ind, "<=", s[h - 1].ub - 1, readCh); - o << "{\n"; - doBinary(o, ind+1, &s[0], h, from, next, readCh, mask); - o << indent(ind) << "} else {\n"; - doBinary(o, ind+1, &s[h], n - h, from, next, readCh, mask); - o << indent(ind) << "}\n"; - } -} - -void Go::genBinary(std::ostream &o, uint ind, const State *from, const State *next, bool &readCh, uint mask) const -{ - if (mask) - { - Span * sc = new Span[wSpans]; - - for (uint i = 0, j = 0; i < nSpans; i++) - { - if (span[i].ub > 0xFF) - { - sc[j++] = span[i]; - } - } - - doBinary(o, ind, sc, wSpans, from, next, readCh, mask); - - delete[] sc; - } - else - { - doBinary(o, ind, span, nSpans, from, next, readCh, mask); - } -} - -void Go::genBase(std::ostream &o, uint ind, const State *from, const State *next, bool &readCh, uint mask) const -{ - if ((mask ? wSpans : nSpans) == 0) - { - return ; - } - - if (!sFlag) - { - genSwitch(o, ind, from, next, readCh, mask); - return ; - } - - if ((mask ? wSpans : nSpans) > 8) - { - Span *bot = &span[0], *top = &span[nSpans - 1]; - uint util; - - if (bot[0].to == top[0].to) - { - util = (top[ -1].ub - bot[0].ub) / (nSpans - 2); - } - else - { - if (bot[0].ub > (top[0].ub - top[ -1].ub)) - { - util = (top[0].ub - bot[0].ub) / (nSpans - 1); - } - else - { - util = top[ -1].ub / (nSpans - 1); - } - } - - if (util <= 2) - { - genSwitch(o, ind, from, next, readCh, mask); - return ; - } - } - - if ((mask ? 
wSpans : nSpans) > 5) - { - genBinary(o, ind, from, next, readCh, mask); - } - else - { - genLinear(o, ind, from, next, readCh, mask); - } -} - -void Go::genCpGoto(std::ostream &o, uint ind, const State *from, const State *next, bool &readCh) const -{ - std::string sYych; - - if (readCh) - { - sYych = "(" + mapCodeName["yych"] + " = " + yychConversion + "*" + mapCodeName["YYCURSOR"] + ")"; - } - else - { - sYych = mapCodeName["yych"]; - } - - readCh = false; - if (wFlag) - { - o << indent(ind) << "if(" << sYych <<" & ~0xFF) {\n"; - genBase(o, ind+1, from, next, readCh, 1); - o << indent(ind++) << "} else {\n"; - sYych = mapCodeName["yych"]; - } - else - { - o << indent(ind++) << "{\n"; - } - o << indent(ind++) << "static void *" << mapCodeName["yytarget"] << "[256] = {\n"; - o << indent(ind); - - uint ch = 0; - for (uint i = 0; i < lSpans; ++i) - { - vUsedLabels.insert(span[i].to->label); - for(; ch < span[i].ub; ++ch) - { - o << "&&" << labelPrefix << span[i].to->label; - if (ch == 255) - { - o << "\n"; - i = lSpans; - break; - } - else if (ch % 8 == 7) - { - o << ",\n" << indent(ind); - } - else - { - o << "," << space(span[i].to->label); - } - } - } - o << indent(--ind) << "};\n"; - o << indent(ind) << "goto *" << mapCodeName["yytarget"] << "[" << sYych << "];\n"; - o << indent(--ind) << "}\n"; -} - -void Go::genGoto(std::ostream &o, uint ind, const State *from, const State *next, bool &readCh) -{ - if ((gFlag || wFlag) && wSpans == ~0u) - { - uint nBitmaps = 0; - std::set vTargets; - wSpans = 0; - lSpans = 1; - dSpans = 0; - for (uint i = 0; i < nSpans; ++i) - { - if (span[i].ub > 0xFF) - { - wSpans++; - } - if (span[i].ub < 0x100 || !wFlag) - { - lSpans++; - - State *to = span[i].to; - - if (to && to->isBase) - { - const BitMap *b = BitMap::find(to); - - if (b && matches(b->go, b->on, this, to)) - { - nBitmaps++; - } - else - { - dSpans++; - vTargets.insert(to->label); - } - } - else - { - dSpans++; - vTargets.insert(to->label); - } - } - } - lTargets = 
vTargets.size() >> nBitmaps; - } - - if (gFlag && (lTargets >= cGotoThreshold || dSpans >= cGotoThreshold)) - { - genCpGoto(o, ind, from, next, readCh); - return; - } - else if (bFlag) - { - for (uint i = 0; i < nSpans; ++i) - { - State *to = span[i].to; - - if (to && to->isBase) - { - const BitMap *b = BitMap::find(to); - std::string sYych; - - if (b && matches(b->go, b->on, this, to)) - { - Go go; - go.span = new Span[nSpans]; - go.unmap(this, to); - if (readCh) - { - sYych = "(" + mapCodeName["yych"] + " = " + yychConversion + "*" + mapCodeName["YYCURSOR"] + ")"; - } - else - { - sYych = mapCodeName["yych"]; - } - readCh = false; - if (wFlag) - { - o << indent(ind) << "if(" << sYych << " & ~0xFF) {\n"; - sYych = mapCodeName["yych"]; - genBase(o, ind+1, from, next, readCh, 1); - o << indent(ind) << "} else "; - } - else - { - o << indent(ind); - } - o << "if(" << mapCodeName["yybm"] << "[" << b->i << "+" << sYych << "] & "; - if (yybmHexTable) - { - prtHex(o, b->m, false); - } - else - { - o << (uint) b->m; - } - o << ") {\n"; - genGoTo(o, ind+1, from, to, readCh); - o << indent(ind) << "}\n"; - go.genBase(o, ind, from, next, readCh, 0); - delete [] go.span; - return ; - } - } - } - } - - genBase(o, ind, from, next, readCh, 0); -} - -void State::emit(std::ostream &o, uint ind, bool &readCh) const -{ - if (vUsedLabels.count(label)) - { - o << labelPrefix << label << ":\n"; - } - if (dFlag && !action->isInitial()) - { - o << indent(ind) << mapCodeName["YYDEBUG"] << "(" << label << ", *" << mapCodeName["YYCURSOR"] << ");\n"; - } - if (isPreCtxt) - { - o << indent(ind) << mapCodeName["YYCTXMARKER"] << " = " << mapCodeName["YYCURSOR"] << " + 1;\n"; - } - action->emit(o, ind, readCh); -} - -uint merge(Span *x0, State *fg, State *bg) -{ - Span *x = x0, *f = fg->go.span, *b = bg->go.span; - uint nf = fg->go.nSpans, nb = bg->go.nSpans; - State *prev = NULL, *to; - // NB: we assume both spans are for same range - - for (;;) - { - if (f->ub == b->ub) - { - to = f->to == 
b->to ? bg : f->to; - - if (to == prev) - { - --x; - } - else - { - x->to = prev = to; - } - - x->ub = f->ub; - ++x; - ++f; - --nf; - ++b; - --nb; - - if (nf == 0 && nb == 0) - { - return x - x0; - } - } - - while (f->ub < b->ub) - { - to = f->to == b->to ? bg : f->to; - - if (to == prev) - { - --x; - } - else - { - x->to = prev = to; - } - - x->ub = f->ub; - ++x; - ++f; - --nf; - } - - while (b->ub < f->ub) - { - to = b->to == f->to ? bg : f->to; - - if (to == prev) - { - --x; - } - else - { - x->to = prev = to; - } - - x->ub = b->ub; - ++x; - ++b; - --nb; - } - } -} - -const uint cInfinity = ~0; - -class SCC -{ - -public: - State **top, **stk; - -public: - SCC(uint); - ~SCC(); - void traverse(State*); - -#ifdef PEDANTIC -private: - SCC(const SCC& oth) - : top(oth.top) - , stk(oth.stk) - { - } - SCC& operator = (const SCC& oth) - { - new(this) SCC(oth); - return *this; - } -#endif -}; - -SCC::SCC(uint size) - : top(new State * [size]) - , stk(top) -{ -} - -SCC::~SCC() -{ - delete [] stk; -} - -void SCC::traverse(State *x) -{ - *top = x; - uint k = ++top - stk; - x->depth = k; - - for (uint i = 0; i < x->go.nSpans; ++i) - { - State *y = x->go.span[i].to; - - if (y) - { - if (y->depth == 0) - { - traverse(y); - } - - if (y->depth < x->depth) - { - x->depth = y->depth; - } - } - } - - if (x->depth == k) - { - do - { - (*--top)->depth = cInfinity; - (*top)->link = x; - } - while (*top != x); - } -} - -static bool state_is_in_non_trivial_SCC(const State* s) -{ - - // does not link to self - if (s->link != s) - { - return true; - } - - // or exists i: (s->go.spans[i].to->link == s) - // - // Note: (s->go.spans[i].to == s) is allowed, corresponds to s - // looping back to itself. - // - for (uint i = 0; i < s->go.nSpans; ++i) - { - const State* t = s->go.span[i].to; - - if (t && t->link == s) - { - return true; - } - } - // otherwise no - return false; -} - -uint maxDist(State *s) -{ - if (s->depth != cInfinity) - { - // Already calculated, just return result. 
- return s->depth; - } - uint mm = 0; - - for (uint i = 0; i < s->go.nSpans; ++i) - { - State *t = s->go.span[i].to; - - if (t) - { - uint m = 1; - - if (!t->link) // marked as non-key state - { - if (t->depth == cInfinity) - { - t->depth = maxDist(t); - } - m += t->depth; - } - - if (m > mm) - { - mm = m; - } - } - } - - s->depth = mm; - return mm; -} - -void calcDepth(State *head) -{ - State* s; - - // mark non-key states by s->link = NULL ; - for (s = head; s; s = s->next) - { - if (s != head && !state_is_in_non_trivial_SCC(s)) - { - s->link = NULL; - } - //else: key state, leave alone - } - - for (s = head; s; s = s->next) - { - s->depth = cInfinity; - } - - // calculate max number of transitions before guarantied to reach - // a key state. - for (s = head; s; s = s->next) - { - maxDist(s); - } -} - -void DFA::findSCCs() -{ - SCC scc(nStates); - State *s; - - for (s = head; s; s = s->next) - { - s->depth = 0; - s->link = NULL; - } - - for (s = head; s; s = s->next) - { - if (!s->depth) - { - scc.traverse(s); - } - } - - calcDepth(head); -} - -void DFA::split(State *s) -{ - State *move = new State; - (void) new Move(move); - addState(&s->next, move); - move->link = s->link; - move->rule = s->rule; - move->go = s->go; - s->rule = NULL; - s->go.nSpans = 1; - s->go.span = new Span[1]; - s->go.span[0].ub = ubChar; - s->go.span[0].to = move; -} - -void DFA::findBaseState() -{ - Span *span = new Span[ubChar - lbChar]; - - for (State *s = head; s; s = s->next) - { - if (!s->link) - { - for (uint i = 0; i < s->go.nSpans; ++i) - { - State *to = s->go.span[i].to; - - if (to && to->isBase) - { - to = to->go.span[0].to; - uint nSpans = merge(span, s, to); - - if (nSpans < s->go.nSpans) - { - delete [] s->go.span; - s->go.nSpans = nSpans; - s->go.span = new Span[nSpans]; - memcpy(s->go.span, span, nSpans*sizeof(Span)); - } - - break; - } - } - } - } - - delete [] span; -} - -void DFA::emit(std::ostream &o, uint ind) -{ - State *s; - uint i, bitmap_brace = 0; - - findSCCs(); 
- head->link = head; - - uint nRules = 0; - - for (s = head; s; s = s->next) - { - s->depth = maxDist(s); - if (maxFill < s->depth) - { - maxFill = s->depth; - } - if (s->rule && s->rule->accept >= nRules) - { - nRules = s->rule->accept + 1; - } - } - - uint nSaves = 0; - uint *saves = new uint[nRules]; - memset(saves, ~0, (nRules)*sizeof(*saves)); - - // mark backtracking points - bool bSaveOnHead = false; - - for (s = head; s; s = s->next) - { - if (s->rule) - { - for (i = 0; i < s->go.nSpans; ++i) - { - if (s->go.span[i].to && !s->go.span[i].to->rule) - { - delete s->action; - s->action = NULL; - - if (saves[s->rule->accept] == ~0u) - { - saves[s->rule->accept] = nSaves++; - } - - bSaveOnHead |= s == head; - (void) new Save(s, saves[s->rule->accept]); // sets s->action - } - } - } - } - - // insert actions - State **rules = new State * [nRules]; - - memset(rules, 0, (nRules)*sizeof(*rules)); - - State *accept = NULL; - Accept *accfixup = NULL; - - for (s = head; s; s = s->next) - { - State * ow; - - if (!s->rule) - { - ow = accept; - } - else - { - if (!rules[s->rule->accept]) - { - State *n = new State; - (void) new Rule(n, s->rule); - rules[s->rule->accept] = n; - addState(&s->next, n); - } - - ow = rules[s->rule->accept]; - } - - for (i = 0; i < s->go.nSpans; ++i) - { - if (!s->go.span[i].to) - { - if (!ow) - { - ow = accept = new State; - accfixup = new Accept(accept, nRules, saves, rules); - addState(&s->next, accept); - } - - s->go.span[i].to = ow; - } - } - } - - if (accfixup) - { - accfixup->genRuleMap(); - } - - // split ``base'' states into two parts - for (s = head; s; s = s->next) - { - s->isBase = false; - - if (s->link) - { - for (i = 0; i < s->go.nSpans; ++i) - { - if (s->go.span[i].to == s) - { - s->isBase = true; - split(s); - - if (bFlag) - { - BitMap::find(&s->next->go, s); - } - - s = s->next; - break; - } - } - } - } - - // find ``base'' state, if possible - findBaseState(); - - delete head->action; - head->action = NULL; - - if (bFlag) - { 
- o << indent(ind++) << "{\n"; - bitmap_brace = 1; - BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256); - } - - bUsedYYAccept = false; - - uint start_label = next_label; - - (void) new Initial(head, next_label++, bSaveOnHead); - - if (bUseStartLabel) - { - if (startLabelName.empty()) - { - vUsedLabels.insert(start_label); - } - } - - for (s = head; s; s = s->next) - { - s->label = next_label++; - } - - // Save 'next_fill_index' and compute information about code generation - // while writing to null device. - uint save_fill_index = next_fill_index; - null_stream null_dev; - - for (s = head; s; s = s->next) - { - bool readCh = false; - s->emit(null_dev, ind, readCh); - s->go.genGoto(null_dev, ind, s, s->next, readCh); - } - if (last_fill_index < next_fill_index) - { - last_fill_index = next_fill_index; - } - next_fill_index = save_fill_index; - - // Generate prolog - o << "\n" << outputFileInfo; - o << indent(ind++) << "{\n"; - - if (!fFlag) - { - o << indent(ind) << mapCodeName["YYCTYPE"] << " " << mapCodeName["yych"] << ";\n"; - if (bUsedYYAccept) - { - o << indent(ind) << "unsigned int "<< mapCodeName["yyaccept"] << " = 0;\n"; - } - } - else - { - o << "\n"; - } - - genGetState(o, ind, start_label); - - if (vUsedLabels.count(1)) - { - vUsedLabels.insert(0); - o << indent(ind) << "goto " << labelPrefix << "0;\n"; - } - - // Generate code - for (s = head; s; s = s->next) - { - bool readCh = false; - s->emit(o, ind, readCh); - s->go.genGoto(o, ind, s, s->next, readCh); - } - - // Generate epilog - o << indent(--ind) << "}\n"; - if (bitmap_brace) - { - o << indent(--ind) << "}\n"; - } - - // Cleanup - if (BitMap::first) - { - delete BitMap::first; - BitMap::first = NULL; - } - - delete [] saves; - delete [] rules; - - bUseStartLabel = false; -} - -void genGetState(std::ostream &o, uint& ind, uint start_label) -{ - if (fFlag && !bWroteGetState) - { - vUsedLabels.insert(start_label); - o << indent(ind) << "switch(" << mapCodeName["YYGETSTATE"] << "()) {\n"; - 
if (bUseStateAbort) - { - o << indent(ind) << "default: abort();\n"; - o << indent(ind) << "case -1: goto " << labelPrefix << start_label << ";\n"; - } - else - { - o << indent(ind) << "default: goto " << labelPrefix << start_label << ";\n"; - } - - for (size_t i=0; iget_line() << " \"" << li.fname << "\"\n"; - } - return o; -} - -uint Scanner::get_line() const -{ - return cline; -} - -void Scanner::config(const Str& cfg, int num) -{ - if (cfg.to_string() == "indent:top") - { - if (num < 0) - { - fatal("configuration 'indent:top' must be a positive integer"); - } - topIndent = num; - } - else if (cfg.to_string() == "yybm:hex") - { - yybmHexTable = num != 0; - } - else if (cfg.to_string() == "startlabel") - { - bUseStartLabel = num != 0; - startLabelName = ""; - } - else if (cfg.to_string() == "state:abort") - { - bUseStateAbort = num != 0; - } - else if (cfg.to_string() == "state:nextlabel") - { - bUseStateNext = num != 0; - } - else if (cfg.to_string() == "yyfill:enable") - { - bUseYYFill = num != 0; - } - else if (cfg.to_string() == "yyfill:parameter") - { - bUseYYFillParam = num != 0; - } - else if (cfg.to_string() == "cgoto:threshold") - { - cGotoThreshold = num; - } - else if (cfg.to_string() == "yych:conversion") - { - if (num) - { - yychConversion = "("; - yychConversion += mapCodeName["YYCTYPE"]; - yychConversion += ")"; - } - else - { - yychConversion = ""; - } - } - else - { - fatal("unrecognized configuration name or illegal integer value"); - } -} - -static std::set mapVariableKeys; -static std::set mapDefineKeys; -static std::set mapLabelKeys; - -void Scanner::config(const Str& cfg, const Str& val) -{ - if (mapDefineKeys.empty()) - { - mapVariableKeys.insert("variable:yyaccept"); - mapVariableKeys.insert("variable:yybm"); - mapVariableKeys.insert("variable:yych"); - mapVariableKeys.insert("variable:yytarget"); - mapDefineKeys.insert("define:YYCTXMARKER"); - mapDefineKeys.insert("define:YYCTYPE"); - mapDefineKeys.insert("define:YYCURSOR"); - 
mapDefineKeys.insert("define:YYDEBUG"); - mapDefineKeys.insert("define:YYFILL"); - mapDefineKeys.insert("define:YYGETSTATE"); - mapDefineKeys.insert("define:YYLIMIT"); - mapDefineKeys.insert("define:YYMARKER"); - mapDefineKeys.insert("define:YYSETSTATE"); - mapLabelKeys.insert("label:yyFillLabel"); - mapLabelKeys.insert("label:yyNext"); - } - - std::string strVal; - - if (val.len >= 2 && val.str[0] == val.str[val.len-1] - && (val.str[0] == '"' || val.str[0] == '\'')) - { - SubStr tmp(val.str + 1, val.len - 2); - unescape(tmp, strVal); - } - else - { - strVal = val.to_string(); - } - - if (cfg.to_string() == "indent:string") - { - indString = strVal; - } - else if (cfg.to_string() == "startlabel") - { - startLabelName = strVal; - bUseStartLabel = !startLabelName.empty(); - } - else if (cfg.to_string() == "labelprefix") - { - labelPrefix = strVal; - } - else if (mapVariableKeys.find(cfg.to_string()) != mapVariableKeys.end()) - { - if (bFirstPass && !mapCodeName.insert( - std::make_pair(cfg.to_string().substr(sizeof("variable:") - 1), strVal) - ).second) - { - fatal("variable already being used and cannot be changed"); - } - } - else if (mapDefineKeys.find(cfg.to_string()) != mapDefineKeys.end()) - { - if (bFirstPass && !mapCodeName.insert( - std::make_pair(cfg.to_string().substr(sizeof("define:") - 1), strVal) - ).second) - { - fatal("define already being used and cannot be changed"); - } - } - else if (mapLabelKeys.find(cfg.to_string()) != mapLabelKeys.end()) - { - if (bFirstPass && !mapCodeName.insert( - std::make_pair(cfg.to_string().substr(sizeof("label:") - 1), strVal) - ).second) - { - fatal("label already being used and cannot be changed"); - } - } - else - { - fatal("unrecognized configuration name or illegal string value"); - } -} - -} // end namespace re2c diff --git a/tools/re2c/code.h b/tools/re2c/code.h deleted file mode 100644 index ed9df0dde..000000000 --- a/tools/re2c/code.h +++ /dev/null @@ -1,53 +0,0 @@ -/* $Id: code.h 525 2006-05-25 13:32:49Z helly 
$ */ -#ifndef _code_h -#define _code_h - -#include "re.h" -#include "dfa.h" - -namespace re2c -{ - -class BitMap -{ -public: - static BitMap *first; - - const Go *go; - const State *on; - const BitMap *next; - uint i; - uint m; - -public: - static const BitMap *find(const Go*, const State*); - static const BitMap *find(const State*); - static void gen(std::ostream&, uint ind, uint, uint); - static void stats(); - BitMap(const Go*, const State*); - ~BitMap(); - -#if PEDANTIC - BitMap(const BitMap& oth) - : go(oth.go) - , on(oth.on) - , next(oth.next) - , i(oth.i) - , m(oth.m) - { - } - BitMap& operator = (const BitMap& oth) - { - new(this) BitMap(oth); - return *this; - } -#endif -}; - -#ifdef _MSC_VER -# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */ -#endif - -} // end namespace re2c - -#endif diff --git a/tools/re2c/code_names.h b/tools/re2c/code_names.h deleted file mode 100644 index 4622e20e0..000000000 --- a/tools/re2c/code_names.h +++ /dev/null @@ -1,33 +0,0 @@ -/* $Id: token.h 547 2006-05-25 13:40:35Z helly $ */ -#ifndef _code_names_h -#define _code_names_h - -#include -#include - -namespace re2c -{ - -class CodeNames: public std::map -{ -public: - std::string& operator [] (const char * what); -}; - -inline std::string& CodeNames::operator [] (const char * what) -{ - CodeNames::iterator it = find(std::string(what)); - - if (it != end()) - { - return it->second; - } - else - { - return insert(std::make_pair(std::string(what), std::string(what))).first->second; - } -} - -} // end namespace re2c - -#endif diff --git a/tools/re2c/config.h.in b/tools/re2c/config.h.in index b856a24bf..38029b7cf 100644 --- a/tools/re2c/config.h.in +++ b/tools/re2c/config.h.in @@ -1,10 +1,7 @@ -/* config.h.in. Generated from configure.in by autoheader. */ +/* config.h.in. Generated from configure.ac by autoheader. */ -/* Define to 1 if you have the `strdup' function. */ -#cmakedefine HAVE_STRDUP - -/* Define to 1 if you have the `strndup' function. 
*/ -#cmakedefine HAVE_STRNDUP +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDINT_H @HAVE_STDINT_H@ /* Name of package */ #cmakedefine PACKAGE "@PACKAGE_NAME@" @@ -21,21 +18,41 @@ /* Define to the one symbol short name of this package. */ #cmakedefine PACKAGE_TARNAME "@PACKAGE_TARNAME@" +/* Define to the home page for this package. */ +#cmakedefine PACKAGE_URL "@PACKAGE_URL@" + /* Define to the version of this package. */ #cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@" +/* The size of `0i8', as computed by sizeof. */ +#cmakedefine SIZEOF_0I8 @SIZEOF_0I8@ + +/* The size of `0l', as computed by sizeof. */ +#cmakedefine SIZEOF_0L @SIZEOF_0L@ + +/* The size of `0ll', as computed by sizeof. */ +#cmakedefine SIZEOF_0LL @SIZEOF_0LL@ + /* The size of `char', as computed by sizeof. */ -#define SIZEOF_CHAR @SIZEOF_CHAR@ +#cmakedefine SIZEOF_CHAR @SIZEOF_CHAR@ /* The size of `int', as computed by sizeof. */ -#define SIZEOF_INT @SIZEOF_INT@ +#cmakedefine SIZEOF_INT @SIZEOF_INT@ /* The size of `long', as computed by sizeof. */ -#define SIZEOF_LONG @SIZEOF_LONG@ +#cmakedefine SIZEOF_LONG @SIZEOF_LONG@ + +/* The size of `long long', as computed by sizeof. */ +#cmakedefine SIZEOF_LONG_LONG @SIZEOF_LONG_LONG@ /* The size of `short', as computed by sizeof. */ -#define SIZEOF_SHORT @SIZEOF_SHORT@ +#cmakedefine SIZEOF_SHORT @SIZEOF_SHORT@ + +/* The size of `void *', as computed by sizeof. */ +#cmakedefine SIZEOF_VOID_P @SIZEOF_VOID_P@ + +/* The size of `__int64', as computed by sizeof. */ +#cmakedefine SIZEOF___INT64 @SIZEOF___INT64@ /* Version number of package */ -#define VERSION "@PACKAGE_VERSION@" - +#cmakedefine VERSION @PACKAGE_VERSION@ diff --git a/tools/re2c/config_w32.h b/tools/re2c/config_w32.h deleted file mode 100644 index 2dc92bb01..000000000 --- a/tools/re2c/config_w32.h +++ /dev/null @@ -1,102 +0,0 @@ -/* config.h. Generated by configure. */ -/* config.h.in. Generated from configure.in by autoheader. 
*/ - -/* Define to 1 if you have the `getpagesize' function. */ -#define HAVE_GETPAGESIZE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `memset' function. */ -#define HAVE_MEMSET 1 - -/* Define to 1 if you have a working `mmap' system call. */ -/* #undef HAVE_MMAP */ - -/* Define to 1 if you have the `munmap' function. */ -#define HAVE_MUNMAP 1 - -/* Define to 1 if stdbool.h conforms to C99. */ -#define HAVE_STDBOOL_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strdup' function. */ -#define HAVE_STRDUP 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if the system has the type `_Bool'. */ -#define HAVE__BOOL 1 - -/* Name of package */ -#define PACKAGE "re2c" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "re2c-general@lists.sourceforge.net" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "re2c" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "re2c 0.12.3" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "re2c" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "0.12.3" - -/* The size of a `char', as computed by sizeof. */ -#define SIZEOF_CHAR 1 - -/* The size of a `int', as computed by sizeof. 
*/ -#define SIZEOF_INT 4 - -/* The size of a `long', as computed by sizeof. */ -#define SIZEOF_LONG 4 - -/* The size of a `short', as computed by sizeof. */ -#define SIZEOF_SHORT 2 - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Version number of package */ -#define VERSION "0.12.3" - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to `unsigned' if does not define. */ -/* #undef size_t */ - -/* Define to empty if the keyword `volatile' does not work. Warning: valid - code using `volatile' can become incorrect without. Disable with care. */ -/* #undef volatile */ diff --git a/tools/re2c/dfa.cc b/tools/re2c/dfa.cc deleted file mode 100644 index d7db03d5c..000000000 --- a/tools/re2c/dfa.cc +++ /dev/null @@ -1,416 +0,0 @@ -#include -#include -#include -#include "globals.h" -#include "substr.h" -#include "dfa.h" - -namespace re2c -{ - -void prtChOrHex(std::ostream& o, uint c, bool useTalx) -{ - int oc = (int)(re2c::wFlag || !useTalx ? c : re2c::talx[c]); - - if ((oc < 256) && isprint(oc)) - { - o << '\''; - prtCh(o, c); - o << '\''; - } - else - { - prtHex(o, c); - } -} - -void prtHex(std::ostream& o, uint c, bool useTalx) -{ - int oc = (int)(re2c::wFlag || !useTalx ? c : re2c::talx[c]); - - if (re2c::uFlag) - { - o << "0x" - << hexCh(oc >> 28) - << hexCh(oc >> 24) - << hexCh(oc >> 20) - << hexCh(oc >> 16) - << hexCh(oc >> 12) - << hexCh(oc >> 8) - << hexCh(oc >> 4) - << hexCh(oc); - } - else if (re2c::wFlag) - { - o << "0x" - << hexCh(oc >> 12) - << hexCh(oc >> 8) - << hexCh(oc >> 4) - << hexCh(oc); - } - else - { - o << "0x" - << hexCh(oc >> 4) - << hexCh(oc); - } -} - -void prtCh(std::ostream& o, uint c, bool useTalx) -{ - int oc = (int)(re2c::wFlag || !useTalx ? 
c : re2c::talx[c]); - - switch (oc) - { - case '\'': - o << "\\'"; - break; - - case '\n': - o << "\\n"; - break; - - case '\t': - o << "\\t"; - break; - - case '\v': - o << "\\v"; - break; - - case '\b': - o << "\\b"; - break; - - case '\r': - o << "\\r"; - break; - - case '\f': - o << "\\f"; - break; - - case '\a': - o << "\\a"; - break; - - case '\\': - o << "\\\\"; - break; - - default: - - if ((oc < 256) && isprint(oc)) - { - o << (char) oc; - } - else if (re2c::uFlag) - { - o << "0x" - << hexCh(oc >> 20) - << hexCh(oc >> 16) - << hexCh(oc >> 12) - << hexCh(oc >> 8) - << hexCh(oc >> 4) - << hexCh(oc); - } - else if (re2c::wFlag) - { - o << "0x" - << hexCh(oc >> 12) - << hexCh(oc >> 8) - << hexCh(oc >> 4) - << hexCh(oc); - } - else - { - o << '\\' << octCh(oc / 64) << octCh(oc / 8) << octCh(oc); - } - } -} - -void printSpan(std::ostream& o, uint lb, uint ub) -{ - if (lb > ub) - { - o << "*"; - } - - o << "["; - - if ((ub - lb) == 1) - { - prtCh(o, lb); - } - else - { - prtCh(o, lb); - o << "-"; - prtCh(o, ub - 1); - } - - o << "]"; -} - -uint Span::show(std::ostream &o, uint lb) const -{ - if (to) - { - printSpan(o, lb, ub); - o << " " << to->label << "; "; - } - - return ub; -} - -std::ostream& operator<<(std::ostream &o, const State &s) -{ - o << "state " << s.label; - - if (s.rule) - { - o << " accepts " << s.rule->accept; - } - - o << "\n"; - - uint lb = 0; - - for (uint i = 0; i < s.go.nSpans; ++i) - { - lb = s.go.span[i].show(o, lb); - } - - return o; -} - -std::ostream& operator<<(std::ostream &o, const DFA &dfa) -{ - for (State *s = dfa.head; s; s = s->next) - { - o << s << "\n\n"; - } - - return o; -} - -State::State() - : label(0) - , rule(NULL) - , next(0) - , link(NULL) - , depth(0) - , kCount(0) - , kernel(NULL) - , isPreCtxt(false) - , isBase(false) - , go() - , action(NULL) -{ -} - -State::~State() -{ - delete action; - delete [] kernel; - delete [] go.span; -} - -static Ins **closure(Ins **cP, Ins *i) -{ - while (!isMarked(i)) - { - mark(i); - 
*(cP++) = i; - - if (i->i.tag == FORK) - { - cP = closure(cP, i + 1); - i = (Ins*) i->i.link; - } - else if (i->i.tag == GOTO || i->i.tag == CTXT) - { - i = (Ins*) i->i.link; - } - else - break; - } - - return cP; -} - -struct GoTo -{ - Char ch; - void *to; -}; - -DFA::DFA(Ins *ins, uint ni, uint lb, uint ub, Char *rep) - : lbChar(lb) - , ubChar(ub) - , nStates(0) - , head(NULL) - , tail(&head) - , toDo(NULL) -{ - Ins **work = new Ins * [ni + 1]; - uint nc = ub - lb; - GoTo *goTo = new GoTo[nc]; - Span *span = new Span[nc]; - memset((char*) goTo, 0, nc*sizeof(GoTo)); - findState(work, closure(work, &ins[0]) - work); - - while (toDo) - { - State *s = toDo; - toDo = s->link; - - Ins **cP, **iP, *i; - uint nGoTos = 0; - uint j; - - s->rule = NULL; - - for (iP = s->kernel; (i = *iP); ++iP) - { - if (i->i.tag == CHAR) - { - for (Ins *j = i + 1; j < (Ins*) i->i.link; ++j) - { - if (!(j->c.link = goTo[j->c.value - lb].to)) - goTo[nGoTos++].ch = j->c.value; - - goTo[j->c.value - lb].to = j; - } - } - else if (i->i.tag == TERM) - { - if (!s->rule || ((RuleOp*) i->i.link)->accept < s->rule->accept) - s->rule = (RuleOp*) i->i.link; - } - else if (i->i.tag == CTXT) - { - s->isPreCtxt = true; - } - } - - for (j = 0; j < nGoTos; ++j) - { - GoTo *go = &goTo[goTo[j].ch - lb]; - i = (Ins*) go->to; - - for (cP = work; i; i = (Ins*) i->c.link) - cP = closure(cP, i + i->c.bump); - - go->to = findState(work, cP - work); - } - - s->go.nSpans = 0; - - for (j = 0; j < nc;) - { - State *to = (State*) goTo[rep[j]].to; - - while (++j < nc && goTo[rep[j]].to == to) ; - - span[s->go.nSpans].ub = lb + j; - - span[s->go.nSpans].to = to; - - s->go.nSpans++; - } - - for (j = nGoTos; j-- > 0;) - goTo[goTo[j].ch - lb].to = NULL; - - s->go.span = new Span[s->go.nSpans]; - - memcpy((char*) s->go.span, (char*) span, s->go.nSpans*sizeof(Span)); - - (void) new Match(s); - - } - - delete [] work; - delete [] goTo; - delete [] span; -} - -DFA::~DFA() -{ - State *s; - - while ((s = head)) - { - head = 
s->next; - delete s; - } -} - -void DFA::addState(State **a, State *s) -{ - s->label = nStates++; - s->next = *a; - *a = s; - - if (a == tail) - tail = &s->next; -} - -State *DFA::findState(Ins **kernel, uint kCount) -{ - Ins **cP, **iP, *i; - State *s; - - kernel[kCount] = NULL; - - cP = kernel; - - for (iP = kernel; (i = *iP); ++iP) - { - if (i->i.tag == CHAR || i->i.tag == TERM || i->i.tag == CTXT) - { - *cP++ = i; - } - else - { - unmark(i); - } - } - - kCount = cP - kernel; - kernel[kCount] = NULL; - - for (s = head; s; s = s->next) - { - if (s->kCount == kCount) - { - for (iP = s->kernel; (i = *iP); ++iP) - if (!isMarked(i)) - goto nextState; - - goto unmarkAll; - } - -nextState: - ; - } - - s = new State; - addState(tail, s); - s->kCount = kCount; - s->kernel = new Ins * [kCount + 1]; - memcpy(s->kernel, kernel, (kCount + 1)*sizeof(Ins*)); - s->link = toDo; - toDo = s; - -unmarkAll: - - for (iP = kernel; (i = *iP); ++iP) - unmark(i); - - return s; -} - -} // end namespace re2c - diff --git a/tools/re2c/dfa.h b/tools/re2c/dfa.h deleted file mode 100644 index fac2fe21c..000000000 --- a/tools/re2c/dfa.h +++ /dev/null @@ -1,366 +0,0 @@ -/* $Id: dfa.h 569 2006-06-05 22:14:00Z helly $ */ -#ifndef _dfa_h -#define _dfa_h - -#include -#include -#include "re.h" - -namespace re2c -{ - -extern void prtCh(std::ostream&, uint, bool useTalx = true); -extern void prtHex(std::ostream&, uint, bool useTalx = true); -extern void prtChOrHex(std::ostream&, uint, bool useTalx = true); -extern void printSpan(std::ostream&, uint, uint); - -class DFA; - -class State; - -class Action -{ - -public: - State *state; - -public: - Action(State*); - virtual ~Action(); - - virtual void emit(std::ostream&, uint, bool&) const = 0; - virtual bool isRule() const; - virtual bool isMatch() const; - virtual bool isInitial() const; - virtual bool readAhead() const; - -#ifdef PEDANTIC -protected: - Action(const Action& oth) - : state(oth.state) - { - } - Action& operator = (const Action& oth) - { - 
state = oth.state; - return *this; - } -#endif -}; - -class Match: public Action -{ -public: - Match(State*); - void emit(std::ostream&, uint, bool&) const; - bool isMatch() const; -}; - -class Enter: public Action -{ -public: - uint label; - -public: - Enter(State*, uint); - void emit(std::ostream&, uint, bool&) const; -}; - -class Initial: public Enter -{ -public: - bool setMarker; - -public: - Initial(State*, uint, bool); - void emit(std::ostream&, uint, bool&) const; - bool isInitial() const; -}; - -class Save: public Match -{ - -public: - uint selector; - -public: - Save(State*, uint); - void emit(std::ostream&, uint, bool&) const; - bool isMatch() const; -}; - -class Move: public Action -{ - -public: - Move(State*); - void emit(std::ostream&, uint, bool&) const; -}; - -class Accept: public Action -{ - -public: - typedef std::map RuleMap; - - uint nRules; - uint *saves; - State **rules; - RuleMap mapRules; - -public: - Accept(State*, uint, uint*, State**); - void emit(std::ostream&, uint, bool&) const; - void emitBinary(std::ostream &o, uint ind, uint l, uint r, bool &readCh) const; - void genRuleMap(); - -#ifdef PEDANTIC -private: - Accept(const Accept& oth) - : Action(oth) - , nRules(oth.nRules) - , saves(oth.saves) - , rules(oth.rules) - { - } - Accept& operator=(const Accept& oth) - { - new(this) Accept(oth); - return *this; - } -#endif -}; - -class Rule: public Action -{ - -public: - RuleOp *rule; - -public: - Rule(State*, RuleOp*); - void emit(std::ostream&, uint, bool&) const; - bool isRule() const; - -#ifdef PEDANTIC -private: - Rule (const Rule& oth) - : Action(oth) - , rule(oth.rule) - { - } - Rule& operator=(const Rule& oth) - { - new(this) Rule(oth); - return *this; - } -#endif -}; - -class Span -{ - -public: - uint ub; - State *to; - -public: - uint show(std::ostream&, uint) const; -}; - -class Go -{ -public: - Go() - : nSpans(0) - , wSpans(~0u) - , lSpans(~0u) - , dSpans(~0u) - , lTargets(~0u) - , span(NULL) - { - } - -public: - uint nSpans; // 
number of spans - uint wSpans; // number of spans in wide mode - uint lSpans; // number of low (non wide) spans - uint dSpans; // number of decision spans (decide between g and b mode) - uint lTargets; - Span *span; - -public: - void genGoto( std::ostream&, uint ind, const State *from, const State *next, bool &readCh); - void genBase( std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const; - void genLinear(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const; - void genBinary(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const; - void genSwitch(std::ostream&, uint ind, const State *from, const State *next, bool &readCh, uint mask) const; - void genCpGoto(std::ostream&, uint ind, const State *from, const State *next, bool &readCh) const; - void compact(); - void unmap(Go*, const State*); -}; - -class State -{ - -public: - uint label; - RuleOp *rule; - State *next; - State *link; - uint depth; // for finding SCCs - uint kCount; - Ins **kernel; - - bool isPreCtxt; - bool isBase; - Go go; - Action *action; - -public: - State(); - ~State(); - void emit(std::ostream&, uint, bool&) const; - friend std::ostream& operator<<(std::ostream&, const State&); - friend std::ostream& operator<<(std::ostream&, const State*); - -#ifdef PEDANTIC -private: - State(const State& oth) - : label(oth.label) - , rule(oth.rule) - , next(oth.next) - , link(oth.link) - , depth(oth.depth) - , kCount(oth.kCount) - , kernel(oth.kernel) - , isBase(oth.isBase) - , go(oth.go) - , action(oth.action) - { - } - State& operator = (const State& oth) - { - new(this) State(oth); - return *this; - } -#endif -}; - -class DFA -{ - -public: - uint lbChar; - uint ubChar; - uint nStates; - State *head, **tail; - State *toDo; - -public: - DFA(Ins*, uint, uint, uint, Char*); - ~DFA(); - void addState(State**, State*); - State *findState(Ins**, uint); - void split(State*); - - void findSCCs(); - 
void findBaseState(); - void emit(std::ostream&, uint); - - friend std::ostream& operator<<(std::ostream&, const DFA&); - friend std::ostream& operator<<(std::ostream&, const DFA*); - -#ifdef PEDANTIC - DFA(const DFA& oth) - : lbChar(oth.lbChar) - , ubChar(oth.ubChar) - , nStates(oth.nStates) - , head(oth.head) - , tail(oth.tail) - , toDo(oth.toDo) - { - } - DFA& operator = (const DFA& oth) - { - new(this) DFA(oth); - return *this; - } -#endif -}; - -inline Action::Action(State *s) : state(s) -{ - delete s->action; - s->action = this; -} - -inline Action::~Action() -{ -} - -inline bool Action::isRule() const -{ - return false; -} - -inline bool Action::isMatch() const -{ - return false; -} - -inline bool Action::isInitial() const -{ - return false; -} - -inline bool Action::readAhead() const -{ - return !isMatch() || (state && state->next && state->next->action && !state->next->action->isRule()); -} - -inline Match::Match(State *s) : Action(s) -{ } - -inline bool Match::isMatch() const -{ - return true; -} - -inline Enter::Enter(State *s, uint l) : Action(s), label(l) -{ } - -inline Initial::Initial(State *s, uint l, bool b) : Enter(s, l), setMarker(b) -{ } - -inline bool Initial::isInitial() const -{ - return true; -} - -inline Save::Save(State *s, uint i) : Match(s), selector(i) -{ } - -inline bool Save::isMatch() const -{ - return false; -} - -inline bool Rule::isRule() const -{ - return true; -} - -inline std::ostream& operator<<(std::ostream &o, const State *s) -{ - return o << *s; -} - -inline std::ostream& operator<<(std::ostream &o, const DFA *dfa) -{ - return o << *dfa; -} - -} // end namespace re2c - -#endif diff --git a/tools/re2c/doc/loplas.ps b/tools/re2c/doc/loplas.ps deleted file mode 100644 index 2067b582c..000000000 --- a/tools/re2c/doc/loplas.ps +++ /dev/null @@ -1,5249 +0,0 @@ -%!PS-Adobe-2.0 -%%Creator: dvipsk 5.526a Copyright 1986, 1993 Radical Eye Software -%%Title: paper.dvi -%%Pages: 16 -%%PageOrder: Ascend -%%BoundingBox: 0 0 612 792 
-%%EndComments -%DVIPSCommandLine: dvips -o paper.ps paper -%DVIPSParameters: dpi=300, compressed, comments removed -%DVIPSSource: TeX output 1994.04.15:2115 -%%BeginProcSet: texc.pro -/TeXDict 250 dict def TeXDict begin /N{def}def /B{bind def}N /S{exch}N -/X{S N}B /TR{translate}N /isls false N /vsize 11 72 mul N /hsize 8.5 72 -mul N /landplus90{false}def /@rigin{isls{[0 landplus90{1 -1}{-1 1} -ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale -isls{landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div -hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul -TR matrix currentmatrix dup dup 4 get round 4 exch put dup dup 5 get -round 5 exch put setmatrix}N /@landscape{/isls true N}B /@manualfeed{ -statusdict /manualfeed true put}B /@copies{/#copies X}B /FMat[1 0 0 -1 0 -0]N /FBB[0 0 0 0]N /nn 0 N /IE 0 N /ctr 0 N /df-tail{/nn 8 dict N nn -begin /FontType 3 N /FontMatrix fntrx N /FontBBox FBB N string /base X -array /BitMaps X /BuildChar{CharBuilder}N /Encoding IE N end dup{/foo -setfont}2 array copy cvx N load 0 nn put /ctr 0 N[}B /df{/sf 1 N /fntrx -FMat N df-tail}B /dfs{div /sf X /fntrx[sf 0 0 sf neg 0 0]N df-tail}B /E{ -pop nn dup definefont setfont}B /ch-width{ch-data dup length 5 sub get} -B /ch-height{ch-data dup length 4 sub get}B /ch-xoff{128 ch-data dup -length 3 sub get sub}B /ch-yoff{ch-data dup length 2 sub get 127 sub}B -/ch-dx{ch-data dup length 1 sub get}B /ch-image{ch-data dup type -/stringtype ne{ctr get /ctr ctr 1 add N}if}B /id 0 N /rw 0 N /rc 0 N /gp -0 N /cp 0 N /G 0 N /sf 0 N /CharBuilder{save 3 1 roll S dup /base get 2 -index get S /BitMaps get S get /ch-data X pop /ctr 0 N ch-dx 0 ch-xoff -ch-yoff ch-height sub ch-xoff ch-width add ch-yoff setcachedevice -ch-width ch-height true[1 0 0 -1 -.1 ch-xoff sub ch-yoff .1 add]/id -ch-image N /rw ch-width 7 add 8 idiv string N /rc 0 N /gp 0 N /cp 0 N{ -rc 0 ne{rc 1 sub /rc X rw}{G}ifelse}imagemask restore}B /G{{id gp get -/gp gp 1 add N dup 18 mod S 18 
idiv pl S get exec}loop}B /adv{cp add /cp -X}B /chg{rw cp id gp 4 index getinterval putinterval dup gp add /gp X -adv}B /nd{/cp 0 N rw exit}B /lsh{rw cp 2 copy get dup 0 eq{pop 1}{dup -255 eq{pop 254}{dup dup add 255 and S 1 and or}ifelse}ifelse put 1 adv} -B /rsh{rw cp 2 copy get dup 0 eq{pop 128}{dup 255 eq{pop 127}{dup 2 idiv -S 128 and or}ifelse}ifelse put 1 adv}B /clr{rw cp 2 index string -putinterval adv}B /set{rw cp fillstr 0 4 index getinterval putinterval -adv}B /fillstr 18 string 0 1 17{2 copy 255 put pop}for N /pl[{adv 1 chg} -{adv 1 chg nd}{1 add chg}{1 add chg nd}{adv lsh}{adv lsh nd}{adv rsh}{ -adv rsh nd}{1 add adv}{/rc X nd}{1 add set}{1 add clr}{adv 2 chg}{adv 2 -chg nd}{pop nd}]dup{bind pop}forall N /D{/cc X dup type /stringtype ne{] -}if nn /base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{dup dup -length 1 sub dup 2 index S get sf div put}if put /ctr ctr 1 add N}B /I{ -cc 1 add D}B /bop{userdict /bop-hook known{bop-hook}if /SI save N @rigin -0 0 moveto /V matrix currentmatrix dup 1 get dup mul exch 0 get dup mul -add .99 lt{/QV}{/RV}ifelse load def pop pop}N /eop{SI restore showpage -userdict /eop-hook known{eop-hook}if}N /@start{userdict /start-hook -known{start-hook}if pop /VResolution X /Resolution X 1000 div /DVImag X -/IE 256 array N 0 1 255{IE S 1 string dup 0 3 index put cvn put}for -65781.76 div /vsize X 65781.76 div /hsize X}N /p{show}N /RMat[1 0 0 -1 0 -0]N /BDot 260 string N /rulex 0 N /ruley 0 N /v{/ruley X /rulex X V}B /V -{}B /RV statusdict begin /product where{pop product dup length 7 ge{0 7 -getinterval dup(Display)eq exch 0 4 getinterval(NeXT)eq or}{pop false} -ifelse}{false}ifelse end{{gsave TR -.1 -.1 TR 1 1 scale rulex ruley -false RMat{BDot}imagemask grestore}}{{gsave TR -.1 -.1 TR rulex ruley -scale 1 1 false RMat{BDot}imagemask grestore}}ifelse B /QV{gsave -transform round exch round exch itransform moveto rulex 0 rlineto 0 -ruley neg rlineto rulex neg 0 rlineto fill grestore}B /a{moveto}B /delta -0 N /tail{dup /delta X 
0 rmoveto}B /M{S p delta add tail}B /b{S p tail} -B /c{-4 M}B /d{-3 M}B /e{-2 M}B /f{-1 M}B /g{0 M}B /h{1 M}B /i{2 M}B /j{ -3 M}B /k{4 M}B /w{0 rmoveto}B /l{p -4 w}B /m{p -3 w}B /n{p -2 w}B /o{p --1 w}B /q{p 1 w}B /r{p 2 w}B /s{p 3 w}B /t{p 4 w}B /x{0 S rmoveto}B /y{ -3 2 roll p a}B /bos{/SS save N}B /eos{SS restore}B end -%%EndProcSet -%%BeginProcSet: special.pro -TeXDict begin /SDict 200 dict N SDict begin /@SpecialDefaults{/hs 612 N -/vs 792 N /ho 0 N /vo 0 N /hsc 1 N /vsc 1 N /ang 0 N /CLIP 0 N /rwiSeen -false N /rhiSeen false N /letter{}N /note{}N /a4{}N /legal{}N}B -/@scaleunit 100 N /@hscale{@scaleunit div /hsc X}B /@vscale{@scaleunit -div /vsc X}B /@hsize{/hs X /CLIP 1 N}B /@vsize{/vs X /CLIP 1 N}B /@clip{ -/CLIP 2 N}B /@hoffset{/ho X}B /@voffset{/vo X}B /@angle{/ang X}B /@rwi{ -10 div /rwi X /rwiSeen true N}B /@rhi{10 div /rhi X /rhiSeen true N}B -/@llx{/llx X}B /@lly{/lly X}B /@urx{/urx X}B /@ury{/ury X}B /magscale -true def end /@MacSetUp{userdict /md known{userdict /md get type -/dicttype eq{userdict begin md length 10 add md maxlength ge{/md md dup -length 20 add dict copy def}if end md begin /letter{}N /note{}N /legal{} -N /od{txpose 1 0 mtx defaultmatrix dtransform S atan/pa X newpath -clippath mark{transform{itransform moveto}}{transform{itransform lineto} -}{6 -2 roll transform 6 -2 roll transform 6 -2 roll transform{ -itransform 6 2 roll itransform 6 2 roll itransform 6 2 roll curveto}}{{ -closepath}}pathforall newpath counttomark array astore /gc xdf pop ct 39 -0 put 10 fz 0 fs 2 F/|______Courier fnt invertflag{PaintBlack}if}N -/txpose{pxs pys scale ppr aload pop por{noflips{pop S neg S TR pop 1 -1 -scale}if xflip yflip and{pop S neg S TR 180 rotate 1 -1 scale ppr 3 get -ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg TR}if xflip yflip -not and{pop S neg S TR pop 180 rotate ppr 3 get ppr 1 get neg sub neg 0 -TR}if yflip xflip not and{ppr 1 get neg ppr 0 get neg TR}if}{noflips{TR -pop pop 270 rotate 1 -1 scale}if xflip yflip and{TR pop pop 
90 rotate 1 --1 scale ppr 3 get ppr 1 get neg sub neg ppr 2 get ppr 0 get neg sub neg -TR}if xflip yflip not and{TR pop pop 90 rotate ppr 3 get ppr 1 get neg -sub neg 0 TR}if yflip xflip not and{TR pop pop 270 rotate ppr 2 get ppr -0 get neg sub neg 0 S TR}if}ifelse scaleby96{ppr aload pop 4 -1 roll add -2 div 3 1 roll add 2 div 2 copy TR .96 dup scale neg S neg S TR}if}N /cp -{pop pop showpage pm restore}N end}if}if}N /normalscale{Resolution 72 -div VResolution 72 div neg scale magscale{DVImag dup scale}if 0 setgray} -N /psfts{S 65781.76 div N}N /startTexFig{/psf$SavedState save N userdict -maxlength dict begin /magscale false def normalscale currentpoint TR -/psf$ury psfts /psf$urx psfts /psf$lly psfts /psf$llx psfts /psf$y psfts -/psf$x psfts currentpoint /psf$cy X /psf$cx X /psf$sx psf$x psf$urx -psf$llx sub div N /psf$sy psf$y psf$ury psf$lly sub div N psf$sx psf$sy -scale psf$cx psf$sx div psf$llx sub psf$cy psf$sy div psf$ury sub TR -/showpage{}N /erasepage{}N /copypage{}N /p 3 def @MacSetUp}N /doclip{ -psf$llx psf$lly psf$urx psf$ury currentpoint 6 2 roll newpath 4 copy 4 2 -roll moveto 6 -1 roll S lineto S lineto S lineto closepath clip newpath -moveto}N /endTexFig{end psf$SavedState restore}N /@beginspecial{SDict -begin /SpecialSave save N gsave normalscale currentpoint TR -@SpecialDefaults count /ocount X /dcount countdictstack N}N /@setspecial -{CLIP 1 eq{newpath 0 0 moveto hs 0 rlineto 0 vs rlineto hs neg 0 rlineto -closepath clip}if ho vo TR hsc vsc scale ang rotate rwiSeen{rwi urx llx -sub div rhiSeen{rhi ury lly sub div}{dup}ifelse scale llx neg lly neg TR -}{rhiSeen{rhi ury lly sub div dup scale llx neg lly neg TR}if}ifelse -CLIP 2 eq{newpath llx lly moveto urx lly lineto urx ury lineto llx ury -lineto closepath clip}if /showpage{}N /erasepage{}N /copypage{}N newpath -}N /@endspecial{count ocount sub{pop}repeat countdictstack dcount sub{ -end}repeat grestore SpecialSave restore end}N /@defspecial{SDict begin} -N /@fedspecial{end}B /li{lineto}B 
/rl{rlineto}B /rc{rcurveto}B /np{ -/SaveX currentpoint /SaveY X N 1 setlinecap newpath}N /st{stroke SaveX -SaveY moveto}N /fil{fill SaveX SaveY moveto}N /ellipse{/endangle X -/startangle X /yrad X /xrad X /savematrix matrix currentmatrix N TR xrad -yrad scale 0 0 1 startangle endangle arc savematrix setmatrix}N end -%%EndProcSet -TeXDict begin 40258431 52099146 1000 300 300 (paper.dvi) -@start /Fa 43 122 df<126012F0A212701210A31220A312401280040C7B830D>44 -D<126012F0A2126004047B830D>46 D<1303A3497EA2497E130BA2EB11E0A3EB20F0A249 -7E1478A2497EA33801FFFEEB001E0002131F80A248EB0780A2120C001E14C039FF803FFC -1E1D7E9C22>65 DI<90380FE0209038 -7018603801C00439030003E000061301000E13004814605A15201278127000F01400A800 -70142012781238A26C14407E000614806CEB01003801C00638007018EB0FE01B1E7D9C21 ->II -II<90381FC04090387030C03801C00C38030003000E1301120C001C -13005A15401278127000F01400A6EC7FF8EC07C00070130312781238A27E120C120E0003 -13053801C008390070304090381FC0001D1E7D9C23>I<39FFF0FFF0390F000F00AC90B5 -FCEB000FAD39FFF0FFF01C1C7D9B22>I<380FFF8038007C00133CB3127012F8A21338EA -7078EA4070EA30E0EA0F80111D7D9B18>74 D<39FFF00FF8390F0007C0EC030014025C5C -5C1460148049C7FC13021307497E1317EB23C0EB43E01381EB00F08014788080141F80EC -078015C015E039FFF03FFC1E1C7D9B23>IIII80 D82 -D<3803E080EA0C19EA1005EA3003EA600112E01300A36C13007E127EEA7FE0EA3FFC6CB4 -FC00071380EA007FEB07C0EB03E0130113007EA36C13C0A238E0018038D00300EACE06EA -81F8131E7D9C19>I<007FB512C038700F010060130000401440A200C014201280A30000 -1400B1497E3803FFFC1B1C7D9B21>I<39FFF01FF0390F000380EC0100B3A26C13021380 -00035BEA01C03800E018EB7060EB0F801C1D7D9B22>I<39FFE003FC001FC712F06C1440 -A26C6C1380A27F0003EB0100A23801E002A2EBF00600001304A2EB7808A2EB7C18EB3C10 -A26D5AA2EB1F60EB0F40A26D5AA36DC7FCA21E1D7E9B22>I<3BFFE07FF03FC03B1F000F -800F00ED00066CEC8004A213800007496C5A1413A23A03C033E0101421A23A01E061F020 -1440A2D800F06D5AEC8078A20178EB7C80903879003CA2013D013FC7FC013E7FA2011E13 -1E011C130EA2010C130C010813042A1D7E9B2E>I<13201370A313B8A3EA011CA3EA020E 
-A2487EA2EA07FF38080380A3381801C0EA380338FE0FF815157F9419>97 -DIIII103 D<38FF8FF8381C01C0A9EA1FFFEA1C01A938FF8FF815157F9419>II<38FF83F8381C01E01480140013025B5B5B1330137013B8EA1D -1C121EEA1C0E7F14801303EB01C014E014F038FF83FC16157F941A>107 -DI<00FEEB0FE0001E1400 -00171317A338138027A23811C047A33810E087A2EB7107A3133AA2131CA2123839FE083F -E01B157F941F>I<38FC03F8381E00E014401217EA138013C01211EA10E01370A2133813 -1CA2130E130714C0130313011300123800FE134015157F9419>III114 -DI<387FFFF03860703000401310A20080 -1308A300001300ADEA07FF15157F9419>I<38FF83F8381C00E01440AE000C13C0000E13 -8038060100EA0386EA00FC15157F9419>I<38FF01F8383C00F0001C1340A2001E13C000 -0E1380A238070100A21383EA0382A2EA01C4A213E4EA00E8A21370A3132015157F9419> -I<38FF87F0381E0380000E1300EA0F026C5AEA038413C8EA01D0EA00F0A21370137813BC -EA011C7FEA020F487EEB0380000C13C0123C38FE07F815157F9419>120 -D<38FF80FE381E0078000E1320000F13407E3803808013C100011300EA00E2A21374137C -1338A848B4FC1715809419>I E /Fb 7 118 df101 D<123E120CA41218A41230A41260 -A412C012C8A312D0126007177D9609>108 D<3830783C38498CC6384E0502EA4C06389C -0E06EA180CA348485A15801418A23960300900140E190E7D8D1D>I111 -D114 -DI117 D E /Fc 1 59 df<126012F0A2126004047D830A>58 -D E /Fd 1 111 df110 D E /Fe 22 121 df45 -D<12E0A303037D820A>I<130813181330A31360A313C0A3EA0180A3EA0300A21206A35A -A35AA35AA35AA35AA20D217E9812>I<1202120E12FEA2120EB0EAFFE0A20B167D9512> -49 D90 D97 D<12E0A8EAE7C0EAFFE0 -EAF870EAE038A2131CA51338A2EAF0F0EAFFE0EAE7800E177E9612>II101 DII<12E0A31200A612E0AF03187E9708>105 -D<12E0B3A503177E9608>108 D<38E7E1F838EFF3FC38F87E1E38F03C0EEAE038AB170F -7E8E1C>IIII114 D<121FEA7FC012E01300A27E127FEA3F80EA0FC0EA -01E0128012C0EAE1C0127FEA1F000B0F7F8E0E>I<1238A4EAFFC0A2EA3800AA1340EA1F -C013000A137F920D>II120 D E /Ff 34 121 df<3801F1C0120312071301120EA7EAFFF1A2EA0E01B012 -1D809C16>13 D45 D<12E0A303037C820C>I<130113031306A3 -130CA31318A31330A31360A213C0A3EA0180A3EA0300A31206A25AA35AA35AA35AA35AA2 -10297E9E15>II<5A1207123FB4FC12C71207B3A3EAFFF8 -A20D1C7C9B15>II 
-I<133C137C135C13DC1201139C1203A2EA071CA2120EA2121C123C12381278127012F0B5 -12C0A238001C00A7121B7F9A15>II55 D57 D69 -D<387FFFF0A2380001E0130314C013071480EB0F005B131E133E133C5B13F85B12015B48 -5A12075B120F90C7FC121E123E123C127C1278B512F0A2141D7E9C19>90 -D97 D<12E0ABEAE3E0EAEFF0EAFFF8EAF87CEAF01CEAE01E13 -0EA6131C12F0EAF87CEAFFF8EAEFF0EAE3C00F1D7D9C15>II<130EABEA0F -8EEA1FEEEA3FFEEA7C3EEA700EA212E0A612F0EA701EEA7C3EEA3FFEEA1FEEEA0F8E0F1D -7E9C15>II<13FC12011203EA0700120EA7EAFFE0A2EA0E -00B00E1D809C0D>I<3803C3C0EA0FFF5A381C3800487EA56C5AEA1FF85BEA3BC00038C7 -FCA2EA1FFC13FF481380EA700738E001C0A3EAF003387C0F80383FFF006C5AEA07F8121B -7F9115>I<12F0A41200A71270B2041D7E9C0A>105 D<12E0AB133C137813F0EAE1E0EAE3 -C0EAE780EAEF00B4FC138012FBEAF9C0EAF1E012E013F013781338133C131E0F1D7D9C14 ->107 D<12E0B3AB031D7D9C0A>I<38E3F03F39EFF8FF80D8FFFD13C039F81F81E038F00F -00EAE00EAD1B127D9122>II< -EA03F0EA0FFC487EEA3C0F38780780EA700338E001C0A5EAF00300701380EA7807383C0F -00EA1FFE6C5AEA03F012127F9115>II114 DI<121CA6EA -FFE0A2EA1C00AC1320EA1FF0120FEA07C00C187F970F>II<39E03E0380A3D870371300EB7707A213733838E38EA33818E18C -381CC1CC001D13DCA2380D80D8000F13F8A20007137019127F911C>119 -D<3870038038780700EA3C0EEA1C1C120E6C5AEA03F06C5A5B7F487EEA0738EA0E18131C -487E487E3870038000F013C01212809113>I E /Fg 72 126 df<126012F0AA12701200 -A4126012F0A212600414799312>33 DII37 D<1207EA1F80EA19C01239A3EA3B9E133EEA3E38EA1C70A2EA1E -E0123E1267EAE7C0EAE3C413CE12E7EA7EFCEA3C380F147F9312>I<126012F012F81278 -1218A31230A2126012C01280050C799312>II<128012C012601230121812 -1C120C120EA21207A7120EA2120C121C12181230126012C0128008197C9612>I<1207A3 -EAE738EAFFF8EA7FF0EA1FC0A2EA7FF0EAFFF8EAE738EA0700A30D0E7E9012>II<126012F012F8127812181230A212E012C0050979 -8312>II<126012F0A212600404798312>I<13181338A21370A2 -13E0A2EA01C0A3EA0380A2EA0700A2120EA25AA35AA25AA25AA25A0D1A7E9612>II<1206A2120E121E12FE12EE120EACEAFFE0A20B147D9312>III<12E0EAFFFEA2 -EAE0181338EA007013E013C01201EA0380A2EA0700A4120EA512040F157F9412>55 
-DII<126012F0A212601200A6126012F0A21260040E798D12>I<126012F0A2126012 -00A6126012F0A212701230A2126012C012800413798D12>I<13381378EA01F0EA03E0EA -0F80EA1F00123C12F85A7E123C121FEA0F80EA03E0EA01F0EA007813380D117E9212>I< -EAFFFEA2127FC7FCA3EA7FFE12FFA20F097F8E12>I<124012E07E127C7EEA0F80EA07C0 -EA01E0EA00F8137813F8EA01E0EA07C0EA0F80EA3E005A12F05A12400D137E9312>I65 DII69 DI73 D75 DII79 -D -I82 DII<38FE3F80A238380E00AE6C5A6C5AEA07F06C5A111480 -9312>I89 DII<12C07E -A21270A27EA27EA37EA27EA2EA0380A2EA01C0A3EA00E0A21370A21338A213180D1A7E96 -12>II97 D<12F8A21238A4EA3BE0 -EA3FF0EA3C38EA381C130C130EA4130C131CEA3C38EA3FF0EA1BC00F147F9312>II< -13F8A21338A4EA07B8EA1FF8EA3878EA7038126012E0A41260EA70781238EA1FFEEA0FBE -0F147F9312>II<137E13FFEA01C7EA03821380A2EA7FFE12FFEA0380AA -EA3FF8A21014809312>II<12F8A212 -38A4EA39E0EA3FF0EA3E38123C1238A8EAFE3EA20F147F9312>I<1206120FA21206C7FC -A3127FA21207AAEAFFF0A20C157D9412>I<12F8A21238A4EA3BFCA2EA38F0EA39E0EA3B -C0EA3F80A213C0EA3DE0123813701338EAFC7EA20F147F9312>107 -DIIIII114 -DI<1206120EA3EA7FF812FFEA0E00A7131CA3EA07F8EA01E00E127F -9112>IIIIIII<13F81201EA0380EA0700A8120E12FCA2120E7EA8EA0380EA01F812000D1A7E96 -12>I<127812FC120E7EA8EA0380EA01F8A2EA0380EA0700A8120E12FC12780D1A7E9612> -125 D E /Fh 37 123 df<90383FE3F83901F03F1C3903C03E3E0007137CEA0F80151C15 -00A5B612C0A2390F807C00AE397FE1FFC0A21F1D809C1C>11 D<127812FCA4127806067D -850D>46 D<1360EA01E0120F12FF12F31203B3A2387FFF80A2111B7D9A18>49 -DI< -EA03F8EA1FFEEA3C1FEB0F80387C07C0127E127C123838000F80A2EB1E005BEA03F8EA00 -1EEB0F80EB07C0A214E01230127812FCA214C038780F80EB1F00EA1FFEEA07F8131B7E9A -18>II66 D<90381FE0209038FFF8E03803F80F3807C003380F -800148C7FC123E1560127E127C00FC1400A8007C1460127E123E15C07E390F8001803907 -C003003803F80E3800FFFCEB1FE01B1C7D9B22>I69 D77 D79 -D82 D<3807F820381FFEE0EA3C07EA -7801EA700012F01460A26C130012FEEAFFE0EA7FFE6C7E1480000F13C06C13E0EA007FEB -03F01301130012C0A214E07E38F001C0EAFC0338EFFF00EA83FC141C7D9B1B>I<007FB5 
-12E0A238781F81007013800060146000E0147000C01430A400001400B03807FFFEA21C1C -7E9B21>I97 DIIII<137F3801 -E3803803C7C0EA0787120FEB8380EB8000A5EAFFF8A2EA0F80AEEA7FF8A2121D809C0F> -I<3803F8F0380E0F38121E381C0730003C1380A4001C1300EA1E0FEA0E0EEA1BF80010C7 -FC1218A2EA1FFF14C06C13E04813F0387801F838F00078A300701370007813F0381E03C0 -3807FF00151B7F9118>II<121E123FA4121EC7FCA6B4FCA2121FAEEAFFE0A20B1E7F9D0E>I107 DI<39FF0FC0 -7E903831E18F3A1F40F20780D980FC13C0A2EB00F8AB3AFFE7FF3FF8A225127F9128>I< -38FF0FC0EB31E0381F40F0EB80F8A21300AB38FFE7FFA218127F911B>II<38FF3F80EBE1E0381F80F0EB0078147C143C143EA6143C147C -1478EB80F0EBC1E0EB3F0090C7FCA6EAFFE0A2171A7F911B>I114 DI<1203A45AA25AA2EA3FFC12FFEA1F00A9130CA4EA0F08EA0798EA03F00E1A7F9913>I< -38FF07F8A2EA1F00AC1301120F380786FFEA01F818127F911B>I<39FF8FF8FEA2391F03 -E030A201831370000FEBF0601386D807C613C0EBCEF8EBEC790003EB7D80EBF83D0001EB -3F00A2497E0000131EEBE00EA21F127F9122>119 D<38FFC7FCA2381F8180EA0F833807 -C700EA03EEEA01FC5B1200137C13FEEA01DFEA039F38070F80380607C0380C03E038FF07 -FCA216127F9119>I<38FFC1FCA2381F0060EB80E0000F13C013C03807C180A23803E300 -A2EA01F713F6EA00FE5BA21378A21330A21370EA706012F85BEAF9800073C7FC123E161A -7F9119>I<383FFF80383C1F00EA303F133E485A13FC5BEA01F01203485AEBC180EA0F81 -121F1303003E1300EA7E07EA7C0FB5FC11127F9115>I E /Fi 55 -124 df<1218123CA31204A21208A21210122012401280060C779C0D>39 -D43 D<12181238127812381208A21210A212201240A21280050C7D -830D>II<1230127812F0126005047C830D>I<14031407140E14 -0C141C141814381430147014E014C013011480130314005B1306130E5B13181338133013 -70136013E05B1201485A90C7FC5A1206120E120C121C121812385A126012E05AA218297F -9E15>I<133C13C6EA0183EA030312061480120E120C121C1400485AA4EA700EA4485AA3 -5BA21330485A12E0EA60C0EA7180001EC7FC111D7B9B15>I<13021306130C131C137CEA -039CEA0038A41370A413E0A4EA01C0A4EA0380A41207EAFFF00F1C7C9B15>I<133C13C3 -38010180120214C0EA0441A21208A338108380A238110700EA0E06C65A5B5B13C048C7FC -12061208485A13021220EA4006EA7E0CEAC7F81283EA80E0121D7C9B15>I<133EEBC180 
-EA0101380200C05A1340EA0841A3EB8380EA070338000700130EEA01F8EA0038130CA213 -0EA41270485A12805B1330EA4060EA21C0001FC7FC121D7C9B15>III<130FEB308013C0EA01831203 -90C7FC1206120E120C121C13F0EA3B18EA3C0C12381278EA700EA3EA601C12E0A35BA25B -EA60605BEA2180001EC7FC111D7B9B15>I<131E1361EB8180EA0180380300C0A2380601 -80A3380703001386EA03CC13F01201EA0378EA063CEA081EEA180E1230EA6006A3485AA2 -5BEA60105BEA30C0000FC7FC121D7C9B15>56 D<133C13C6EA0183EA03031206120E000C -1380121C1400A2485AA35B130EEA181EA2EA0C6CEA079CEA001C1318133813305BEAE0C0 -A2EA81800086C7FC127C111D7B9B15>I<1206120FA212061200AA1230127812F0126008 -127C910D>I<1418A21438A21478A214B8130114381302143CEB041CA213081318131013 -20A2EB7FFCEB401C1380120113001202A2487F120C001C131EB4EBFFC01A1D7E9C1F>65 -D<903803F02090381E0C6090383002E09038E003C03801C001EA038048C7FC000E148012 -1E121C123C15005AA35AA41404A35C12705C6C5B00185B6C485AD80706C7FCEA01F81B1E -7A9C1E>67 D<48B512E038003C00013813601540A35BA214201500495AA214C013FF3801 -C080A43803810113801402A248485AA2140C5C000E1378B55A1B1C7D9B1C>69 -D<48B512C038003C01EB38001580A35BA214201500495AA214C013FF3801C080A4D80381 -C7FC1380A348C8FCA45AEAFFF01A1C7D9B1B>I<903803F02090381E0C6090383002E090 -38E003C03801C001EA038048C7FC000E1480121E121C123C15005AA35AA2903801FFC090 -38001E00141CA400705BA27E001813786C139038070710D801F8C7FC1B1E7A9C20>I<38 -01FFC038003C001338A45BA45BA4485AA4485AA448C7FCA45AEAFFE0121C7E9B10>73 -D<3801FFE038003C001338A45BA45BA4485AA438038008A31410EA07001430146014E038 -0E03C0B5FC151C7D9B1A>76 DI<3901FC03FE39001C0070013C13 -60012E1340A301471380A3EB43809038838100A2138114C1380101C2A2EB00E2A2000213 -E41474A3481338A3000C1318001C1310EAFF801F1C7D9B1F>II<3801FF -FC38003C079038380380EC01C0A3EB7003A31580EBE0071500140E14383801FFE001C0C7 -FCA3485AA448C8FCA45AEAFFE01A1C7D9B1C>I<3801FFF838003C0EEB3807EC0380A3EB -7007A3EC0F00EBE00E5C1470EBFFC0EA01C014601470A2EA0380A4380700F01540A21580 -48137839FFE07900C7121E1A1D7D9B1E>82 DI<001FB512C0381C070138300E 
-0000201480126012405B1280A2000014005BA45BA45BA4485AA41203EA7FFE1A1C799B1E ->I<397FF0FF80390F001C00000E13181410A3485BA4485BA4485BA44848C7FCA31302A2 -5BA2EA6008EA3030EA1040EA0F80191D779B1F>I<3901FF81FE39001E00F0011C136001 -1E1380EB0E011500EB0F026D5A5C1490EB03A014C01301A28013021304497EEB10701320 -EB60381340EB803C3801001C12020006131E121E39FF80FFC01F1C7E9B1F>88 -D97 D<123F1207A2120EA45AA4EA39C0EA3E60EA3830A2EA7038A4EAE0 -70A3136013E0EAC0C012C1EA6180EA6300123C0D1D7B9C13>IIIII<13F3EA018FEA030FEA0607EA0E0E120C121CA2EA381CA413381230A2EA1878 -13F0EA0F701200A213E0A2EAC0C012E1EAC300127E101A7D9113>III108 D<393C1E078039266318C0394683A0E0384703C0008E13 -80A2120EA2391C0701C0A3EC0380D8380E1388A2EC0708151039701C032039300C01C01D -127C9122>IIIIIII<13C01201A3EA0380A4EAFFE0EA0700A3120EA45AA4EA3840A313 -80EA1900120E0B1A7D990E>II<381E0183382703871247148338870701A2 -120EA2381C0E02A31404EA180C131C1408001C1310380C26303807C3C018127C911C> -119 DI -I123 D E /Fj 38 123 df<90380FF83F90397FFDFFC03A01F81F -E3E03903E03F87EA07C0D80F801307ED03C06EC7FCA6B612FCA2260F801FC7FCB2397FF0 -FFF0A223237FA221>11 D<1238127C12FE12FFA2127F123B1203A212071206A2120C121C -12181270122008117CA210>39 D<1238127C12FEA3127C123807077C8610>46 -D<13181378EA01F812FFA21201B3A7387FFFE0A213207C9F1C>49 -DI<13FE3807FFC0380F07E0381E03F0123FEB81F8A3EA1F -0314F0120014E0EB07C0EB1F803801FE007F380007C0EB01F014F8EB00FCA2003C13FE12 -7EB4FCA314FCEA7E01007813F8381E07F0380FFFC03801FE0017207E9F1C>I<14E01301 -1303A21307130F131FA21337137713E7EA01C71387EA03071207120E120C121812381270 -12E0B512FEA2380007E0A7EBFFFEA217207E9F1C>I<00101320381E01E0381FFFC01480 -14005B13F8EA1BC00018C7FCA4EA19FCEA1FFF381E0FC0381807E01303000013F0A214F8 -A21238127C12FEA200FC13F0A2387007E0003013C0381C1F80380FFF00EA03F815207D9F -1C>I<1470A214F8A3497EA2497EA3EB06FF80010E7FEB0C3FA201187F141F01387FEB30 -0FA201607F140701E07F90B5FCA239018001FCA200038090C7FCA20006147FA23AFFE00F -FFF8A225227EA12A>65 DIIIIII76 D79 -DI82 D<007FB61280A2397E03F80F0078140700701403006014 
-0100E015C0A200C01400A400001500B3A20003B512F8A222227EA127>84 -D87 D97 D99 -DI<13FE3807FF80380F -87C0381E01E0003E13F0EA7C0014F812FCA2B5FCA200FCC7FCA3127CA2127E003E13186C -1330380FC0703803FFC0C6130015167E951A>I<3803FC1E380FFF7F381F0F8F383E07CF -383C03C0007C13E0A5003C13C0EA3E07381F0F80EBFF00EA13FC0030C7FCA21238383FFF -806C13F06C13F84813FCEA380048133E00F0131EA40078133C007C137C383F01F8380FFF -E00001130018217E951C>103 DI<121C123E127FA3123E121CC7FCA7B4FCA2121FB2 -EAFFE0A20B247EA310>I<3AFF07F007F090391FFC1FFC3A1F303E303E01401340496C48 -7EA201001300AE3BFFE0FFE0FFE0A22B167E9530>109 D<38FF07E0EB1FF8381F307CEB -403CEB803EA21300AE39FFE1FFC0A21A167E951F>I<13FE3807FFC0380F83E0381E00F0 -003E13F848137CA300FC137EA7007C137CA26C13F8381F01F0380F83E03807FFC03800FE -0017167E951C>I<38FF0FE0EB3FF8381FF07CEB803E497E1580A2EC0FC0A8EC1F80A290 -38803F00EBC03EEBE0FCEB3FF8EB0FC090C8FCA8EAFFE0A21A207E951F>I114 -DI<487EA412 -03A21207A2120F123FB5FCA2EA0F80ABEB8180A5EB8300EA07C3EA03FEEA00F811207F9F -16>I<38FF01FEA2381F003EAF147E14FE380F81BE3907FF3FC0EA01FC1A167E951F>I<39 -FFE01FE0A2390F800600A2EBC00E0007130CEBE01C00031318A26C6C5AA26C6C5AA2EB7C -C0A2137F6D5AA26DC7FCA2130EA21B167F951E>I<387FFFF0A2387C03E0387007C0EA60 -0F38E01F8000C01300133E137EC65A5B485A00031330EA07E013C0380F8070121F383F00 -60003E13E0EA7C03B5FCA214167E9519>122 D E /Fk 4 122 df15 -D<133C13E0EA01C013801203AD13005A121C12F0121C12077E1380AD120113C0EA00E013 -3C0E297D9E15>102 D<12F0121C12077E1380AD120113C0EA00E0133C13E0EA01C01380 -1203AD13005A121C12F00E297D9E15>I<12021207A61202A3EA7270EAFFF8EA7270EA02 -00A21207B11202A60D267E9C12>121 D E /Fl 12 114 df<38078010EA1FC0383FE020 -EA7FF03860304038C01080128038000900A2130AA3130CA21308A31318A35BA45BA21340 -141B7F9115>13 D<126012F0A2126004047C830C>58 D<126012F0A212701210A41220A2 -12401280040C7C830C>I<140CA2141CA2143C147C145C149C148EEB010EA213021304A2 -1308A213101320EB3FFEEB4007A21380EA0100A21202A21206121E39FF807FF01C1D7F9C -1F>65 D<3801FFE038003C001338A45BA45BA4485AA438038002A31404EA0700140C1418 
-1438000E13F0B5FC171C7E9B1C>76 D<3801FFFE39003C03C090383800E015F01570A249 -13F0A3EC01E001E013C0EC0780EC1E00EBFFF03801C03080141CA2EA0380A43807003C15 -08A2151048131E39FFE00E20C7EA07C01D1D7E9B20>82 DI97 D<123F1207A2120EA45AA4EA39E0EA3A30EA3C1812381270131CA3EAE038A31330 -1370136013C01261EA2300121E0E1D7E9C12>II110 D113 -D E /Fm 77 125 df<126012F0AF12601200A4126012F0A212600419779816>33 -DII<13C01201A3EA07F0EA1F -FC48B4FCEA7DCF38F1C78012E1A338F1C300EA79C0127FEA1FF0EA07FCEA01FE13CFEBC7 -80EAF1C3A3EAE1C712F13879DF00EA3FFE6C5AEA07E0EA01C0A2120011207E9C16>I38 -D<1218123C123E121E120EA3121E121C123C127812F01260070D799816>I<13E01201EA -07C013005A121E5A123812781270A312F05AA77E1270A312781238123C7E7E7E13C0EA01 -E012000B217A9C16>I<12E07E127C121C121E7EEA0780120313C01201A313E01200A712 -0113C0A3120313801207EA0F00121E121C127C12F05A0B217C9C16>III<1238127C127EA2123E120E121E -123C127C12F81260070B798416>II<127012F8A3127005057884 -16>IIIIII<137C13FC13DC1201EA039CA2EA071C120F120E121E123C1238 -127812F0B512E0A338001C00A53801FFC0A313197F9816>II<13F8EA03FEEA0FFFEA1F0F123E123CEA78060070C7FC12F0EAF3F8EA -EFFE12FFEAF80F38F00780A2EAE00312F0A21270EA7807EB0F006C5AEA1FFEEA0FF8EA03 -E011197E9816>I<12E0B51280A338E00F00131EEA001C5B137813705BA2485AA3485AA4 -48C7FCA7111A7E9916>III<127012F8A312701200A8127012F8A312700512789116>I<1238 -127CA312381200A81238127CA3123C121C123C123812F812F012600618799116>III<12C012F012FC123EEA0F806C7EEA01F06C7E133EEB1F801307131FEB3E -0013F8485AEA07C0485A003EC7FC12FC12F012C011157E9616>II<13E0487EA213B0A2EA03B8A31318EA071CA5EA0E0EA2EA0FFEA2 -487EEA1C07A3387F1FC000FF13E0007F13C013197F9816>65 D<3801F180EA07FF5AEA1F -0FEA3C0712781303127000F0C7FC5AA77E387003801278A2EA3C07381F0F00EA0FFE6C5A -EA01F011197E9816>67 DI<387FFFC0B5FC7EEA1C -01A490C7FCA2131CA2EA1FFCA3EA1C1CA290C7FC14E0A5EA7FFFB5FC7E13197F9816>I< -B512E0A3EA1C00A41400A2131CA2EA1FFCA3EA1C1CA290C7FCA6B47E7F5B13197F9816> -I<387F1FC038FFBFE0387F1FC0381C0700A7EA1FFFA3EA1C07A9387F1FC038FFBFE0387F -1FC013197F9816>72 DI<387F0FE038FF8F 
-F0387F0FE0381C0780EB0F00130E5B133C5B5B5BEA1DF0121F7F1338EA1E1C121C7FA27F -A2EB0380387F07E038FF8FF0387F07E01419809816>75 DI<38FC07E0EAFE0FA2383A0B80EA3B1BA513BBEA39B3A413F3 -EA38E3A21303A538FE0FE0A313197F9816>I<387E1FC038FF3FE0387F1FC0381D070013 -87A313C7A2121CA213E7A31367A21377A21337A31317EA7F1FEAFF9FEA7F0F13197F9816 ->III82 -DI<387FFFE0B5FC -A2EAE0E0A400001300AFEA07FC487E6C5A13197F9816>I<387F07F038FF8FF8387F07F0 -381C01C0B0380E0380A23807070013FF6C5AEA00F81519809816>I<387F1F80133F131F -380E1E00131CEA073C1338EA03B813F012015B120012017F120313B81207131CA2EA0E0E -A2487E387F1FC000FF13E0007F13C013197F9816>88 D<38FE0FE0EAFF1FEAFE0F381C07 -00A2EA0E0EA26C5AA3EA03B8A2EA01F0A26C5AA8EA03F8487E6C5A13197F9816>I91 D<12C07EA21270A27EA27EA27EA27EA26C7E -A26C7EA26C7EA21370A27FA27FA27FA27FA2EB0380A2130111207E9C16>II<120C121E123C1278127012F012E0A312F012F81278 -1230070D789B16>96 DI<127E12FE127E120EA4 -133EEBFF80000F13C0EB83E01301380E00F0A21470A414F0000F13E01301EB83C013FF00 -0E1300EA063C1419809816>II<133F5B7F1307A4EA03C7EA0FF7 -EA3FFFEA3C1F487E487EA212E0A412F05BEA781FEA7C3F383FFFE0381FF7F03807C7E014 -197F9816>II<131FEB7F8013FFEA01E7EBC30013C0 -A2EA7FFFB5FCA2EA01C0ACEA3FFE487E6C5A11197F9816>I<3803E3C0380FFFE05A381E -3CC0383C1E00EA380EA3EA3C1E6C5AEA1FFC5BEA3BE00038C7FCA2EA1FFC13FF4813C0EA -780338F001E0EAE000A3EAF001387C07C0383FFF80380FFE00EA03F8131C7F9116>I<12 -7E12FE127E120EA4133C13FF000F138013871303A2120EA9387FC7F038FFE7F8387FC7F0 -1519809816>II108 D<38F9C38038FFEFC0EBFFE0EA3E -7CEA3C78EA3870AA38FE7CF8A2EB3C781512809116>IIIII<38FF0FC0EB3FE0137F3807F040EBE0005B5BA290C7 -FCA7EAFFFCA313127F9116>II<12035AA4EA -7FFFB5FCA20007C7FCA75BEB0380A2130713873803FF005BEA00F811177F9616>I<387E -1F80EAFE3FEA7E1FEA0E03AA1307EA0F0FEBFFF06C13F83803E3F01512809116>I<38FF -1FE013BF131F38380380A413E33819F300A213B3EA1DB7A4EA0F1EA313127F9116>119 -D<387F1FC0133F131F380F1C00EA073CEA03B813F012016C5A12017FEA03B8EA073C131C -EA0E0E387F1FC038FF3FE0387F1FC013127F9116>I<387F1FC038FF9FE0387F1FC0381C 
-0700120E130EA212075BA2EA039CA21398EA01B8A2EA00F0A35BA3485A1279127BEA7F80 -6CC7FC123C131B7F9116>I<383FFFC05AA238700780EB0F00131EC65A5B485A485AEA07 -8048C7FC381E01C0123C1278B5FCA312127F9116>I<12E0B3AE0320779C16>124 -D E /Fn 10 58 df<121FEA3180EA60C0EA4040EAC060A8EA4040EA60C0EA3180EA1F00 -0B107F8F0F>48 D<120C123C12CC120CACEAFF8009107E8F0F>I<121FEA6180EA40C0EA -806012C01200A213C0EA0180EA030012065AEA10201220EA7FC012FF0B107F8F0F>I<12 -1FEA2180EA60C0A2120013801201EA0F00EA00801340136012C0A2EA8040EA6080EA1F00 -0B107F8F0F>I<1203A25A5A120B121312331223124312C3EAFFE0EA0300A4EA1FE00B10 -7F8F0F>III<1240EA7FE013C0EA8080A2EA010012025AA2 -120C1208A21218A50B117E900F>I<121FEA3180EA60C0A3EA7180EA3F00120FEA3380EA -61C0EAC060A3EA4040EA6080EA1F000B107F8F0F>I<121FEA3180EA60C0EAC0401360A3 -EA40E01221EA1E6012001340EA60C01380EA4300123E0B107F8F0F>I -E /Fo 66 124 df11 D<13FCEA0782EA0E07121C130290C7FCA4B5FCEA1C07AC38FF1FE013 -17809614>I<13FFEA0707120E121CA6B5FCEA1C07AC38FFBFE01317809614>I<12011202 -1204120C1218A21230A212701260A312E0AA1260A312701230A21218A2120C1204120212 -0108227D980E>40 D<12801240122012301218A2120CA2120E1206A31207AA1206A3120E -120CA21218A2123012201240128008227E980E>I<126012F0A212701210A21220A21240 -A2040A7D830A>44 DI<126012F0A2126004047D830A>I<130813 -181330A31360A313C0A3EA0180A3EA0300A21206A35AA35AA35AA35AA35AA20D217E9812 ->I<12035AB4FC1207B1EA7FF00C157E9412>49 DII<1330A2137013F01201 -1370120212041208121812101220124012C0EAFFFEEA0070A5EA03FE0F157F9412>II -I<1240EA7FFE13FC13F8EAC008EA80101320EA00401380A2EA0100A25A12021206A2120E -A512040F167E9512>I57 -D61 D<13101338A3135CA3138EA3EA0107A200 -031380EA0203A23807FFC0EA0401A2380800E0A21218003813F038FE03FE17177F961A> -65 DIIIIII73 D<38FF80FE381C0078146014401480EB0100130613085B13381378139C -EA1D0E121EEA1C07EB0380EB01C0A2EB00E014701478147C38FF80FF18177F961B>75 -DI<00FEEB03F800 -1E14C000171305A338138009A23811C011A33810E021A2EB7041A3EB3881A2EB1D01A213 -0EA2123839FE040FF81D177F9620>I<00FC13FE001E1338001F13101217EA1380EA11C0 
-A2EA10E013701338A2131C130E130F1307EB0390EB01D0A2EB00F014701430123800FE13 -1017177F961A>I<13FCEA0303380E01C0381C00E0481370003013300070133800601318 -00E0131CA700701338A200301330003813706C13E0380E01C038030300EA00FC16177E96 -1B>II82 DI<387FFFF83860381800401308A200801304A300001300 -AF3807FFC016177F9619>I<38FF80FE383C0038001C1310A26C1320A26C1340A3380380 -80A213C100011300A2EA00E2A213F61374A21338A3131017177F961A>86 -D<38FF83FC381F01E0380E00807EEB8100EA0382EA01C213E4EA00E81378A21338137C13 -5E138EEA0187EB0780EA0203380601C0000413E0EA0C00001C13F038FF03FE17177F961A ->88 D<12FCA212C0B3AB12FCA206217D980A>91 D<12FCA2120CB3AB12FCA2062180980A ->93 D97 D<12FC121CA813F8EA1F06EA1C031480130114C0A4148013031400 -EA1B0EEA10F81217809614>II<137E130EA8EA07CEEA1C3EEA300E1270126012E0A412 -601270EA301EEA182E3807CFC012177F9614>IIII<12FC121CA8137CEA1D8EEA1E07121CAA38FF9FE01317809614>I<121812 -3CA212181200A5127C121CAC12FF081780960A>I<1203EA0780A2EA0300C7FCA5EA1F80 -1203AF1243EAE30012E7127C091D82960B>I<12FC121CA8EB3F80EB1C00131813205B13 -C0EA1FE0EA1CF0137013787F7FA238FF3FC01217809613>I<12FC121CB3A3EAFF800917 -80960A>I<38FC7C1F391D8E6380391E0781C0001C1301AA39FF9FE7F81D0E808D1E>IIIII< -EAFCF0EA1D38121EEA1C101300A9EAFF800D0E808D0E>II<1208A31218 -A21238EAFF80EA3800A71340A4EA1C80EA0F000A147F930E>III<38FCFE7C383838381410381C3C20A2134C380E4E40A2138638 -078780A2130300031300A2160E7F8D19>IIIII E /Fp 1 4 df<120CA2EACCC012EDEA -7F80EA0C00EA7F80EAEDC012CCEA0C00A20A0B7D8B10>3 D E /Fq -10 58 df<120FEA30C0EA6060A2EA4020EAC030A9EA4020EA6060A2EA30C0EA0F000C13 -7E9211>48 D<120C121C12EC120CAFEAFFC00A137D9211>I<121FEA60C01360EAF07013 -301260EA0070A2136013C012011380EA02005AEA08101210EA2020EA7FE012FF0C137E92 -11>II<136013E0A2EA016012021206120C1208121012 -20126012C0EAFFFCEA0060A5EA03FC0E137F9211>III<1240EA7FFC13F8EA401012801320EA00401380EA0100 -A25A12021206A2120EA512040E147E9311>II< -120FEA3080EA6040EA4060EAC0201330A31240EA6070EA30B0EA0F30120013201360EAE0 -401380EA4100123E0C137E9211>I E /Fr 76 124 df11 
-D<137E3801C180EA0301380703C0120EEB018090C7FCA5B512C0EA0E01B0387F87F8151D -809C17>II< -90383F07E03901C09C18380380F0D80701133C000E13E00100131892C7FCA5B612FC390E -00E01CB03A7FC7FCFF80211D809C23>I34 D<126012F012F812681208A31210A212201240 -1280050C7C9C0C>39 D<13401380EA0100120212065AA25AA25AA212701260A312E0AC12 -60A312701230A27EA27EA27E12027EEA008013400A2A7D9E10>I<7E12407E7E12187EA2 -7EA27EA213801201A313C0AC1380A312031300A21206A25AA25A12105A5A5A0A2A7E9E10 ->I<1306ADB612E0A2D80006C7FCAD1B1C7E9720>43 D<126012F0A212701210A41220A2 -12401280040C7C830C>II<126012F0A2126004047C830C>I48 D<12035A123F12C71207B3A4EA0F80EAFFF80D1C7C9B -15>III<13 -0CA2131C133CA2135C13DC139CEA011C120312021204120C1208121012301220124012C0 -B512C038001C00A73801FFC0121C7F9B15>II<13F0EA030CEA0604EA0C0EEA181E1230130CEA7000A21260EAE3 -E0EAE430EAE818EAF00C130EEAE0061307A51260A2EA7006EA300E130CEA1818EA0C30EA -03E0101D7E9B15>I<1240387FFF801400A2EA4002485AA25B485AA25B1360134013C0A2 -12015BA21203A41207A66CC7FC111D7E9B15>III<126012F0A212601200AA126012F0 -A2126004127C910C>I<126012F0A212601200AA126012F0A212701210A41220A2124012 -80041A7C910C>I<1306A3130FA3EB1780A3EB23C0A3EB41E0A3EB80F0A200017FEB0078 -EBFFF83803007C0002133CA20006133E0004131EA2000C131F121E39FF80FFF01C1D7F9C -1F>65 DI<90381F8080EBE061380180 -1938070007000E13035A14015A00781300A2127000F01400A8007014801278A212386CEB -0100A26C13026C5B380180083800E030EB1FC0191E7E9C1E>IIII<90381F -8080EBE0613801801938070007000E13035A14015A00781300A2127000F01400A6ECFFF0 -EC0F80007013071278A212387EA27E6C130B380180113800E06090381F80001C1E7E9C21 ->I<39FFF3FFC0390F003C00ACEBFFFCEB003CAD39FFF3FFC01A1C7E9B1F>III76 DII< -EB3F80EBE0E03803803848487E000E7F487F003C148000381303007814C0A20070130100 -F014E0A8007014C000781303A200381480003C1307001C14006C130E6C5B6C6C5A3800E0 -E0EB3F801B1E7E9C20>II82 D<3807E080EA1C19EA3005EA7003EA600112E01300A36C13007E127CEA7FC0EA -3FF8EA1FFEEA07FFC61380130FEB07C0130313011280A300C01380A238E00300EAD002EA 
-CC0CEA83F8121E7E9C17>I<007FB512C038700F010060130000401440A200C014201280 -A300001400B1497E3803FFFC1B1C7F9B1E>I<39FFF07FC0390F000E001404B3A26C5B13 -8000035B12016C6C5AEB70C0011FC7FC1A1D7E9B1F>I<39FFE00FF0391F0003C06CEB01 -8015006D5A00071302A26C6C5AA36C6C5AA213F000005BA2EBF830EB7820A26D5AA36D5A -A2131F6DC7FCA21306A31C1D7F9B1F>I<3AFFE0FFE0FF3A1F001F003C001E011E13186C -011F1310A3D807801420EC2780A2D803C01440EC43C0A213E00001903881E080A33A00F1 -00F100A3017913FA017A137AA2013E137C013C133CA301181318A3281D7F9B2B>I<397F -F0FFC0390FC03E0038078018EA03C0EBE01000015BEBF06000001340EB7880137D013DC7 -FC7F131F7F80A2EB13C0EB23E01321EB41F0EBC0F8EB80783801007C48133C00027F0006 -131F001FEB3F8039FFC0FFF01C1C7F9B1F>I<12FEA212C0B3B312FEA207297C9E0C>91 -D -I<12FEA21206B3B312FEA20729809E0C>I97 -D<12FC121CAA137CEA1D86EA1E03381C018014C0130014E0A614C013011480381E0300EA -1906EA10F8131D7F9C17>II<133F1307AAEA03E7EA0C17EA180F487E12 -70126012E0A61260127012306C5AEA0C373807C7E0131D7E9C17>II<13F8EA018CEA071E1206EA0E0C1300A6EAFFE0EA0E00B0EA7FE00F1D809C -0D>II<12FC121CAA137C1387EA1D03001E1380121CAD38FF9FF0141D -7F9C17>I<1218123CA21218C7FCA712FC121CB0EAFF80091D7F9C0C>I<13C0EA01E0A2EA -00C01300A7EA0FE01200B3A21260EAF0C012F1EA6180EA3E000B25839C0D>I<12FC121C -AAEB3FC0EB0F00130C13085B5B5B13E0121DEA1E70EA1C781338133C131C7F130F148038 -FF9FE0131D7F9C16>I<12FC121CB3A9EAFF80091D7F9C0C>I<39FC7E07E0391C83883839 -1D019018001EEBE01C001C13C0AD3AFF8FF8FF8021127F9124>IIIIIII<1204A4120CA212 -1C123CEAFFE0EA1C00A91310A5120CEA0E20EA03C00C1A7F9910>I<38FC1F80EA1C03AD -1307120CEA0E1B3803E3F014127F9117>I<38FF07E0383C0380381C0100A2EA0E02A26C -5AA3EA0388A213D8EA01D0A2EA00E0A3134013127F9116>I<39FF3FCFE0393C0F038038 -1C07011500130B000E1382A21311000713C4A213203803A0E8A2EBC06800011370A2EB80 -30000013201B127F911E>I<387F8FF0380F03801400EA0702EA0384EA01C813D8EA00F0 -1370137813F8139CEA010E1202EA060738040380381E07C038FF0FF81512809116>I<38 -FF07E0383C0380381C0100A2EA0E02A26C5AA3EA0388A213D8EA01D0A2EA00E0A31340A2 
-5BA212F000F1C7FC12F31266123C131A7F9116>I -II E /Fs 40 122 df<49B4FC011F13C090387F81E0EBFC013901 -F807F01203EA07F0A4EC01C091C8FCA3EC3FF8B6FCA33807F003B3A33A7FFF3FFF80A321 -2A7FA925>12 D<130E131E137EEA07FE12FFA212F81200B3AB387FFFFEA317277BA622> -49 DII<140E141E143E147E14 -FEA213011303EB077E130EA2131C1338137013E0A2EA01C0EA0380EA0700120EA25A5A5A -5AB612F8A3C7EAFE00A890387FFFF8A31D277EA622>I<00181303381F801FEBFFFE5C5C -5C14C091C7FC001CC8FCA7EB7FC0381DFFF8381F80FC381E003F1208C7EA1F8015C0A215 -E0A21218127C12FEA315C05A0078EB3F80A26CEB7F00381F01FE6CB45A000313F0C61380 -1B277DA622>II<1238123E003FB512F0A34814E015C0158015003870000EA25C485B5C5CEA -00015C495A130791C7FC5B5B131E133EA2137E137CA213FCA41201A76C5A13701C297CA8 -22>I65 D<91387FE003903907FFFC07 -011FEBFF0F90397FF00F9F9039FF0001FFD801FC7F4848147F4848143F4848141F485A16 -0F485A1607127FA290C9FC5AA97E7F1607123FA26C7E160E6C7E6C6C141C6C6C143C6C6C -14786CB4EB01F090397FF007C0011FB512800107EBFE009038007FF028297CA831>67 -DIII<91387FE003903907FFFC07011FEBFF0F90397FF00F9F9039FF0001FF -D801FC7F484880484880484880485A82485A82127FA290CAFC5AA892B512F87E7F030013 -00123FA26C7EA26C7E6C7E6C7E6C7E6CB45B90387FF007011FB5129F0107EBFE0F903900 -7FF0032D297CA835>I73 -D82 D<01FF13C0000313E1000F13F9381F80FF383F003F -003E130F481307A200FC1303A214017EA26C90C7FC13C0EA7FFCEBFFE06C13F86C13FE80 -000714806C14C0C6FC010F13E0EB007FEC1FF0140F140700E01303A46C14E0A26C13076C -14C0B4EB0F80EBE01F00E3B5120000E113FC38C01FF01C297CA825>I87 -D<3803FF80000F13F0381F01FC383F80FE147F801580EA1F00C7FCA4EB3FFF3801FC3FEA -0FE0EA1F80EA3F00127E5AA4145F007E13DF393F839FFC381FFE0F3803F8031E1B7E9A21 ->97 DIIIII<90387F80F03901FFE3 -F83907C0FE1C390F807C7C381F003E151048EB3F00A66C133EA26C6C5A6C6C5A3805FFE0 -380C7F8048C8FC121CA2121E381FFFF814FF6C14C06C14E06C14F0120F383E000748EB01 -F8481300A4007CEB01F0A2003FEB07E0390FC01F806CB5120038007FF01E287E9A22>I< -EAFFE0A3120FAC147E9038E1FF809038E30FC001E413E0EBE80701F813F013F0A213E0B0 
-39FFFE3FFFA3202A7DA925>I<1207EA0F80EA1FC0EA3FE0A3EA1FC0EA0F80EA0700C7FC -A7EAFFE0A3120FB3A3EAFFFEA30F2B7EAA12>I107 DI<26FFC07FEB1FC0903AC1FFC07FF0903AC307E0C1F8D80FC49038F101FC9039 -C803F20001D801FE7F01D05BA201E05BB03CFFFE3FFF8FFFE0A3331B7D9A38>I<38FFC0 -7E9038C1FF809038C30FC0D80FC413E0EBC80701D813F013D0A213E0B039FFFE3FFFA320 -1B7D9A25>II<38FFE1FE9038E7FF809038FE0FE0390FF803F09038F001F8 -01E013FC140015FEA2157FA8157E15FEA215FC140101F013F89038F803F09038FC0FE090 -38EFFF809038E1FC0001E0C7FCA9EAFFFEA320277E9A25>I<38FFC3E0EBC7F8EBCC7C38 -0FD8FE13D0A213F0EBE07C1400B0B5FCA3171B7E9A1B>114 D<3803FE30380FFFF0EA3E -03EA7800127000F01370A27E00FE1300EAFFE06CB4FC14C06C13E06C13F0000713F8C6FC -EB07FC130000E0137C143C7E14387E6C137038FF01E038E7FFC000C11300161B7E9A1B> -I<1370A413F0A312011203A21207381FFFE0B5FCA23807F000AD1470A7000313E03801F8 -C0EA00FFEB3F0014267FA51A>I<39FFE07FF0A3000F1307B2140FA2000713173903F067 -FF3801FFC738007F87201B7D9A25>I<3BFFFC7FFC1FFCA33B0FE00FE001C02607F007EB -0380A201F8EBF0070003160015F82601FC0F130EA29039FE1FFC1E0000011C131C15FE90 -39FF387E3C017F1438EC787F6D486C5AA29138E01FF0011F5CA26D486C5AA36D486C5AA2 -2E1B7F9A31>119 D<39FFFC1FFEA33907F00780D803F813006C6C5AEBFE1E00005BEB7F -78EB3FF85C6D5A130F6D7E80130F497EEB3DFEEB38FFEB787F9038F03F80D801E013C039 -03C01FE0EB800F39FFF03FFFA3201B7F9A23>I<39FFFE07FFA33907F000E0A2EBF80100 -0314C0A23901FC0380A2EBFE07000014006D5AEB7F0EA2EB3F9CA214FC6D5AA26D5AA213 -075CA26D5AA25CA21307003890C7FC127CEAFE0EA25B5BEA7C70EA3FE0EA0F8020277F9A -23>I E /Ft 14 123 df97 D99 D -II<13F338038B8038060700120E120C121CEA380EA4EA301CA3EA183C5BEA -07B8EA0038A25B1260EAE0E0EAC1C0007FC7FC11177E8F12>103 -D<1203120712061200A61238124C124E128E129CA2121C1238A212701272A212E212E412 -64123808197C980C>105 D<121F1207A3120EA4121CA41238A41270A412E4A412E81230 -081A7D990A>108 D110 DII115 D<1206120EA45AA2EAFFC0EA1C005AA45AA412E1A312 -E212E412380A177C960D>II122 -D E /Fu 54 124 df<13FCEA0182EA0703EA0607EA0E0290C7FCA5B5FCEA0E07AE387F0F -E0131A809915>12 
D<90387E1F803901C17040390703C0600006EB80E0000E14401500A5 -B612E0380E0380AE397F8FE3FC1E1A809920>14 D<1380EA010012025A120C120812185A -A35AA412E0AA1260A47EA37E1208120C12047E7EEA008009267D9B0F>40 -D<7E12407E7E12181208120C7EA37EA41380AA1300A41206A35A1208121812105A5A5A09 -267E9B0F>I<126012F0A212701210A31220A212401280040B7D830B>44 -DI<126012F0A2126004047D830B>I50 DI<1330A2137013F0A2EA0170120312021204120C12081210123012 -20124012C0B5FCEA0070A6EA07FF10187F9713>I<1240EA7FFE13FCA2EA4008EA8010A2 -1320EA0040A213801201A213005AA45AA612020F197E9813>55 D<126012F0A212601200 -A8126012F0A2126004107D8F0B>58 D<126012F0A212601200A8126012F0A212701210A3 -1220A21240128004177D8F0B>I<130CA3131EA2133F1327A2EB4380A3EB81C0A348C67E -A213FF38020070A20006137800041338A2487FA2001C131EB4EBFFC01A1A7F991D>65 -D67 -DII71 -D<38FFE7FF380E0070AB380FFFF0380E0070AC38FFE7FF181A7E991D>II<39FFE07F80390E001E00141814105C5C5C49C7FC13025B5B -131C132E134E1387380F0380120E6D7E6D7EA21470A28080143E39FFE0FF80191A7E991E ->75 DI80 D82 DI<007FB5FC38701C0700401301A200C01480008013 -00A300001400B13803FFE0191A7F991C>I<3AFF83FF0FF03A3C007801C0001CEC0080A2 -6CEC0100A2149C6C1402A2EB010EA2D803815BEB8207A2D801C25B9038C40388A2D800E4 -1390A29038E801D0017813E0A2EB7000A201305B01201340241A7F9927>87 -D<12FEA212C0B3AF12FEA207257D9B0B>91 D<12FEA21206B3AF12FEA20725809B0B>93 -D97 D<12FC121CA913F8EA1F0EEA1E07381C0380130114C0A6EB038014 -00EA1E07EA1B0CEA10F0121A7F9915>II<137E130EA9EA03CEEA0C3EEA380E12301270 -12E0A612601270EA381EEA1C2E3807CFC0121A7F9915>IIII<12FC121CA913F8EA1D0CEA1E0EA2121CAB38FF9F -C0121A7F9915>I<1218123CA21218C7FCA612FC121CAEEAFF80091A80990A>II<12FC121C -A9EB3F80EB1E00131813105B5BEA1DC0EA1FE0121C1370137813387F131E131F38FF3FC0 -121A7F9914>I<12FC121CB3A6EAFF80091A80990A>I<38FC7C1F391D8E6380391E0781C0 -A2001C1301AB39FF9FE7F81D107F8F20>IIII114 DI<1204A3120CA2121C123CEAFFC0EA -1C00A81320A5EA0E40EA03800B177F960F>II<38FF1F80383C0600EA1C04A2EA1E0CEA0E08A26C5AA21390EA03A0A2EA01 
-C0A36C5A11107F8F14>I<39FF3F9F80393C0E070000381306381C16041317001E130C38 -0E23081488000F13983807419014D03803C1E01380A200015BEB004019107F8F1C>I<38 -FF3F80383C1C00EA1C18EA0E106C5A13606C5A12017F1203EA0270487E1208EA181CEA38 -1E38FC3FC012107F8F14>I<38FF1F80383C0600EA1C04A2EA1E0CEA0E08A26C5AA21390 -EA03A0A2EA01C0A36C5AA248C7FCA212E112E212E4127811177F8F14>I123 D E /Fv 16 118 df<1303497EA3497EA2EB1BE0A3EB31F0A2EB60F8A2EBE0 -FCEBC07CA248487EEBFFFE487FEB001FA20006EB0F80A2000E14C039FFC07FFCA21E1A7F -9921>65 D76 -D80 D97 D<12FEA2121EA7137F381FC3C0EB00E0001E13F01478 -A2147CA51478A214F0001F13E0381D83C038187F00161A7F9919>II< -EA03F0EA0E1C487EEA3C071278148012F8A2B5FC00F8C7FCA21278A2383C0180381C0300 -EA0E06EA03FC11117F9014>101 D<3807E3C0381C3CE0EA381C38781EC01400A4EA381C -6C5AEA27E00020C7FCA21230EA3FFE381FFF8014C0EA700338E000E0A4387001C0383C07 -803807FC0013197F9016>103 D<123C127EA4123CC7FCA4127EA2121EADEAFF80A2091B -7F9A0D>105 D<39FE1F01F0903863C63C391E81C81C391F01F01EA2001E13E0AA3AFFCF -FCFFC0A222117F9025>109 DII114 DI<1206A4120EA2121EEA3FF0 -12FFEA1E00A81318A5EA0F30EA03E00D187F9711>I<38FE0FE0A2EA1E01AB1303A2380F -05FCEA03F916117F9019>I E /Fw 1 4 df<1202A3EAC218EAF278EA3AE0EA0F80A2EA3A -E0EAF278EAC218EA0200A30D0E7E8E12>3 D E /Fx 34 122 df<127012F812FCA21274 -1204A41208A21210A212201240060F7C840E>44 D<127012F8A3127005057C840E>46 -D<13801203120F12F31203B3A9EA07C0EAFFFE0F217CA018>49 D<13021306130EA2131E -A2132E134EA2138EA2EA010E1202A21204A212081210A21220A212401280B512F838000E -00A7131F3801FFF015217FA018>52 D<00101380381E0700EA1FFF5B13F8EA13E00010C7 -FCA613F8EA130EEA1407381803801210380001C0A214E0A4127012F0A200E013C01280EA -4003148038200700EA1006EA0C1CEA03F013227EA018>I57 -D<497EA3497EA3EB05E0A2EB0DF01308A2497E1478A2497EA3497EA3497EA290B5FC3901 -000780A24814C000021303A24814E01401A2000CEB00F0A2003EEB01F839FF800FFF2023 -7EA225>65 DI<903807E0109038381830EBE0063901C00170390380 -00F048C7FC000E1470121E001C1430123CA2007C14101278A200F81400A812781510127C 
-123CA2001C1420121E000E14407E6C6C13803901C001003800E002EB381CEB07E01C247D -A223>II<9038 -07F00890383C0C18EBE0023901C001B839038000F848C71278481438121E15185AA2007C -14081278A200F81400A7EC1FFF0078EB00F81578127C123CA27EA27E7E6C6C13B86C7E39 -00E0031890383C0C08903807F00020247DA226>71 D80 D<3803F020380C0C60EA1802383001E0EA70000060136012E0A21420A36C1300A212 -78127FEA3FF0EA1FFE6C7E0003138038003FC0EB07E01301EB00F0A214707EA46C1360A2 -6C13C07E38C8018038C60700EA81FC14247DA21B>83 D<39FFFC07FF390FC000F86C4813 -701520B3A5000314407FA2000114806C7E9038600100EB3006EB1C08EB03F020237EA125 ->85 D<3BFFF03FFC03FE3B1F8007E000F86C486C4813701720A26C6C6C6C1340A32703C0 -02F01380A33B01E004780100A33A00F0083C02A39039F8183E06903978101E04A2137C90 -393C200F08A390391E400790A390390F8003E0A36D486C5AA36D5C010213002F237FA132 ->87 D97 D<120E12FE121E120EAB131FEB -61C0EB8060380F0030000E1338143C141C141EA7141C143C1438000F1370380C8060EB41 -C038083F0017237FA21B>II<14E0130F130113 -00ABEA01F8EA0704EA0C02EA1C01EA38001278127012F0A7127012781238EA1801EA0C02 -38070CF03801F0FE17237EA21B>II<133C13C6EA018F1203130FEA0700A9EAFFF8EA0700B21380EA7FF8102380A20F>I< -121C123EA3121CC7FCA8120E12FE121E120EB1EAFFC00A227FA10E>105 -D<120E12FE121E120EB3ADEAFFE00B237FA20E>108 D<390E1FC07F3AFE60E183803A1E -807201C03A0F003C00E0A2000E1338AF3AFFE3FF8FFE27157F942A>I<380E1F8038FE60 -C0381E80E0380F0070A2120EAF38FFE7FF18157F941B>III< -EA0E3CEAFE46EA1E8FEA0F0F13061300120EAD120FEAFFF010157F9413>114 -DI<1202A41206A3120E121E12 -3EEAFFF8EA0E00AB1304A6EA07081203EA01F00E1F7F9E13>I<000E137038FE07F0EA1E -00000E1370AD14F0A238060170380382783800FC7F18157F941B>I<38FFC1FE381E0078 -000E13301420A26C1340A238038080A33801C100A2EA00E2A31374A21338A3131017157F -941A>I<39FF8FF8FF391E01E03C001CEBC018120EECE010A239070260201470A2390384 -30401438A23901C81880141CA23900F00D00140FA2EB6006A320157F9423>I<38FFC1FE -381E0078000E13301420A26C1340A238038080A33801C100A2EA00E2A31374A21338A313 -10A25BA35B12F05B12F10043C7FC123C171F7F941A>121 D E /Fy -20 124 df50 
D<1403A34A7EA24A7EA3EC17E01413A2EC23F01421A2EC40F8A3EC807CA2 -0101137EEC003EA20102133F81A2496D7EA3496D7EA2011880011FB5FCA29039200003F0 -1501A249801500A249147CA348C87EA248153F825AD81F80EC3F80D8FFE0903803FFFCA2 -2E327EB132>65 D<91383FE001903901FFF803903807F01E90391F800307013EC7128701 -78144F49142F4848141F4848140F485A000F150790C8FC481503121E123E003C1501127C -A30078150012F8AB1278127C1601A2123C123E121E001F15027E6D1406000715046C6C14 -086C7E6C6C141001781420013E14C090391F800380903907F00F00903801FFFC9038003F -E028337CB130>67 D69 D71 D77 D82 D<90387F80203801FFE03907C07860380F001C001EEB -06E048130300381301007813001270156012F0A21520A37E1500127C127E7E13C0EA1FF8 -6CB47E6C13F86C7FC613FF010F1380010013C0EC1FE01407EC03F01401140015F8A26C14 -78A57E15706C14F015E07E6CEB01C000ECEB038000C7EB070038C1F01E38807FFCEB0FF0 -1D337CB125>I86 D<13FE380303C0380C00E00010137080003C133C003E13 -1C141EA21208C7FCA3EB0FFEEBFC1EEA03E0EA0F80EA1F00123E123C127C481404A3143E -A21278007C135E6CEB8F08390F0307F03903FC03E01E1F7D9E21>97 -D99 -D101 D<120FEA1F80A4EA0F00C7FCABEA078012FFA2120F1207B3A6 -EA0FC0EAFFF8A20D307EAF12>105 D108 D<380780FE39FF83078090388C03C0390F9001E0EA07A06E7E13C0A2 -5BB3A2486C487E3AFFFC1FFF80A2211F7E9E25>110 D -I<380783E038FF8C18EB907C120FEA07A0EBC0381400A35BB3487EEAFFFEA2161F7E9E19 ->114 D<3801FC10380E0330381800F048137048133012E01410A37E6C1300127EEA3FF0 -6CB4FC6C13C0000313E038003FF0EB01F813006C133CA2141C7EA27E14186C1338143000 -CC136038C301C03880FE00161F7E9E1A>I<1340A513C0A31201A212031207120F381FFF -E0B5FC3803C000B01410A80001132013E000001340EB78C0EB1F00142C7FAB19>I123 D E end -%%EndProlog -%%BeginSetup -%%Feature: *Resolution 300dpi -TeXDict begin - -%%EndSetup -%%Page: 1 1 -1 0 bop 284 369 a Fy(RE2C)21 b({)h(A)f(More)h(V)-6 b(ersatile)23 -b(Scanner)e(Generator)472 490 y Fx(P)o(eter)15 b(Bum)o(bulis)209 -b(Donald)17 b(D.)f(Co)o(w)o(an)272 548 y(Computer)g(Science)e -(Departmen)o(t)h(and)i(Computer)e(Systems)g(Group)692 -606 y(Univ)o(ersit)o(y)e(of)k(W)l(aterlo)q(o)1175 588 -y Fw(\003)782 704 y 
Fx(April)e(15,)i(1994)849 895 y Fv(Abstract)120 -960 y Fu(It)f(is)g(usually)j(claimed)f(that)e(lexical)j(analysis)f -(routines)g(are)e(still)i(co)q(ded)f(b)o(y)f(hand,)i(despite)g(the)e -(widespread)62 1005 y(a)o(v)n(ailabil)q(i)q(t)o(y)h(of)d(scanner)i -(generators,)f(for)f(e\016ciency)i(reasons.)23 b(While)16 -b(e\016ciency)g(is)f(a)f(consideration)q(,)j(there)e(exist)62 -1051 y(freely)k(a)o(v)n(ailable)j(scanner)d(generators)g(suc)o(h)g(as)g -(GLA)f([7])g(that)g(can)h(generate)g(scanners)h(that)e(are)h(faster)f -(than)62 1097 y(most)g(hand-co)q(ded)h(ones.)30 b(Ho)o(w)o(ev)o(er,)18 -b(most)g(generated)g(scanners)g(are)g(tailored)h(for)e(a)g(particular)j -(en)o(vironmen)o(t,)62 1142 y(and)c(retargetting)h(these)e(scanners)i -(to)e(other)h(en)o(vironmen)o(ts,)h(if)f(p)q(ossible,)h(is)f(usually)i -(complex)f(enough)f(to)f(mak)o(e)62 1188 y(a)h(hand-co)q(ded)h(scanner) -f(more)g(app)q(ealing.)26 b(In)16 b(this)g(pap)q(er)g(w)o(e)f(describ)q -(e)i(RE2C,)e(a)g(scanner)i(generator)f(that)f(not)62 -1234 y(only)e(generates)f(scanners)h(whic)o(h)f(are)g(faster)f(\(and)h -(usually)i(smaller\))f(than)f(those)g(pro)q(duced)h(b)o(y)f(an)o(y)g -(other)g(scanner)62 1279 y(generator)i(kno)o(wn)g(to)f(the)g(authors,)g -(includin)q(g)j(GLA,)c(but)i(also)g(adapt)f(easily)i(to)e(an)o(y)h(en)o -(vironmen)o(t.)62 1371 y(Categories)19 b(and)f(Sub)r(ject)g -(Descriptors:)27 b(D.3.2)17 b([)p Fv(Programming)j(Languages)p -Fu(]:)25 b(Language)19 b(Classi\014cations)i({)62 1416 -y Ft(sp)n(e)n(cialize)n(d)11 b(applic)n(atio)o(n)g(languages)o -Fu(;)f(D.3.4)j([)p Fv(Programming)h(Languages)p Fu(]:)j(Pro)q(cessors) -62 1508 y(General)e(T)m(erms:)h(Algorithms,)e(Languages,)h(P)o -(erformance)62 1599 y(Additional)h(Key)d(W)m(ords)h(and)g(Phrases:)j -(Lexical)e(analysis,)g(scanner)f(generator)-42 1736 y -Fs(1)67 b(In)n(tro)r(duction)-42 1827 y Fr(Lexical)14 -b(analysis)g(routines)h(are)g(still)f(often)g(co)q(ded)i(b)o(y)e(hand)g -(despite)i(the)f(widespread)g(a)o(v)n(ailabilit)o(y)c(of)j(scanner)i 
-(gener-)-42 1877 y(ators.)k(F)m(or)14 b(example,)e(while)i(most)f(Unix) -h(systems)g(ha)o(v)o(e)g(a)g(scanner)i(generator)f(installed)f(\(t)o -(ypically)f(LEX)h([15)o(])g(or)g(\015ex)-42 1927 y([16]\),)d(few)i -(Unix)f(applications)f(use)i(a)f(mec)o(hanically)e(generated)k -(scanner.)19 b(One)13 b(commonly)c(cited)k(reason)g(for)e(not)i(using) --42 1977 y(LEX-generated)k(scanners)f(is)f(p)q(erformance:)20 -b(they)15 b(can)g(b)q(e)g(10)g(times)f(slo)o(w)o(er)g(than)h(equiv)n -(alen)o(t)f(hand-co)q(ded)i(scanners)-42 2026 y([13].)h(As)d(a)g -(result,)g(there)h(has)f(b)q(een)h(considerable)f(researc)o(h)i(in)o -(to)d(impro)o(ving)e(the)j(p)q(erformance)g(of)f(mec)o(hanically)e -(gen-)-42 2076 y(erated)17 b(scanners)f([16)o(,)f(7,)f(9].)21 -b(GLA)15 b([7)o(],)g(one)g(suc)o(h)h(scanner)g(generator,)g(can)f(pro)q -(duce)h(scanners)h(that)e(are)h(faster)f(than)-42 2126 -y(most)h(hand-co)q(ded)h(scanners.)29 b(Ho)o(w)o(ev)o(er,)17 -b(the)h(use)f(of)f(hand-co)q(ded)i(scanners)g(is)f(still)f(prev)n(alen) -o(t.)27 b(One)17 b(p)q(ossibilit)o(y)f(is)-42 2176 y(that)e(this)g(is)g -(due)h(to)e(the)i(di\016cult)o(y)e(of)g(adapting)g(the)h(generated)i -(scanners)f(to)f(sp)q(eci\014c)h(applications.)21 2226 -y(Most)g(scanner)g(generators)h(are)f(tailored)f(to)g(a)h(particular)f -(en)o(vironmen)o(t.)19 b(In)14 b(fact,)g(the)i(trend)f(in)f(recen)o(t)i -(y)o(ears)f(has)-42 2276 y(b)q(een)i(to)e(in)o(tegrate)g(scanner)i -(generators)f(with)f(compiler)f(to)q(olkits.)21 b(F)m(or)15 -b(example,)f(GLA)h(is)g(part)g(of)g(the)g(Eli)g(compiler)-42 -2325 y(construction)k(system)f([8)o(],)g(and)g(Rex)f([9])g(is)h(part)g -(of)f(the)h(GMD)f(T)m(o)q(olb)q(o)o(x)g(for)g(Compiler)f(Construction) -1688 2310 y Fq(1)1707 2325 y Fr(.)30 b(Scanners)p -42 -2362 780 2 v 4 2389 a Fp(\003)22 2401 y Fo(P)o(ermission)9 -b(to)j(cop)o(y)e(without)h(fee)g(all)g(or)g(part)g(of)g(this)g -(material)e(is)j(gran)o(ted)d(pro)o(vided)h(that)g(the)h(copies)f(are)h -(not)g(made)f(or)i(distributed)-42 2440 
y(for)g(direct)e(commercial)f -(adv)n(an)o(tage,)g(the)i(A)o(CM)j(cop)o(yrigh)o(t)9 -b(notice)i(and)g(the)g(title)g(of)h(the)f(publication)e(and)i(its)g -(date)g(app)q(ear,)f(and)h(notice)g(is)-42 2480 y(giv)o(en)g(that)g -(cop)o(ying)f(is)i(b)o(y)f(p)q(ermission)e(of)j(the)f(Asso)q(ciation)f -(for)h(Computing)f(Mac)o(hinery)m(.)15 b(T)m(o)d(cop)o(y)f(otherwise,)f -(or)i(to)f(republish,)f(requires)-42 2519 y(a)k(fee)f(and/or)f(sp)q -(eci\014c)g(p)q(ermission.)19 b(Cop)o(yrigh)o(t)12 b(1994)h(b)o(y)g -(the)g(Asso)q(ciation)f(for)h(Computing)e(Mac)o(hinery)m(,)h(Inc.)22 -b(T)m(o)14 b(app)q(ear)e(in)h(LOPLAS)-42 2558 y(2\(1{4\).)5 -2586 y Fn(1)22 2598 y Fo(Also)f(kno)o(wn)e(as)i(Co)q(c)o(ktail)e -(\(Compiler-Compi)o(ler-)o(T)m(o)q(olb)q(o)n(x)f(Karlsruhe\).)923 -2748 y Fr(1)p eop -%%Page: 2 2 -2 1 bop -42 195 a Fr(generated)21 b(b)o(y)d(these)i(to)q(ols)f(assume)f -(the)i(existence)g(of)e(a)h(library)f(of)g(supp)q(ort)h(mo)q(dules)f -(for)h(error)g(handling,)g(input)-42 245 y(bu\013ering,)c(sym)o(b)q(ol) -d(table)i(managemen)o(t,)d(and)j(similar)e(functions.)18 -b(While)c(these)h(supp)q(ort)g(mo)q(dules)e(simplify)f(the)i(task)-42 -295 y(of)c(implemen)o(ting)c(a)k(compiler)f(or)h(in)o(terpreter,)i -(they)e(mak)o(e)f(adaptation)g(to)h(other)g(purp)q(oses)i(more)d -(di\016cult.)16 b(Adaptation)-42 345 y(to)e(other)h(en)o(vironmen)o(ts) -e(is)h(also)f(made)g(more)g(di\016cult)g(b)q(ecause)j(often)e -(assumptions)f(are)i(made)d(ab)q(out)i(the)h(input)f(and)-42 -394 y(restrictions)j(are)e(placed)g(on)g(tok)o(ens)g(in)g(order)g(to)g -(ac)o(hiev)o(e)g(b)q(etter)i(p)q(erformance.)k(RE2C)14 -b(go)q(es)i(to)f(the)g(other)h(extreme:)-42 444 y(it)e(concen)o(trates) -i(solely)d(on)h(generating)g(co)q(de)h(for)e(matc)o(hing)f(regular)i -(expressions.)21 494 y(RE2C)f(is)h(successful)h(at)f(its)g(task:)k(not) -13 b(only)g(do)q(es)i(it)e(pro)q(duce)i(scanners)h(whic)o(h)e(are)g -(faster)g(than)g(those)g(created)i(b)o(y)-42 544 y(other)g(scanner)h 
-(generators)f(but,)f(surprisingly)m(,)f(they)i(are)g(usually)e(smaller) -f(as)j(w)o(ell.)21 b(F)m(urther,)15 b(RE2C)g(do)q(es)h(not)f(mak)o(e) --42 594 y(an)o(y)i(assumptions)e(ab)q(out)h(the)h(input)g(or)f(place)h -(an)o(y)f(restrictions)i(on)e(tok)o(ens.)26 b(T)m(o)16 -b(a)g(large)g(degree,)i(the)g(p)q(erformance)-42 643 -y(and)d(\015exibilit)o(y)d(of)i(RE2C-generated)h(scanners)h(is)f(due)f -(to)h(a)f(no)o(v)o(el)f(metho)q(d)h(for)g(determining)f(when)i(to)f -(re\014ll)g(a)g(bu\013er)-42 693 y(whic)o(h)g(a)o(v)o(oids)f(the)i -(complications)c(in)o(tro)q(duced)k(b)o(y)f(the)g(sen)o(tinel)h(metho)q -(d)e([1)o(].)21 743 y(The)e(follo)o(wing)d(sections)j(of)f(this)h(pap)q -(er)g(describ)q(e)i(RE2C)d(scanner)i(sp)q(eci\014cations,)f(discuss)h -(ho)o(w)e(these)i(sp)q(eci\014cations)-42 793 y(are)21 -b(con)o(v)o(erted)g(in)o(to)f(scanners,)j(and)d(giv)o(e)f(p)q -(erformance)h(results)h(ac)o(hiev)o(ed)g(b)o(y)f(our)g(implemen)o -(tatio)o(n)d(\(including)j(a)-42 843 y(comparison)13 -b(with)g(GLA\).)-42 980 y Fs(2)67 b(Scanner)24 b(Sp)r(eci\014cations) --42 1071 y Fr(An)14 b(RE2C)g(source)h(\014le)f(consists)h(of)e(C[14)o -(])g(or)h(C++[4])842 1056 y Fq(2)874 1071 y Fr(co)q(de)g(in)o(terlea)o -(v)o(ed)g(with)g(commen)o(ts)e(of)h(the)i(form)d Fm(/*!re2c)20 -b Fl(:)7 b(:)g(:)-42 1121 y Fm(*/)15 b Fr(con)o(taining)f(scanner)i(sp) -q(eci\014cations.)23 b(These)16 b(sp)q(eci\014cations)g(are)g(replaced) -g(with)e(generated)j(co)q(de)e(that)g(is)g(in)o(v)o(ok)o(ed)-42 -1170 y(simply)d(b)o(y)i(\\falling)d(in)o(to")i(the)i(commen)o(ts)d(as)i -(illustrated)g(in)f(Figure)h(1)g(and)f(in)h(App)q(endix)g(A)1481 -1155 y Fq(3)1500 1170 y Fr(.)469 1254 y Fm(#define)20 -b(YYCURSOR)42 b(p)469 1304 y(unsigned)20 b(char)h(*scan)p -887 1304 14 2 v 14 w(uint\(unsigned)e(char)i(*p\))p Fk(f)469 -1353 y Fm(/*!re2c)556 1403 y([0-9]+)195 b Fk(f)p Fm(return)20 -b(p;)p Fk(g)556 1453 y Fm([\\)o(000-\\)o(377])87 b Fk(f)p -Fm(return)20 b(NULL;)p Fk(g)469 1503 y Fm(*/)469 1553 -y Fk(g)681 1634 y 
Fr(Figure)14 b(1:)k(A)c(simple)f(scanner.)21 -1733 y(A)i(scanner)h(sp)q(eci\014cation)f(tak)o(es)g(the)g(form)e(of)h -(a)h(list)f(of)g(rules,)h(eac)o(h)g(rule)g(consisting)g(of)f(a)g -(regular)h(expression)h([10)o(])-42 1783 y(and)g(an)g(action)f -(expressed)k(in)c(executable)i(co)q(de.)25 b(Figure)16 -b(2)g(illustrates)g(a)g(trivial)e(RE2C)h(scanner)j(sp)q(eci\014cation)e -(that)-42 1833 y(will)c(b)q(e)h(used)h(as)f(an)f(example)g(throughout)g -(this)h(pap)q(er.)36 b(Eac)o(h)14 b(call)d(to)i(the)g(co)q(de)h -(generated)g(from)d(a)h(sp)q(eci\014cation)i(will)416 -1926 y Fm("print")201 b Fk(f)22 b Fm(return)e(PRINT;)49 -b(/*)21 b(rule)g(5)h(*/)f Fk(g)416 1975 y Fm([a-z])o(+)224 -b Fk(f)22 b Fm(return)e(ID;)115 b(/*)21 b(rule)g(4)h(*/)f -Fk(g)416 2025 y Fm([0-9])o(+)224 b Fk(f)22 b Fm(return)e(DEC;)93 -b(/*)21 b(rule)g(3)h(*/)f Fk(g)416 2075 y Fm("0x")g([0-9a-f])n(+)50 -b Fk(f)22 b Fm(return)e(HEX;)93 b(/*)21 b(rule)g(2)h(*/)f -Fk(g)416 2125 y Fm([\\000-\\)o(377])114 b Fk(f)22 b Fm(return)e(ERR;)93 -b(/*)21 b(rule)g(1)h(*/)f Fk(g)108 2214 y Fr(Figure)15 -b(2:)20 b(Sample)13 b(sp)q(eci\014cation.)22 b Fm([)p -Fl(a)p Fm(-)p Fl(b)p Fm(])14 b Fr(matc)o(hes)h(an)o(y)f(c)o(haracter)i -(b)q(et)o(w)o(een)g Fl(a)f Fr(and)g Fl(b)p Fr(,)f(inclusiv)o(ely)m(.) 
-108 2264 y(The)i(last)f(rule,)g(for)g(example,)e(will)h(matc)o(h)g(an)o -(y)g(eigh)o(t)h(bit)g(c)o(haracter.)23 b(Rules)15 b(are)g(listed)h(in)e -(order)i(of)108 2314 y(precedence)q(.)-42 2414 y(\014rst)d(determine)g -(the)f(longest)h(p)q(ossible)f(pre\014x)h(of)f(the)h(remaining)d(input) -i(that)g(matc)o(hes)g(one)g(of)g(the)h(regular)f(expressions)-42 -2463 y(and)i(will)f(then)h(execute)i(the)e(action)g(in)f(the)i(\014rst) -g(applicable)e(rule.)p -42 2498 780 2 v 5 2525 a Fn(2)22 -2537 y Fo(Retargetting)8 b(RE2C)k(to)f(a)h(di\013eren)o(t)d(language)g -(is)i(straigh)o(tforw)o(ard.)5 2564 y Fn(3)22 2576 y -Fo(RE2C-generated)e(scanners)g(require)h(no)h(additional)d(supp)q(ort)i -(co)q(de.)923 2748 y Fr(2)p eop -%%Page: 3 3 -3 2 bop 138 1062 a @beginspecial 53 @llx 268 @lly 435 -@urx 486 @ury 3820 @rwi @setspecial -%%BeginDocument: scanner.eps - - - - - - - - - - - -/AutoFlatness false def - -% -------------- POSTSCRIPT PROLOG FOR CORELDRAW 3.X ------ - -% Copyright 1992 Corel Corporation. All rights reserved. 
- -/wCorelDict 300 dict def wCorelDict begin/bd{bind def}bind def - -/ld{load def}bd/xd{exch def}bd/_ null def - -/$c 0 def/$m 0 def/$y 0 def/$k 0 def/$t 1 def - -/$n _ def/$o 0 def/$fil 0 def/$bkg false def - -/$C 0 def/$M 0 def/$Y 0 def/$K 0 def/$T 1 def - -/$N _ def/$O 0 def/$PF false def/$ctm matrix currentmatrix def - -/$ptm matrix def/$ttm matrix def/$stm matrix def - -/$fst 128 def/$pad 0 def/$rox 0 def/$roy 0 def - -currentscreen/@dsp xd/$dsp/@dsp def/$dsa xd - -/$dsf xd/$sdf false def/$SDF false def/$Scra 0.0 def - -/$sv 0 def/@cp/closepath ld/@gs/gsave ld/@gr/grestore ld - -/@np/newpath ld/@sv{/$sv save def}bd/@rs{$sv restore}bd - -/@ss{exch $Scra add exch load setscreen}bd - -AutoFlatness{/$cpx ([Error: PathTooComplex; OffendingCommand: AnyPaintingOperator]\n) def - -/@err1{$cpx print flush newpath}bd/@ifl{dup currentflat exch sub 10 gt - -{@err1 exit}{currentflat 2 add setflat}ifelse}bd - -/@fill/fill ld/fill{currentflat{{@fill}stopped{@ifl}{exit}ifelse - -}bind loop setflat}bd/@eofill/eofill ld/eofill{currentflat{{@eofill} - -stopped{@ifl}{exit}ifelse}bind loop setflat}bd - -/@clip/clip ld/clip{currentflat{{@clip}stopped{initclip @ifl}{exit - -}ifelse}bind loop setflat}bd/@eoclip/eoclip ld - -/eoclip{currentflat{{@eoclip}stopped{initclip @ifl}{exit}ifelse - -}bind loop setflat}bd/@stroke/stroke ld/stroke{currentflat{{@stroke} - -stopped{@ifl}{exit}ifelse}bind loop setflat}bd}if - -/InRange{3 -1 roll 2 copy le{pop}{exch pop}ifelse - -2 copy ge{pop}{exch pop}ifelse}bd/wDstChck{2 1 roll dup 3 -1 roll - -eq{1 add}if}bd/@dot{dup mul exch dup mul add 1 exch sub 2 div}bd - -/@lin{exch pop abs 1 exch sub}bd/@MN{2 copy le{pop}{exch pop}ifelse}bd - -/setcmykcolor where{pop}{/setcmykcolor{4 1 roll - -3{3 index add 1 @MN 1 exch sub 3 1 roll}repeat - -setrgbcolor pop}bd}ifelse/setoverprint{/$op xd}bd - -/currentoverprint{$op}bd/setsepcolor{1 exch sub setgray}bd - -/checksepcolor{1 exch sub dup setgray 1 eq exch 1 eq and not}bd - 
-/setprocesscolor{ColorSeparationMode 0 eq{setcmykcolor}{ - -0 4 $ink sub index exch pop 5 1 roll 4{pop}repeat - -setsepcolor}ifelse}bd/findcmykcustomcolor{5 array astore}bd - -/setcustomcolor where{pop}{/setcustomcolor{ColorSeparationMode 0 eq{ - -exch aload pop pop 4{4 index mul 4 1 roll}repeat - -5 -1 roll pop setcmykcolor}{exch aload pop - -CurrentInkName eq{4 index}{0}ifelse 6 1 roll - -5{pop}repeat setsepcolor}ifelse}bd}ifelse/colorimage where{pop}{ - -/colorimage{pop pop pop pop pop{currentfile $dat readhexstring pop pop} - -repeat pop}bd}ifelse/@tc{dup 1 ge{pop}{4{dup - -6 -1 roll mul exch}repeat pop}ifelse}bd/@scc{1 eq setoverprint - -dup _ eq{pop setprocesscolor pop}{findcmykcustomcolor - -exch setcustomcolor}ifelse ColorSeparationMode 0 eq{true}{ - -currentgray 1 eq currentoverprint and not}ifelse}bd - -/@sft{/$tx $tllx $pxf add dup $tllx gt{$pwid sub}if def - -/$ty $tury $pyf sub dup $tury lt{$phei add}if def}bd - -/@stb{pathbbox/$ury xd/$urx xd/$lly xd/$llx xd}bd - -/@ep{{cvx exec}forall}bd/@tp{@sv/$in true def - -2 copy dup $lly le{/$in false def}if $phei sub $ury ge{/$in false def}if - -dup $urx ge{/$in false def}if $pwid add $llx le{/$in false def}if - -$in{@np 2 copy m $pwid 0 rl 0 $phei neg rl $pwid neg 0 rl - -0 $phei rl clip @np $pn cvlit load aload pop - -7 -1 roll 5 index sub 7 -1 roll 3 index sub translate - -/$ctm matrix currentmatrix def @ep pop pop pop pop}{pop pop}ifelse - -@rs}bd/@th{@sft 0 1 $tly 1 sub{dup $psx mul $tx add{ - -dup $llx gt{$pwid sub}{exit}ifelse}loop exch $phei mul $ty exch sub - -0 1 $tlx 1 sub{$pwid mul 3 copy 3 -1 roll add exch - -@tp pop}for pop pop}for}bd/@tv{@sft 0 1 $tlx 1 sub{dup $pwid mul $tx add - -exch $psy mul $ty exch sub{dup $ury lt{$phei add}{exit}ifelse}loop - -0 1 $tly 1 sub{$phei mul 3 copy sub @tp pop}for - -pop pop}for}bd/@pf{@gs $ctm setmatrix $pm concat - -@stb eoclip Bburx Bbury $pm itransform/$tury xd/$turx xd - -Bbllx Bblly $pm itransform/$tlly xd/$tllx xd - -/$wid $turx $tllx sub def/$hei $tury 
$tlly sub def - -@gs $vectpat{1 0 0 0 0 _ $o @scc{eofill}if}{$t $c $m $y $k $n $o @scc{ - -$tllx $tlly translate $wid $hei scale <00> 8 1 false [ 8 0 0 1 0 0 ]{}imagemask - -/$bkg true def}if}ifelse @gr $wid 0 gt $hei 0 gt and{ - -$pn cvlit load aload pop/$pd xd 3 -1 roll sub/$phei xd - -exch sub/$pwid xd/$tlx $wid $pwid div ceiling 1 add def - -/$tly $hei $phei div ceiling 1 add def $psx 0 eq{@tv}{@th}ifelse}if - -@gr @np/$bkg false def}bd/@dlt{ColorSeparationMode 0 eq{ - -/$dc $toc $tot mul $frc $frt mul dup/$c xd sub $fst 1 sub div def - -/$dm $tom $tot mul $frm $frt mul dup/$m xd sub $fst 1 sub div def - -/$dy $toy $tot mul $fry $frt mul dup/$y xd sub $fst 1 sub div def - -/$dk $tok $tot mul $frk $frt mul dup/$k xd sub $fst 1 sub div def - -true}{$frt $frc $frm $fry $frk $frn $o @scc - -dup{/$frk 1 currentgray sub def}{/$frk 0 def}ifelse - -$tot $toc $tom $toy $tok $ton $o @scc dup{/$tok 1 currentgray sub def}{ - -/$tok 0 def}ifelse or dup{/$c 0 def/$m 0 def/$y 0 def/$k $frk def - -/$dc 0 def/$dm 0 def/$dy 0 def/$dk $tok $frk sub $fst 1 sub div def}if - -}ifelse}bd/@ftl{1 index 4 index sub dup $pad mul dup/$pdw xd - -2 mul sub $fst div/$wid xd 2 index sub/$hei xd - -pop translate $c $m $y $k 4 copy ColorSeparationMode 0 ne - -{1 exch sub setgray pop pop pop}{setcmykcolor}ifelse - -0 0 moveto 0 $hei lineto $pdw $hei lineto $pdw 0 lineto 0 0 lineto fill - -$pdw 0 translate $fst{4 copy ColorSeparationMode 0 ne - -{1 exch sub setgray pop pop pop}{setcmykcolor}ifelse - -0 0 moveto 0 $hei lineto $wid $hei lineto $wid 0 lineto 0 0 lineto fill - -$wid 0 translate $dk add 4 1 roll $dy add 4 1 roll - -$dm add 4 1 roll $dc add 4 1 roll}repeat $dk sub 4 1 roll - -$dy sub 4 1 roll $dm sub 4 1 roll $dc sub 4 1 roll - -ColorSeparationMode 0 ne{1 exch sub setgray pop pop pop} - -{setcmykcolor}ifelse 0 0 moveto 0 $hei lineto $pdw $hei lineto $pdw 0 lineto 0 0 lineto fill - -}bd/@ftr{1 index 4 index sub dup $rox mul/$row xd - -2 div 1 index 4 index sub dup $roy mul/$roh xd - -2 
div 2 copy dup mul exch dup mul add sqrt - -$row dup mul $roh dup mul add sqrt add dup/$hei xd $fst div/$wid xd - -4 index add $roh add exch 5 index add $row add - -exch translate pop pop pop pop currentflat dup 5 mul setflat - -$c $m $y $k 4 copy ColorSeparationMode 0 ne - -{1 exch sub setgray pop pop pop}{setcmykcolor}ifelse - -$wid 0 moveto 0 0 $hei 0 360 arc fill 1.0 $pad 2 mul sub dup scale - -$fst{4 copy ColorSeparationMode 0 ne{1 exch sub setgray pop pop pop} - -{setcmykcolor}ifelse $wid 0 moveto 0 0 $hei 0 360 arc fill - -/$hei $hei $wid sub def $dk add 4 1 roll $dy add 4 1 roll - -$dm add 4 1 roll $dc add 4 1 roll}repeat pop pop pop pop - -setflat}bd/@ff{@gs @dlt{$ctm setmatrix eoclip - -newpath Bbllx Bblly moveto Bbllx Bbury lineto - -Bburx Bbury lineto Bburx Bblly lineto $fan rotate - -pathbbox newpath $fty 1 eq{@ftr}{@ftl}ifelse}if - -@gr @np}bd/@Pf{@sv ColorSeparationMode 0 eq $ink 3 eq or{0 J 0 j [] 0 d - -$t $c $m $y $k $n $o @scc pop $ctm setmatrix - -72 1000 div dup matrix scale dup concat dup Bburx exch Bbury exch itransform - -ceiling cvi/Bbury xd ceiling cvi/Bburx xd Bbllx exch Bblly exch itransform - -floor cvi/Bblly xd floor cvi/Bbllx xd $Prm aload pop - -$Psn load exec}{1 setgray eofill}ifelse @rs - -@np}bd/g{1 exch sub/$k xd/$c 0 def/$m 0 def/$y 0 def/$t 1 def/$n _ def/$fil 0 def - -}bd/G{1 exch sub/$K xd/$C 0 def/$M 0 def/$Y 0 def/$T 1 def/$N _ def}bd - -/k{/$k xd/$y xd/$m xd/$c xd/$t 1 def/$n _ def/$fil 0 def}bd - -/K{/$K xd/$Y xd/$M xd/$C xd/$T 1 def/$N _ def}bd - -/x{/$t xd/$n xd/$k xd/$y xd/$m xd/$c xd/$fil 0 def}bd - -/X{/$T xd/$N xd/$K xd/$Y xd/$M xd/$C xd}bd - -/d/setdash ld/i{dup 0 ne{setflat}{pop}ifelse}bd - -/j/setlinejoin ld/J/setlinecap ld/M/setmiterlimit ld - -/w/setlinewidth ld/O{/$o xd}bd/R{/$O xd}bd - -/c/curveto ld/C/c ld/v{4 -2 roll 2 copy 6 -2 roll curveto}bd - -/V/v ld/y{2 copy curveto}bd/Y/y ld/l/lineto ld - -/L/l ld/rl/rlineto ld/m/moveto ld/n/newpath ld - -/N/newpath ld/F{matrix currentmatrix $sdf{$scf $sca $scp 
@ss}if - -$fil 1 eq{@pf}{$fil 2 eq{@ff}{$fil 3 eq{@Pf}{$t $c $m $y $k $n $o @scc - -{eofill}{@np}ifelse}ifelse}ifelse}ifelse $sdf{$dsf $dsa $dsp @ss}if - -setmatrix}bd/f{@cp F}bd/S{matrix currentmatrix - -$ctm setmatrix $SDF{$SCF $SCA $SCP @ss}if $T $C $M $Y $K $N $O @scc{ - -matrix currentmatrix $ptm concat stroke setmatrix}{@np}ifelse - -$SDF{$dsf $dsa $dsp @ss}if setmatrix}bd/s{@cp - -S}bd/B{@gs F @gr S}bd/b{@cp B}bd/W{eoclip}bd - -/p{/$pm xd 7{pop}repeat/$pyf xd/$pxf xd/$pn xd - -/$fil 1 def}bd/P{11{pop}repeat}bd/u{}bd/U{}bd - -/A{pop}bd/q/@gs ld/Q/@gr ld/E{5 array astore - -exch cvlit exch def}bd/`{}bd/~{}bd/@{}bd/&{}bd - -/CorelDrawReencodeVect [ 16#82/quotesinglbase/florin/quotedblbase/ellipsis/dagger/daggerdbl - -16#88/circumflex/perthousand/Scaron/guilsinglleft/OE - -16#91/quoteleft/quoteright/quotedblleft/quotedblright/bullet/endash/emdash - -16#98/tilde/trademark/scaron/guilsinglright/oe - -16#9F/Ydieresis 16#A1/exclamdown/cent/sterling/currency/yen/brokenbar/section - -16#a8/dieresis/copyright/ordfeminine/guillemotleft/logicalnot/minus/registered/macron - -16#b0/degree/plusminus/twosuperior/threesuperior/acute/mu/paragraph/periodcentered - -16#b8/cedilla/onesuperior/ordmasculine/guillemotright/onequarter/onehalf/threequarters/questiondown - -16#c0/Agrave/Aacute/Acircumflex/Atilde/Adieresis/Aring/AE/Ccedilla - -16#c8/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute/Icircumflex/Idieresis - -16#d0/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis/multiply - -16#d8/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn/germandbls - -16#e0/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla - -16#e8/egrave/eacute/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis - -16#f0/eth/ntilde/ograve/oacute/ocircumflex/otilde/odieresis/divide - -16#f8/oslash/ugrave/uacute/ucircumflex/udieresis/yacute/thorn/ydieresis - -] def/@cc{currentfile $dat readhexstring pop}bd - -/@sm{/$ctm $ctm currentmatrix def}bd/@E{/Bbury xd/Bburx xd - -/Bblly xd/Bbllx 
xd}bd/@c{@cp}bd/@p{/$fil 1 def - -1 eq/$vectpat xd/$pm xd/$psy xd/$psx xd/$pyf xd/$pxf xd - -/$pn xd}bd/@P{/$fil 3 def/$Psn xd array astore - -/$Prm xd}bd/@k{/$fil 2 def/$roy xd/$rox xd/$pad xd - -/$fty xd/$fan xd $fty 1 eq{/$fan 0 def}if/$tok xd/$toy xd/$tom xd/$toc xd - -/$frk xd/$fry xd/$frm xd/$frc xd/$frn _ def/$frt 1 def/$ton _ def/$tot 1 def - -}bd/@x{/$fil 2 def/$roy xd/$rox xd/$pad xd - -/$fty xd/$fan xd/$tot xd/$ton xd/$tok xd/$toy xd/$tom xd/$toc xd - -/$frt xd/$frn xd/$frk xd/$fry xd/$frm xd/$frc xd}bd - -/@ii{concat 3 index 3 index m 3 index 1 index l - -2 copy l 1 index 3 index l 3 index 3 index l - -clip pop pop pop pop}bd/@i{@sm @gs @ii 6 index 1 ne{/$frg true def - -pop pop}{1 eq{$T $C $M $Y $K $N $O @scc/$frg xd}{/$frg false def - -}ifelse 1 eq{@gs $ctm setmatrix $t $c $m $y $k $n $o @scc{eofill}if - -@gr}if}ifelse/$frg $frg $bkg or def @np/$ury xd/$urx xd/$lly xd/$llx xd - -/$bts xd/$hei xd/$wid xd/$dat $wid $bts mul 8 div ceiling cvi string def - -$frg{$SDF{$SCF $SCA $SCP @ss}if $llx $lly translate - -$urx $llx sub $ury $lly sub scale $wid $hei abs - -$bts 1 eq{false}{$bts}ifelse [ $wid 0 0 $hei neg 0 - -$hei 0 gt{$hei}{0}ifelse]/@cc load $bts 1 eq{imagemask}{image}ifelse - -$SDF{$dsf $dsa $dsp @ss}if}{$hei abs{@cc pop}repeat}ifelse - -@gr $ctm setmatrix}def/@M{@sv}bd/@N{/@cc{}def - -1 eq{12 -1 roll neg 12 1 roll @I}{13 -1 roll neg 13 1 roll - -@i}ifelse @rs}bd/@I{@sm @gs @ii @np/$ury xd/$urx xd/$lly xd/$llx xd - -/$ncl xd/$bts xd/$hei xd/$wid xd/$dat $wid $bts mul $ncl mul 8 div ceiling cvi string def - -$llx $lly translate $urx $llx sub $ury $lly sub scale - -$wid $hei abs $bts [ $wid 0 0 $hei neg 0 $hei 0 gt{$hei}{0}ifelse] - -/@cc load false $ncl colorimage @gr $ctm setmatrix}bd - -/z{exch findfont exch scalefont setfont}bd - -/ZB{9 dict dup begin 4 1 roll/FontType 3 def - -/FontMatrix xd/FontBBox xd/Encoding 256 array def - -0 1 255{Encoding exch/.notdef put}for/CharStrings 256 dict def - -CharStrings/.notdef{}put/Metrics 256 dict def - 
-Metrics/.notdef 3 -1 roll put/BuildChar{exch - -dup/$char exch/Encoding get 3 index get def - -dup/Metrics get $char get aload pop setcachedevice - -begin Encoding exch get CharStrings exch get - -end exec}def end definefont pop}bd/ZBAddChar{findfont begin - -dup 4 1 roll dup 6 1 roll Encoding 3 1 roll put - -CharStrings 3 1 roll put Metrics 3 1 roll put - -end}bd/Z{findfont dup maxlength 2 add dict exch - -dup{1 index/FID ne{3 index 3 1 roll put}{pop pop}ifelse}forall - -pop dup dup/Encoding get 256 array copy dup/$fe xd - -/Encoding exch put dup/Fontname 3 index put - -3 -1 roll dup length 0 ne{0 exch{dup type 0 type eq{exch pop}{ - -$fe exch 2 index exch put 1 add}ifelse}forall - -pop}if dup 256 dict dup/$met xd/Metrics exch put - -dup/FontMatrix get 0 get 1000 mul 1 exch div - -3 index length 256 eq{0 1 255{dup $fe exch get - -dup/.notdef eq{pop pop}{5 index 3 -1 roll get - -2 index mul $met 3 1 roll put}ifelse}for}if - -pop definefont pop pop}bd/@ftx{{currentpoint 3 -1 roll - -(0) dup 3 -1 roll 0 exch put dup @gs true charpath - -$ctm setmatrix @@txt @gr @np stringwidth pop 3 -1 roll add exch moveto - -}forall}bd/@ft{matrix currentmatrix exch $sdf{$scf $sca $scp @ss}if - -$fil 1 eq{/@@txt/@pf ld @ftx}{$fil 2 eq{/@@txt/@ff ld @ftx}{$fil 3 eq - -{/@@txt/@Pf ld @ftx}{$t $c $m $y $k $n $o @scc{show}{pop}ifelse}ifelse - -}ifelse}ifelse $sdf{$dsf $dsa $dsp @ss}if setmatrix}bd - -/@st{matrix currentmatrix exch $SDF{$SCF $SCA $SCP @ss}if - -$T $C $M $Y $K $N $O @scc{{currentpoint 3 -1 roll - -(0) dup 3 -1 roll 0 exch put dup @gs true charpath - -$ctm setmatrix $ptm concat stroke @gr @np stringwidth pop 3 -1 roll add exch moveto - -}forall}{pop}ifelse $SDF{$dsf $dsa $dsp @ss}if - -setmatrix}bd/@te{@ft}bd/@tr{@st}bd/@ta{dup - -@gs @ft @gr @st}bd/@t@a{dup @gs @st @gr @ft}bd - -/@tm{/$textsave save def @sm concat}bd/e{/t{@te}def}bd - -/r{/t{@tr}def}bd/o{/t{pop}def}bd/a{/t{@ta}def}bd - -/@a{/t{@t@a}def}bd/t{@te}def/T{@np $ctm setmatrix - -/$ttm matrix def $textsave 
restore}bd/@t{/$stm $stm currentmatrix def - -3 1 roll moveto $ttm concat t $stm setmatrix}def - -/@n{/$ttm exch matrix rotate def}bd/@s{}bd - -/@l{}bd/@B{@gs S @gr F}bd/@b{@cp @B}bd/@w{matrix rotate/$ptm xd - -matrix scale $ptm dup concatmatrix/$ptm xd - -1 eq{$ptm exch dup concatmatrix/$ptm xd}if - -1 w}bd/@g{1 eq dup/$sdf xd{/$scp xd/$sca xd - -/$scf xd}if}bd/@G{1 eq dup/$SDF xd{/$SCP xd - -/$SCA xd/$SCF xd}if}bd/@D{3 copy @ss/$dsp xd - -/$dsa xd/$dsf xd}bd/@j{@sv @np}bind def/@J{@rs}bind def - -/@sep{/ColorSeparationMode where{pop}{/ColorSeparationMode 0 def - -/CurrentInkName (Composite) def}ifelse ColorSeparationMode 0 eq{ - -/CurrentInkName (Composite) def}if/CurrentInkName where{pop}{ - -/CurrentInkName (Composite) def}ifelse CurrentInkName (Composite) eq - -{/$ink -1 def}{CurrentInkName (Cyan) eq{/$ink 0 def}{ - -CurrentInkName (Magenta) eq{/$ink 1 def}{CurrentInkName (Yellow) eq - -{/$ink 2 def}{CurrentInkName (Black) eq{/$ink 3 def}{/$ink 4 def - -}ifelse}ifelse}ifelse}ifelse}ifelse}bd @sep - -/@whi{@gs -72000 dup moveto -72000 72000 lineto - -72000 dup lineto 72000 -72000 lineto closepath 1 setgray fill - -@gr}bd/@neg{ [{1 exch sub}/exec cvx currenttransfer/exec cvx] cvx settransfer - -@whi}bd/@reg{[] 0 d 0 setgray .3 setlinewidth - -2 copy 5.4 0 360 arc closepath 2 copy moveto 9 0 rlineto - -2 copy moveto -9 0 rlineto 2 copy moveto 0 9 rlineto - -moveto 0 -9 rlineto stroke}bd/leftbracket{(\050)}def - -/rightbracket{(\051)}def - - - - - -11.4737 setmiterlimit - -1.00 setflat - -/$fst 128 def - -[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 229 - -229 291 457 457 732 543 182 275 275 322 480 229 275 229 229 457 - -457 457 457 457 457 457 457 457 457 229 229 480 480 480 457 836 - -543 543 592 592 543 504 639 592 229 408 543 457 686 592 639 543 - -639 592 543 504 592 543 771 543 543 504 229 229 229 387 457 275 - -457 457 408 457 457 229 457 457 182 182 408 182 686 457 457 457 - -457 275 408 229 457 408 592 408 408 408 275 213 275 480 
750 750 - -750 750 457 275 818 457 457 275 818 750 275 818 750 750 750 750 - -182 182 275 275 229 457 818 275 818 750 275 771 750 750 750 750 - -275 457 457 457 457 148 457 275 605 299 457 480 750 605 750 750 - -750 750 750 275 750 441 750 275 750 299 457 750 750 750 504 543 - -543 543 543 543 543 818 592 543 543 543 543 229 229 229 229 750 - -592 639 639 639 639 639 750 639 592 592 592 592 543 750 504 457 - -457 457 457 457 457 732 408 457 457 457 457 229 229 229 229 750 - -457 457 457 457 457 457 750 504 457 457 457 457 408 750 408 ] - -CorelDrawReencodeVect /_Helvetica-Narrow /Helvetica-Narrow Z - -[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 - -600 600 600 600 600 600 600 600 600 600 600 600 600 600 600 ] - -CorelDrawReencodeVect /_Courier /Courier Z - - - - -@sv - -/$ctm matrix currentmatrix def - -@sv - - -106.13 392.62 110.02 399.38 @E - - -[0.07199 0.00000 0.00000 0.07199 106 392] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (0) @t - -T - - -160.70 392.69 163.30 399.31 @E - - -[0.07199 0.00000 0.00000 0.07199 160 392] @tm - - 0 O 0 @g - 
-0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (1) @t - -T - - -214.06 392.69 217.94 399.24 @E - - -[0.07199 0.00000 0.00000 0.07199 214 392] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (2) @t - -T - - -268.06 392.54 271.94 399.24 @E - - -[0.07199 0.00000 0.00000 0.07199 268 392] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (3) @t - -T - - -321.98 392.47 325.94 399.10 @E - - -[0.07199 0.00000 0.00000 0.07199 322 392] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (4) @t - -T - - -375.98 392.40 379.94 399.10 @E - - -[0.07199 0.00000 0.00000 0.07199 376 392] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (5) @t - -T - - -159.98 446.69 163.94 453.38 @E - - -[0.07199 0.00000 0.00000 0.07199 160 446] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (6) @t - -T - - -106.06 338.83 110.09 345.38 @E - - -[0.07199 0.00000 0.00000 0.07199 106 338] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (7) @t - -T - - -159.98 338.62 163.94 345.46 @E - - -[0.07199 0.00000 0.00000 0.07199 160 338] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (8) @t - -T - - -106.13 284.62 110.02 291.46 @E - - -[0.07199 0.00000 0.00000 0.07199 106 284] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (9) @t - -T - - -157.97 284.69 166.10 291.53 @E - - -[0.07199 0.00000 0.00000 0.07199 158 285] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (10) @t - -T - - -104.54 446.69 111.53 453.31 @E - - -[0.07199 0.00000 0.00000 0.07199 104 446] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 125.00 z - -0 0 (11) @t - -T - - -112.54 440.93 114.55 445.97 @E - - -[0.07199 0.00000 0.00000 0.07199 112 440] @tm - - 0 O 0 @g - -0.00 0.00 0.00 
1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (1) @t - -T - - -166.46 278.93 169.49 283.97 @E - - -[0.07199 0.00000 0.00000 0.07199 166 278] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (2) @t - -T - - -112.46 332.86 115.49 337.90 @E - - -[0.07199 0.00000 0.00000 0.07199 112 332] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (3) @t - -T - - -166.46 332.86 169.49 337.90 @E - - -[0.07199 0.00000 0.00000 0.07199 166 332] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (3) @t - -T - - -328.46 386.93 331.56 391.97 @E - - -[0.07199 0.00000 0.00000 0.07199 328 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (4) @t - -T - - -274.46 386.93 277.56 391.97 @E - - -[0.07199 0.00000 0.00000 0.07199 274 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (4) @t - -T - - -220.46 386.93 223.56 391.97 @E - - -[0.07199 0.00000 0.00000 0.07199 220 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (4) @t - -T - - -166.46 386.93 169.56 391.97 @E - - -[0.07199 0.00000 0.00000 0.07199 166 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (4) @t - -T - - -166.46 440.93 169.56 445.97 @E - - -[0.07199 0.00000 0.00000 0.07199 166 440] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (4) @t - -T - - -382.46 386.78 385.56 391.90 @E - - -[0.07199 0.00000 0.00000 0.07199 382 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Helvetica-Narrow 97.00 z - -0 0 (5) @t - -T - - -94.54 382.54 121.61 409.46 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.07 409.46 m - -115.49 409.46 121.61 403.42 121.61 396.00 c - -121.61 388.58 115.49 382.54 108.07 382.54 c - -100.66 382.54 94.54 388.58 94.54 396.00 c - -94.54 403.42 100.66 409.46 108.07 409.46 c - -@c - -S 
- - -94.54 274.54 121.61 301.46 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.07 301.46 m - -115.49 301.46 121.61 295.42 121.61 288.00 c - -121.61 280.58 115.49 274.54 108.07 274.54 c - -100.66 274.54 94.54 280.58 94.54 288.00 c - -94.54 295.42 100.66 301.46 108.07 301.46 c - -@c - -S - - -94.54 328.61 121.61 355.54 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.07 355.54 m - -115.49 355.54 121.61 349.49 121.61 342.07 c - -121.61 334.66 115.49 328.61 108.07 328.61 c - -100.66 328.61 94.54 334.66 94.54 342.07 c - -94.54 349.49 100.66 355.54 108.07 355.54 c - -@c - -S - - -95.83 329.98 120.17 354.31 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.00 354.31 m - -114.62 354.31 120.17 348.91 120.17 342.14 c - -120.17 335.45 114.62 329.98 108.00 329.98 c - -101.30 329.98 95.83 335.45 95.83 342.14 c - -95.83 348.91 101.30 354.31 108.00 354.31 c - -@c - -S - - -148.46 328.54 175.54 355.46 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -162.00 355.46 m - -169.42 355.46 175.54 349.42 175.54 342.00 c - -175.54 334.58 169.42 328.54 162.00 328.54 c - -154.58 328.54 148.46 334.58 148.46 342.00 c - -148.46 349.42 154.58 355.46 162.00 355.46 c - -@c - -S - - -149.76 329.90 174.10 354.24 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -161.93 354.24 m - -168.55 354.24 174.10 348.84 174.10 342.07 c - -174.10 335.38 168.55 329.90 161.93 329.90 c - -155.23 329.90 149.76 335.38 149.76 342.07 c - -149.76 348.84 155.23 354.24 161.93 354.24 c - -@c - -S - - -148.46 436.54 175.54 463.46 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -162.00 463.46 m - -169.42 463.46 175.54 457.42 175.54 450.00 c - -175.54 442.58 169.42 436.54 162.00 436.54 c - -154.58 436.54 148.46 442.58 148.46 450.00 c - -148.46 457.42 154.58 463.46 162.00 463.46 c - -@c - -S - - -149.76 437.90 174.10 
462.24 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -161.93 462.24 m - -168.55 462.24 174.10 456.84 174.10 450.07 c - -174.10 443.38 168.55 437.90 161.93 437.90 c - -155.23 437.90 149.76 443.38 149.76 450.07 c - -149.76 456.84 155.23 462.24 161.93 462.24 c - -@c - -S - - -94.54 436.54 121.61 463.46 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.07 463.46 m - -115.49 463.46 121.61 457.42 121.61 450.00 c - -121.61 442.58 115.49 436.54 108.07 436.54 c - -100.66 436.54 94.54 442.58 94.54 450.00 c - -94.54 457.42 100.66 463.46 108.07 463.46 c - -@c - -S - - -95.83 437.90 120.17 462.24 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.00 462.24 m - -114.62 462.24 120.17 456.84 120.17 450.07 c - -120.17 443.38 114.62 437.90 108.00 437.90 c - -101.30 437.90 95.83 443.38 95.83 450.07 c - -95.83 456.84 101.30 462.24 108.00 462.24 c - -@c - -S - - -148.46 382.54 175.54 409.46 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -162.00 409.46 m - -169.42 409.46 175.54 403.42 175.54 396.00 c - -175.54 388.58 169.42 382.54 162.00 382.54 c - -154.58 382.54 148.46 388.58 148.46 396.00 c - -148.46 403.42 154.58 409.46 162.00 409.46 c - -@c - -S - - -149.76 383.90 174.10 408.24 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -161.93 408.24 m - -168.55 408.24 174.10 402.84 174.10 396.07 c - -174.10 389.38 168.55 383.90 161.93 383.90 c - -155.23 383.90 149.76 389.38 149.76 396.07 c - -149.76 402.84 155.23 408.24 161.93 408.24 c - -@c - -S - - -148.54 274.61 175.61 301.54 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -162.07 301.54 m - -169.49 301.54 175.61 295.49 175.61 288.07 c - -175.61 280.66 169.49 274.61 162.07 274.61 c - -154.66 274.61 148.54 280.66 148.54 288.07 c - -148.54 295.49 154.66 301.54 162.07 301.54 c - -@c - -S - - -149.83 275.98 174.17 300.31 @E - -0 J 0 j 
[] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -162.00 300.31 m - -168.62 300.31 174.17 294.91 174.17 288.14 c - -174.17 281.45 168.62 275.98 162.00 275.98 c - -155.30 275.98 149.83 281.45 149.83 288.14 c - -149.83 294.91 155.30 300.31 162.00 300.31 c - -@c - -S - - -202.46 382.46 229.54 409.39 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -216.00 409.39 m - -223.42 409.39 229.54 403.34 229.54 395.93 c - -229.54 388.51 223.42 382.46 216.00 382.46 c - -208.58 382.46 202.46 388.51 202.46 395.93 c - -202.46 403.34 208.58 409.39 216.00 409.39 c - -@c - -S - - -203.76 383.83 228.10 408.17 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -215.93 408.17 m - -222.55 408.17 228.10 402.77 228.10 396.00 c - -228.10 389.30 222.55 383.83 215.93 383.83 c - -209.23 383.83 203.76 389.30 203.76 396.00 c - -203.76 402.77 209.23 408.17 215.93 408.17 c - -@c - -S - - -256.46 382.39 283.54 409.32 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -270.00 409.32 m - -277.42 409.32 283.54 403.27 283.54 395.86 c - -283.54 388.44 277.42 382.39 270.00 382.39 c - -262.58 382.39 256.46 388.44 256.46 395.86 c - -256.46 403.27 262.58 409.32 270.00 409.32 c - -@c - -S - - -257.76 383.76 282.10 408.10 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -269.93 408.10 m - -276.55 408.10 282.10 402.70 282.10 395.93 c - -282.10 389.23 276.55 383.76 269.93 383.76 c - -263.23 383.76 257.76 389.23 257.76 395.93 c - -257.76 402.70 263.23 408.10 269.93 408.10 c - -@c - -S - - -310.46 382.32 337.54 409.25 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -324.00 409.25 m - -331.42 409.25 337.54 403.20 337.54 395.78 c - -337.54 388.37 331.42 382.32 324.00 382.32 c - -316.58 382.32 310.46 388.37 310.46 395.78 c - -310.46 403.20 316.58 409.25 324.00 409.25 c - -@c - -S - - -311.76 383.69 336.10 408.02 @E - -0 J 0 j [] 0 d 0 R 0 
@G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -323.93 408.02 m - -330.55 408.02 336.10 402.62 336.10 395.86 c - -336.10 389.16 330.55 383.69 323.93 383.69 c - -317.23 383.69 311.76 389.16 311.76 395.86 c - -311.76 402.62 317.23 408.02 323.93 408.02 c - -@c - -S - - -364.46 382.25 391.54 409.18 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -378.00 409.18 m - -385.42 409.18 391.54 403.13 391.54 395.71 c - -391.54 388.30 385.42 382.25 378.00 382.25 c - -370.58 382.25 364.46 388.30 364.46 395.71 c - -364.46 403.13 370.58 409.18 378.00 409.18 c - -@c - -S - - -365.76 383.62 390.10 407.95 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -377.93 407.95 m - -384.55 407.95 390.10 402.55 390.10 395.78 c - -390.10 389.09 384.55 383.62 377.93 383.62 c - -371.23 383.62 365.76 389.09 365.76 395.78 c - -365.76 402.55 371.23 407.95 377.93 407.95 c - -@c - -S - - -121.54 395.86 148.54 396.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -121.54 396.00 m - -147.17 396.00 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -144.00 398.52 m - -147.96 396.00 L - -144.00 393.48 L - -S - -@J - - -175.54 395.86 202.54 396.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -175.54 396.00 m - -201.17 396.00 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -198.00 398.52 m - -201.96 396.00 L - -198.00 393.48 L - -S - -@J - - -229.54 395.86 256.54 396.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -229.54 396.00 m - -255.17 396.00 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -252.00 398.52 m - -255.96 396.00 L - -252.00 393.48 L - -S - -@J - - -283.54 395.86 310.54 396.14 @E - -0 
J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -283.54 396.00 m - -309.17 396.00 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -306.00 398.52 m - -309.96 396.00 L - -306.00 393.48 L - -S - -@J - - -337.54 395.86 364.54 396.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -337.54 396.00 m - -363.17 396.00 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -360.00 398.52 m - -363.96 396.00 L - -360.00 393.48 L - -S - -@J - - -125.57 268.56 143.86 284.98 @E - - -[0.07198 0.00000 0.00000 0.07199 125 278] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (0) @t - -120 0 (-) @t - -180 0 (9) @t - -0 -125 (a) @t - -60 -125 (-) @t - -120 -125 (f) @t - -180 -125 (\135) @t - -T - - -121.54 287.86 148.54 288.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -121.54 288.00 m - -147.17 288.00 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -144.00 290.52 m - -147.96 288.00 L - -144.00 285.48 L - -S - -@J - - -121.54 341.86 148.54 342.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -121.54 342.00 m - -147.17 342.00 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -144.00 344.52 m - -147.96 342.00 L - -144.00 339.48 L - -S - -@J - - -107.86 301.54 108.14 328.54 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.00 328.54 m - -108.00 302.90 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -110.52 306.14 m - -108.00 302.18 L - -105.48 306.14 L - -S - -@J - - -107.86 355.54 108.14 382.54 
@E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.00 382.54 m - -108.00 356.90 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -110.52 360.14 m - -108.00 356.18 L - -105.48 360.14 L - -S - -@J - - -107.86 409.54 108.14 436.54 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -108.00 409.54 m - -108.00 435.17 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -105.41 432.00 m - -108.00 435.96 L - -110.59 432.00 L - -S - -@J - - -117.00 405.00 153.00 441.00 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -117.00 405.00 m - -152.06 440.06 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -147.96 439.63 m - -152.64 440.64 L - -151.70 436.03 L - -S - -@J - - -117.00 351.00 153.00 387.00 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -117.00 387.00 m - -152.06 351.94 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -151.63 356.04 m - -152.64 351.43 L - -148.03 352.44 L - -S - -@J - - -166.39 341.93 196.63 377.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -173.38 346.82 m - -177.48 347.47 182.59 350.42 186.77 354.60 c - -193.75 361.58 196.63 370.22 193.18 373.68 c - -189.72 377.14 181.08 374.33 174.10 367.34 c - -169.92 363.17 166.90 358.06 166.39 353.95 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -169.34 356.90 m - -166.39 353.30 L - -164.30 357.62 L - -S - -@J - - -166.39 450.72 196.63 485.93 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -173.38 455.62 m - -177.48 456.26 
182.59 459.22 186.77 463.39 c - -193.75 470.38 196.63 479.02 193.18 482.47 c - -189.72 485.93 181.08 483.12 174.10 476.14 c - -169.92 471.96 166.90 466.85 166.39 462.74 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -169.34 465.70 m - -166.39 462.10 L - -164.30 466.42 L - -S - -@J - - -166.39 287.93 196.63 323.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -173.38 292.82 m - -177.48 293.47 182.59 296.42 186.77 300.60 c - -193.75 307.58 196.63 316.22 193.18 319.68 c - -189.72 323.14 181.08 320.33 174.10 313.34 c - -169.92 309.17 166.90 304.06 166.39 299.95 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -169.34 302.90 m - -166.39 299.30 L - -164.30 303.62 L - -S - -@J - - -166.61 396.00 198.00 450.00 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -182.30 396.00 m - -190.94 396.00 198.00 408.17 198.00 423.00 c - -198.00 437.83 190.94 450.00 182.30 450.00 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -185.54 447.41 m - -181.58 450.00 L - -185.54 452.59 L - -S - -@J - - -220.61 396.00 252.00 450.00 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -236.30 396.00 m - -244.94 396.00 252.00 408.17 252.00 423.00 c - -252.00 437.83 244.94 450.00 236.30 450.00 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -239.54 447.41 m - -235.58 450.00 L - -239.54 452.59 L - -S - -@J - - -274.61 396.00 306.00 450.00 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -290.30 396.00 m - -298.94 396.00 306.00 408.17 306.00 423.00 c - -306.00 437.83 298.94 450.00 290.30 450.00 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 
@g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -293.54 447.41 m - -289.58 450.00 L - -293.54 452.59 L - -S - -@J - - -328.61 396.00 360.00 450.00 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -344.30 396.00 m - -352.94 396.00 360.00 408.17 360.00 423.00 c - -360.00 437.83 352.94 450.00 344.30 450.00 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -347.54 447.41 m - -343.58 450.00 L - -347.54 452.59 L - -S - -@J - - -382.54 396.00 413.93 450.00 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -398.23 396.00 m - -406.87 396.00 413.93 408.17 413.93 423.00 c - -413.93 437.83 406.87 450.00 398.23 450.00 C - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -401.47 447.41 m - -397.51 450.00 L - -401.47 452.59 L - -S - -@J - - -391.54 395.86 398.23 396.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -391.54 396.00 m - -398.23 396.00 L - -S - - -175.54 449.86 398.23 450.14 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -398.23 450.00 m - -175.54 450.00 L - -S - - -139.46 385.27 144.43 391.03 @E - - -[0.07199 0.00000 0.00000 0.07199 139 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (p) @t - -T - - -188.93 386.93 193.90 390.96 @E - - -[0.07199 0.00000 0.00000 0.07199 188 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (r) @t - -T - - -242.93 386.93 247.61 392.76 @E - - -[0.07199 0.00000 0.00000 0.07199 242 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (i) @t - -T - - -296.93 386.93 301.97 391.03 @E - - -[0.07199 0.00000 0.00000 0.07199 296 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (n) @t - -T - - -350.93 386.86 355.82 392.18 @E - - 
-[0.07199 0.00000 0.00000 0.07199 350 386] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (t) @t - -T - - -98.93 364.39 103.46 370.22 @E - - -[0.07199 0.00000 0.00000 0.07199 98 364] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (0) @t - -T - - -98.93 310.46 103.97 314.42 @E - - -[0.07199 0.00000 0.00000 0.07199 98 310] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (x) @t - -T - - -125.50 331.78 146.66 338.76 @E - - -[0.07199 0.00000 0.00000 0.07199 125 332] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (0) @t - -120 0 (-) @t - -180 0 (9) @t - -240 0 (\135) @t - -T - - -134.50 367.78 155.66 374.76 @E - - -[0.07199 0.00000 0.00000 0.07199 134 368] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (1) @t - -120 0 (-) @t - -180 0 (9) @t - -240 0 (\135) @t - -T - - -134.28 416.74 166.46 424.73 @E - - -[0.07199 0.00000 0.00000 0.07199 134 418] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (a) @t - -120 0 (-) @t - -180 0 (z) @t - -240 0 (\135) @t - -300 0 (\134) @t - -360 0 (p) @t - -T - - -197.28 417.31 229.46 424.73 @E - - -[0.07199 0.00000 0.00000 0.07199 197 418] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (a) @t - -120 0 (-) @t - -180 0 (z) @t - -240 0 (\135) @t - -300 0 (\134) @t - -360 0 (r) @t - -T - - -251.28 417.31 283.18 424.73 @E - - -[0.07199 0.00000 0.00000 0.07199 251 418] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (a) @t - -120 0 (-) @t - -180 0 (z) @t - -240 0 (\135) @t - -300 0 (\134) @t - -360 0 (i) @t - -T - - -305.28 417.31 337.54 424.73 @E - - -[0.07199 0.00000 0.00000 0.07199 305 418] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (a) @t - -120 0 (-) @t - -180 0 (z) @t - -240 0 
(\135) @t - -300 0 (\134) @t - -360 0 (n) @t - -T - - -359.28 417.31 391.39 424.73 @E - - -[0.07199 0.00000 0.00000 0.07199 359 418] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (a) @t - -120 0 (-) @t - -180 0 (z) @t - -240 0 (\135) @t - -300 0 (\134) @t - -360 0 (t) @t - -T - - -413.50 417.31 434.66 424.15 @E - - -[0.07199 0.00000 0.00000 0.07199 413 418] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (a) @t - -120 0 (-) @t - -180 0 (z) @t - -240 0 (\135) @t - -T - - -193.03 475.78 214.20 482.62 @E - - -[0.07199 0.00000 0.00000 0.07199 193 476] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (a) @t - -120 0 (-) @t - -180 0 (z) @t - -240 0 (\135) @t - -T - - -193.03 367.78 214.20 374.76 @E - - -[0.07199 0.00000 0.00000 0.07199 193 368] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (0) @t - -120 0 (-) @t - -180 0 (9) @t - -240 0 (\135) @t - -T - - -192.74 313.78 227.45 320.76 @E - - -[0.07199 0.00000 0.00000 0.07199 193 314] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (0) @t - -120 0 (-) @t - -180 0 (9) @t - -240 0 (a) @t - -300 0 (-) @t - -360 0 (f) @t - -420 0 (\135) @t - -T - - -52.70 412.56 107.21 429.41 @E - - -[0.07199 0.00000 0.00000 0.07199 53 422] @tm - - 0 O 0 @g - -0.00 0.00 0.00 1.00 k - -e - -/_Courier 125.00 z - -0 0 (\133) @t - -60 0 (\134) @t - -120 0 (0) @t - -180 0 (0) @t - -240 0 (0) @t - -300 0 (-) @t - -360 0 (\134) @t - -420 0 (3) @t - -480 0 (7) @t - -540 0 (7) @t - -600 0 (\135) @t - -660 0 (\134) @t - -0 -125 (\133) @t - -60 -125 (0) @t - -120 -125 (-) @t - -180 -125 (9) @t - -240 -125 (a) @t - -300 -125 (-) @t - -360 -125 (z) @t - -420 -125 (\135) @t - -T - - -54.00 391.54 94.54 401.83 @E - -0 J 0 j [] 0 d 0 R 0 @G - -0.00 0.00 0.00 1.00 K - -0 0.22 0.22 0.00 @w - - -54.00 396.65 m - -55.51 398.16 56.52 
401.83 58.54 401.18 c - -62.50 399.89 63.29 392.18 67.54 392.18 c - -71.71 392.18 72.29 401.18 76.54 401.18 c - -80.71 401.18 81.50 393.55 85.54 392.18 c - -87.48 391.54 88.27 396.58 90.00 396.65 C - -93.17 396.65 L - -S - -@j - -0.00 0.00 0.00 1.00 K - -0.00 0.00 0.00 1.00 k - -0 @g - -0 @G - -[] 0 d 0 J 0 j - -0 R 0 O 0 1.01 1.01 0 @w - -90.00 399.17 m - -93.96 396.65 L - -90.00 394.13 L - -S - -@J - -@rs - -@rs - - - end - -%%EndDocument - @endspecial 108 1136 a Fr(Figure)21 b(3:)32 b(A)20 b(DF)-5 -b(A)21 b(for)f(the)i(sample)d(sp)q(eci\014cation)j(in)e(Figure)g(2.)39 -b(State)21 b(0)f(is)h(the)g(start)g(state.)108 1186 y(Accepting)d -(states)g(are)g(lab)q(eled)e(with)h(the)g(n)o(um)o(b)q(er)f(of)g(the)i -(rule)f(that)g(they)g(matc)o(h.)26 b(F)m(or)16 b(example,)108 -1236 y(state)i(10)d(accepts)j(rule)f(2.)25 b(T)m(ransitions)16 -b(di\013ering)g(only)g(b)o(y)g(lab)q(el)f(are)i(represen)o(ted)i(with)d -(the)h(same)108 1286 y(arc.)i(F)m(or)12 b(example,)f(state)j(0)e(has)h -(transitions)g(to)f(state)i(6)e(on)h(all)e(of)i(the)g(follo)o(wing)d(c) -o(haracters:)19 b Fm(a)p Fr(,)13 b Fl(:)7 b(:)g(:)n Fr(,)108 -1336 y Fm(o)p Fr(,)14 b Fm(q)p Fr(,)f Fl(:)7 b(:)g(:)12 -b Fm(z)p Fr(.)21 1469 y(RE2C)i(is)g(di\013eren)o(t)h(from)e(most)g -(other)i(scanner)h(generators)f(in)f(that)h(the)g(user)g(m)o(ust)f(pro) -o(vide)g(the)h(input)f(bu\013ering)-42 1519 y(mec)o(hanism)i(for)h(the) -i(scanner;)h(the)f(generated)g(co)q(de)g(simply)c(assumes)j(that)g(the) -g(user)h(has)f(de\014ned)h(three)g(p)q(oin)o(ters:)-42 -1569 y Fm(YYCURSOR)p Fr(,)9 b Fm(YYLIMIT)g Fr(and)h Fm(YYMARKER)p -Fr(,)e(and)i(a)h(routine)f Fm(YYFILL\()p Fl(n)p Fm(\))p -Fr(.)15 b(Before)d(executing)f(the)g(generated)h(co)q(de,)f -Fm(YYCURSOR)-42 1619 y Fr(and)h Fm(YYLIMIT)d Fr(m)o(ust)h(b)q(e)i(set)g -(to)g(p)q(oin)o(t)e(to)h(the)h(\014rst)g(and)f(one)h(past)f(the)h(last) -f(c)o(haracter)i(in)e(the)g(bu\013er,)i(resp)q(ectiv)o(ely)m(.)18 -b(After)-42 1669 y(a)f(tok)o(en)g(is)f(recognized,)i(and)f(b)q(efore)g 
-(an)o(y)f(action)g(is)h(executed,)i Fm(YYCURSOR)c Fr(is)h(set)i(to)e(p) -q(oin)o(t)g(to)h(just)g(past)g(the)g(tok)o(en.)-42 1718 -y Fm(YYFILL)d Fr(will)g(b)q(e)i(called)f(as)g(the)h(bu\013er)h(needs)f -(\014lling;)e(at)h(least)h Fl(n)f Fr(additional)e(input)i(c)o -(haracters)i(should)e(b)q(e)h(pro)o(vided.)-42 1768 y(When)g -Fm(YYFILL)d Fr(is)i(called,)f Fm(YYCURSOR)f Fr(will)h(p)q(oin)o(t)g(to) -h(the)g(next)h(c)o(haracter)g(to)f(b)q(e)g(scanned)h(and)f -Fm(YYMARKER)p Fr(,)d(if)i(set,)i(will)-42 1818 y(p)q(oin)o(t)f(to)f(a)g -(p)q(ossible)h(bac)o(ktrac)o(king)g(p)q(oin)o(t)f(in)g(the)h(bu\013er.) -21 b Fm(YYFILL)14 b Fr(m)o(ust)f(up)q(date)j Fm(YYLIMIT)p -Fr(,)c(and)j(p)q(ossibly)f Fm(YYCURSOR)-42 1868 y Fr(and)f -Fm(YYMARKER)f Fr(b)q(efore)h(returning.)18 b(T)o(ypically)12 -b Fm(YYCURSOR)p Fr(,)f Fm(YYLIMIT)p Fr(,)g Fm(YYMARKER)p -Fr(,)f(and)j Fm(YYFILL\()p Fl(n)p Fm(\))e Fr(will)h(b)q(e)h(de\014ned)h -(as)-42 1918 y(macros.)-42 2034 y Fj(2.1)56 b(Things)19 -b(That)g(RE2C)f(Do)r(esn't)g(Pro)n(vide)-42 2111 y Fr(RE2C)11 -b(do)q(esn't)h(pro)o(vide)f(man)o(y)e(things)j(a)o(v)n(ailable)d(in)h -(more)h(con)o(v)o(en)o(tional)f(scanner)j(generators)f(including)e -(default)h(rules,)-42 2160 y(end-of-input)17 b(pseudo-tok)o(ens,)i(and) -e(bu\013er)i(managemen)o(t)14 b(routines.)29 b(All)17 -b(of)f(these)j(m)o(ust)d(b)q(e)i(supplied)g(b)o(y)f(the)h(user.)-42 -2210 y(Rather)10 b(than)g(b)q(eing)f(a)g(handicap,)h(this)g(allo)o(ws)e -(RE2C-generated)i(scanners)h(to)f(b)q(e)g(tailored)f(to)g(almost)f(an)o -(y)h(en)o(vironmen)o(t.)-42 2260 y(F)m(or)k(example,)f(the)i(scanner)h -(de\014ned)f(in)f(Figure)h(1)f(compiles)f(in)o(to)h(32)g(b)o(ytes)h(of) -e(i486)h(co)q(de)h(\(using)f(W)m(atcom)e(C)j(9.5\);)e(the)-42 -2310 y(same)g(size)h(as)f(an)g(equiv)n(alen)o(t)g(hand-co)q(ded)h -(routine.)k(Most)c(other)g(scanner)g(generators)h(cannot)e(pro)q(duce)h -(scanners)h(that)-42 2360 y(are)k(comp)q(etitiv)o(e)e(with)g(hand-co)q -(ded)i(analyzers)f(in)f(this)h(case.)28 b(F)m(urther,)18 
-b(it)f(is)g(not)f(o)o(v)o(erly)h(di\016cult)f(to)h(implem)o(en)o(t)e(a) --42 2409 y(more)c(traditional)f(scanner)j(using)f(RE2C.)e(F)m(or)h -(example,)g(App)q(endix)h(A)f(con)o(tains)h(the)g(supp)q(ort)h(co)q(de) -f(for)f(the)i(C)e(scanner)-42 2459 y(b)q(enc)o(hmark)o(ed)j(in)f(T)m -(able)g(1.)k(Note)d(that)g(this)f(co)q(de)h(allo)o(ws)f(for)g -(arbitrarily)f(long)h(con)o(tiguous)g(tok)o(ens)h(and)f(pro)o(vides)h -(line)-42 2509 y(and)g(column)e(n)o(um)o(b)q(er)i(information.)923 -2748 y(3)p eop -%%Page: 4 4 -4 3 bop -42 195 a Fs(3)67 b(Generating)23 b(Directly)h(Executable)g -(Scanners)-42 286 y Fr(As)14 b(demonstrated)g(b)o(y)f(GLA)g([7)o(])g -(generating)h(directly)g(executable)g(co)q(de)g(instead)g(of)f(tables)h -(can)f(result)i(in)e(m)o(uc)o(h)f(faster)-42 336 y(scanners.)20 -b(Ho)o(w)o(ev)o(er,)13 b(to)g(ac)o(hiev)o(e)h(this)f(sp)q(eed,)h -(GLA-generated)h(scanners)f(mak)o(e)e(some)g(assumptions)h(ab)q(out)g -(the)h(input)-42 386 y(and)i(place)g(certain)g(restrictions)g(on)g(tok) -o(ens)680 371 y Fq(4)699 386 y Fr(.)23 b(In)15 b(this)h(section)g(w)o -(e)g(will)e(sho)o(w)h(ho)o(w)g(to)h(generate)g(directly)g(executable) --42 435 y(scanners)22 b(whic)o(h)d(not)g(only)g(a)o(v)o(oid)f(suc)o(h)i -(restrictions,)h(but)f(are)g(also)e(faster)i(and)f(usually)g(smaller.) -33 b(The)19 b(approac)o(h)-42 485 y(presen)o(ted)f(here)f(has)f(the)g -(added)g(b)q(ene\014t)h(that)f(ev)o(en)g(faster)h(scanners)g(can)f(b)q -(e)g(easily)f(b)q(e)i(created,)g(at)e(the)i(exp)q(ense)g(of)-42 -535 y(increased)f(co)q(de)e(size,)h(b)o(y)e(using)h(a)f(tec)o(hnique)i -(akin)e(to)h(lo)q(op)f(unrolling.)-42 651 y Fj(3.1)56 -b(Constructing)19 b(a)g(DF)-6 b(A)-42 728 y Fr(The)15 -b(\014rst)g(step)g(in)f(generating)h(a)f(directly)g(executable)h -(scanner)h(is)e(to)g(construct)i(a)e(DF)-5 b(A)15 b(that)f(recognizes)i -(the)e(regular)-42 778 y(expressions)g(in)e(the)h(sp)q(eci\014cation.) 
-18 b(Figure)12 b(3)g(presen)o(ts)i(a)e(DF)-5 b(A)13 b(that)f -(recognizes)h(the)g(regular)f(expressions)i(in)e(Figure)g(2.)-42 -828 y(One)18 b(p)q(ossible)f(algorithm)e(for)h(constructing)i(suc)o(h)g -(a)e(DF)-5 b(A)18 b(can)f(b)q(e)g(found)g(in)f([1].)26 -b(Giv)o(en)16 b(suc)o(h)i(a)f(DF)-5 b(A,)16 b(the)i(task)f(of)p --42 864 780 2 v 5 891 a Fn(4)22 903 y Fo(These)11 b(assumptions)e(and)h -(restrictions)f(are)i(discussed)e(in)j(more)e(detail)g(in)h(Sections)e -(3.3.1)i(and)f(5.1.)923 2748 y Fr(4)p eop -%%Page: 5 5 -5 4 bop -42 195 a Fr(scanning)14 b(the)h(input)f(can)g(b)q(e)g -(expressed)i(as)e(follo)o(ws:)125 278 y(Starting)i(from)g(the)i(start)g -(state,)g(mo)o(v)o(e)e(from)f(state)j(to)f(state)h(along)e(transitions) -i(lab)q(eled)f(with)g(con-)62 328 y(secutiv)o(e)f(c)o(haracters)h(from) -c(the)i(input.)21 b(When)15 b(no)f(further)i(transitions)f(can)g(b)q(e) -g(made,)e(bac)o(ktrac)o(k)i(to)g(the)62 378 y(last)h(accepting)g -(state,)h(sa)o(y)f Fl(q)q Fr(.)24 b(The)16 b(path)g(to)f -Fl(q)i Fr(sp)q(ells)g(the)f(next)g(tok)o(en)g(and)g(the)g(rule)h(asso)q -(ciated)f(with)g Fl(q)62 428 y Fr(determines)e(the)h(co)q(de)g(to)e(b)q -(e)i(executed.)-42 511 y(As)d(a)e(result,)h(the)h(problem)d(of)h -(generating)h(scanners)h(essen)o(tially)f(reduces)h(to)f(the)g(problem) -e(of)h(generating)h(an)f(executable)-42 560 y(represen)o(tation)16 -b(for)d(a)h(DF)-5 b(A.)-42 677 y Fj(3.2)56 b(Generating)18 -b(Co)r(de)-42 753 y Fr(If)g(w)o(e)g(assume)g(that)g(the)h(input)f(is)g -(en)o(tirely)g(con)o(tained)g(in)g(a)f(single)h(bu\013er)h(then)g -(generating)f(co)q(de)h(for)f(the)g(DF)-5 b(A)19 b(is)-42 -803 y(relativ)o(ely)13 b(straigh)o(tforw)o(ard,)f(as)h(is)g -(illustrated)g(b)o(y)f(the)i(co)q(de)g(templates)e(in)h(Figure)g(4.)35 -b(Note)14 b(that)f(the)g(only)g(di\013erence)p 575 863 -717 2 v 575 912 2 50 v 856 897 a Fi(Pr)n(olo)n(gue)p -1290 912 V 575 914 717 2 v 575 964 2 50 v 720 949 a Fm(int)21 -b(yyaccept;)p 1290 964 V 575 1014 V 720 999 a(goto)g(M)p -Fi(start)p Fm(;)p 
1290 1014 V 575 1063 V 720 1049 a(fin:)g(YYCURSOR)f -(=)i(YYMARKER;)p 1290 1063 V 575 1113 V 720 1098 a(switch\(yyaccept\))p -Fk(f)p 1290 1113 V 575 1163 V 807 1148 a Fl(:)7 b(:)g(:)p -1290 1163 V 575 1213 V 602 1198 a Fm(A)p Fl(n)p Fm(:)93 -b(case)21 b Fl(n)p Fm(:)43 b Fi(action)p Fr(\()p Fl(n)p -Fr(\))p Fm(;)p 1290 1213 V 575 1263 V 807 1248 a Fl(:)7 -b(:)g(:)p 1290 1263 V 575 1313 V 720 1298 a Fk(g)p 1290 -1313 V 575 1362 V 720 1347 a Fi(c)n(o)n(de)15 b(for)g(states)p -1290 1362 V 575 1364 717 2 v 204 1370 720 2 v 204 1420 -2 50 v 346 1405 a(Co)n(de)g(for)f(ac)n(c)n(epting)h(state)p -921 1420 V 204 1421 720 2 v 204 1471 2 50 v 230 1456 -a Fm(L)p Fl(q)q Fm(:)50 b(++YYCURSOR;)p 921 1471 V 204 -1521 V 344 1506 a(yyaccept)20 b(=)h Fi(rule)p Fr(\()p -Fl(q)q Fr(\))p Fm(;)p 921 1521 V 204 1571 V 344 1556 -a(YYMARKER)f(=)h(YYCURSOR;)p 921 1571 V 204 1621 V 230 -1606 a(M)p Fl(q)q Fm(:)50 b(switch\(*YYCURSO)o(R\))p -Fk(f)p 921 1621 V 204 1670 V 431 1655 a Fl(:)7 b(:)g(:)p -921 1670 V 204 1720 V 387 1705 a Fm(case)21 b Fl(c)p -Fm(:)43 b(goto)21 b(L)p Fi(goto)q Fr(\()p Fl(q)q(;)7 -b(c)p Fr(\))p Fm(;)p 921 1720 V 204 1770 V 431 1755 a -Fl(:)g(:)g(:)p 921 1770 V 204 1820 V 387 1805 a Fm(default:)42 -b(goto)21 b(fin;)p 921 1820 V 204 1870 V 344 1855 a Fk(g)p -921 1870 V 204 1871 720 2 v 944 1370 V 944 1420 2 50 -v 1045 1405 a Fi(Co)n(de)15 b(for)g(non-ac)n(c)n(epting)h(state)p -1662 1420 V 944 1421 720 2 v 944 1471 2 50 v 971 1456 -a Fm(L)p Fl(q)q Fm(:)49 b(++YYCURSOR;)p 1662 1471 V 944 -1521 V 1662 1521 V 944 1571 V 1662 1571 V 944 1621 V -971 1606 a(M)p Fl(q)q Fm(:)g(switch\(*YYCURSOR\))p Fk(f)p -1662 1621 V 944 1670 V 1172 1655 a Fl(:)7 b(:)g(:)p 1662 -1670 V 944 1720 V 1128 1705 a Fm(case)21 b Fl(c)p Fm(:)43 -b(goto)21 b(L)p Fi(goto)p Fr(\()p Fl(q)q(;)7 b(c)p Fr(\))p -Fm(;)p 1662 1720 V 944 1770 V 1172 1755 a Fl(:)g(:)g(:)p -1662 1770 V 944 1820 V 1128 1805 a Fm(default:)42 b(goto)21 -b(fin;)p 1662 1820 V 944 1870 V 1084 1855 a Fk(g)p 1662 -1870 V 944 1871 720 2 v 108 1946 
a Fr(Figure)g(4:)30 -b(Directly)20 b(executable)h(scanner.)38 b(The)20 b(co)q(de)h -(generated)g(for)f(a)g(scanner)h(consists)g(of)f(a)108 -1996 y(prologue)12 b(follo)o(w)o(ed)f(b)o(y)h(co)q(de)i(for)e(eac)o(h)h -(state.)18 b Fi(start)11 b Fr(is)i(the)g(start)g(state.)18 -b Fi(action)p Fr(\()p Fl(n)p Fr(\))13 b(denotes)h(the)f(co)q(de)108 -2045 y(asso)q(ciated)h(with)e(rule)h Fl(n)p Fr(,)f Fi(goto)p -Fr(\()p Fl(q)q(;)7 b(c)p Fr(\))12 b(denotes)i(the)f(state)g(reac)o(hed) -h(from)d(state)i Fl(q)h Fr(along)d(the)i(transition)108 -2095 y(lab)q(eled)18 b(with)e Fl(c)h Fr(and)g Fi(rule)p -Fr(\()p Fl(q)q Fr(\))g(denotes)i(the)e(rule)h(asso)q(ciated)g(with)e -(state)i Fl(q)q Fr(.)28 b Fm(yyaccept)16 b Fr(is)h(used)h(to)108 -2145 y(sa)o(v)o(e)d(bac)o(ktrac)o(king)e(information.)i(The)g -Fm(M)p Fr(-lab)q(els)e(will)f(b)q(e)j(used)g(in)e(section)i(3.4.2.)-42 -2245 y(b)q(et)o(w)o(een)j(the)f(templates)f(for)g(accepting)g(and)h -(non-accepting)f(states)i(is)e(that)g(the)h(accepting)g(states)g(ha)o -(v)o(e)f(additional)-42 2294 y(co)q(de)e(to)f(sa)o(v)o(e)g(bac)o(ktrac) -o(king)f(information.)j(Figure)e(5)g(sho)o(ws)g(co)q(de)h(that)f(migh)o -(t)d(b)q(e)k(generated)g(for)f(state)g(1)g(in)f(Figure)h(3.)-42 -2460 y Fj(3.3)56 b(Bu\013ering)-42 2537 y Fr(Complications)13 -b(arise)i(when)g(the)g(input)g(is)g(not)f(con)o(tained)h(in)g(a)f -(single)g(bu\013er:)21 b(additional)13 b(co)q(de)j(is)e(needed)j(for)d -(\014lling)-42 2587 y(the)h(bu\013er)g(as)f(necessary)m(.)923 -2748 y(5)p eop -%%Page: 6 6 -6 5 bop 647 188 a Fm(L1:)49 b(++YYCURSOR;)762 238 y(yyaccept)20 -b(=)i(4;)762 288 y(YYMARKER)e(=)i(YYCURSOR;)647 338 y(M1:)49 -b(switch\(*YYCURSOR\))p Fk(f)806 388 y Fm(case)21 b('a':)43 -b(goto)21 b(L6;)849 438 y Fl(:)7 b(:)g(:)806 487 y Fm(case)21 -b('q':)43 b(goto)21 b(L6;)806 537 y(case)g('r':)43 b(goto)21 -b(L2;)806 587 y(case)g('s':)43 b(goto)21 b(L6;)849 637 -y Fl(:)7 b(:)g(:)806 687 y Fm(case)21 b('z':)43 b(goto)21 -b(L6;)806 736 y(default:)42 b(goto)21 b(fin;)762 786 -y Fk(g)690 
876 y Fr(Figure)14 b(5:)k(Co)q(de)c(for)g(state)g(1.)505 -968 y Fm(L6:)50 b(++YYCURSOR;)621 1018 y(if\(YYLIMIT)19 -b(==)j(YYCURSOR\))e(YYFILL\(1\);)621 1068 y(yyaccept)g(=)h(4;)621 -1118 y(YYMARKER)f(=)h(YYCURSOR;)505 1168 y(M6:)50 b(switch\(*YYCURSO)o -(R\))p Fk(f)708 1217 y Fl(:)7 b(:)g(:)621 1267 y Fk(g)690 -1357 y Fr(Figure)14 b(6:)k(Co)q(de)c(for)g(state)g(6.)-42 -1489 y Fh(3.3.1)48 b(The)16 b(Sen)o(tinel)c(Metho)q(d)-42 -1566 y Fr(Most)20 b(scanner)g(generators)g(use)g(the)g(sen)o(tinel)f -(metho)q(d)g([1)o(])f(to)h(determine)g(when)h(the)f(bu\013er)i(needs)f -(\014lling.)32 b(In)19 b(the)-42 1616 y(simplest)d(case,)h(a)f(sym)o(b) -q(ol)f(that)h(do)q(es)i(not)e(app)q(ear)h(in)f(v)n(alid)f(input)h(is)g -(c)o(hosen)h(as)g(the)g(sen)o(tinel)g(c)o(haracter.)26 -b(An)17 b(extra)-42 1666 y(state)12 b(is)f(added)g(to)g(the)h(DF)-5 -b(A)11 b(and)g(transitions)f(to)h(this)g(state)h(on)f(the)g(sen)o -(tinel)h(sym)o(b)q(ol)d(are)i(added)g(to)g(the)h(original)d(states.)-42 -1715 y(When)16 b(the)g(DF)-5 b(A)16 b(arriv)o(es)f(in)g(this)h(new)f -(state)i(it)e(is)g(time)f(to)h(re\014ll)g(the)h(bu\013er.)23 -b(After)16 b(the)g(bu\013er)h(is)e(re\014lled,)g(scanning)-42 -1765 y(m)o(ust)h(b)q(e)g(restarted)i(in)e(the)h(previous)f(state.)26 -b(Unfortunately)m(,)16 b(this)g(is)g(not)g(p)q(ossible)g(with)g(the)h -(approac)o(h)f(outlined)g(in)-42 1815 y(Figure)j(4:)26 -b(the)19 b(necessary)h(information)15 b(is)j(simply)e(not)i(a)o(v)n -(ailable.)28 b(Co)q(de)19 b(could)f(b)q(e)g(added)h(to)f(eac)o(h)g -(state)h(to)f(sa)o(v)o(e)-42 1865 y(the)e(necessary)g(information)c -(but)j(this)g(w)o(ould)e(result)j(in)e(slo)o(w)o(er)h(and)f(larger)h -(scanners.)22 b(GLA)14 b(solv)o(es)h(this)f(problem)g(b)o(y)-42 -1915 y(ensuring)i(that)g(the)f(sen)o(tinel)h(only)f(gets)g(inserted)i -(b)q(et)o(w)o(een)g(tok)o(ens:)k(if)14 b(this)i(is)f(the)h(case,)g(the) -g(scanner)g(can)g(alw)o(a)o(ys)e(b)q(e)-42 1964 y(restarted)k(from)c -(the)j(start)f(state.)25 b(T)m(o)15 b(ensure)i(that)f(the)h(sen)o 
-(tinel)f(only)f(gets)i(inserted)g(b)q(et)o(w)o(een)g(tok)o(ens,)f(GLA)g -(allo)o(ws)-42 2014 y(newline)g(\(ASCI)q(I)g(LF\))g(c)o(haracters)h(to) -f(app)q(ear)g(only)f(at)g(the)h(end)g(of)f(a)h(tok)o(en)g(and)f -(disallo)o(ws)f(the)i(bu\013ering)g(of)f(partial)-42 -2064 y(lines)39 2049 y Fq(5)58 2064 y Fr(.)-42 2172 y -Fh(3.3.2)48 b(Bu\013ering)-42 2249 y Fr(RE2C-generated)17 -b(scanners)h(c)o(hec)o(k)f(if)e(the)i(bu\013er)g(needs)g(\014lling)e -(simply)f(b)o(y)i(comparing)e Fm(YYCURSOR)g Fr(and)i -Fm(YYLIMIT)p Fr(.)e(A)-42 2298 y(metho)q(d)d(inspired)g(b)o(y)g(the)h -(mec)o(hanism)d(used)j(to)f(guard)g(against)g(stac)o(k)g(o)o(v)o -(er\015o)o(w)g(in)g([17)o(])1365 2283 y Fq(6)1394 2298 -y Fr(is)g(used)i(to)e(reduce)h(the)g(amoun)o(t)-42 2348 -y(of)i(c)o(hec)o(king.)21 2398 y(Chec)o(ks)j(are)g(only)f(inserted)h -(in)f(certain)h(k)o(ey)f(states.)27 b(These)18 b(c)o(hec)o(ks)g(simply) -c(ensure)k(that)e(there)i(is)e(enough)h(input)-42 2448 -y(in)g(the)g(bu\013er)h(for)f(the)h(scan)f(to)g(pro)q(ceed)h(un)o(til)e -(the)i(next)f(k)o(ey)g(state.)28 b(F)m(or)17 b(example,)f(in)g(the)i -(DF)-5 b(A)17 b(of)f(Figure)h(3)g(it)g(is)-42 2498 y(su\016cien)o(t)e -(to)e(c)o(hec)o(k)i(that)f(there)h(are)g(at)e(least)h(6)g(c)o -(haracters)h(in)f(the)g(bu\013er)h(when)f(it)g(starts,)g(and)g(that)g -(there)h(is)f(at)f(least)-42 2547 y(one)i(c)o(haracter)g(in)e(the)i -(bu\013er)g(when)f(the)h(DF)-5 b(A)14 b(is)g(in)f(states)j(6,)d(8,)g -(or)h(10.)k(No)13 b(other)i(c)o(hec)o(ks)g(are)g(required.)k(The)14 -b(c)o(hec)o(ks)-42 2597 y(inserted)i(in)d(k)o(ey)h(states)h(are)f(of)g -(the)g(form)923 2748 y(6)p eop -%%Page: 7 7 -7 6 bop 62 195 a Fm(if\(\(YYLIMIT)20 b(-)h(YYCURSOR\))f(<)i -Fl(n)p Fm(\))f(YYFILL\()p Fl(n)p Fm(\);)-42 278 y Fr(where)c -Fl(n)f Fr(is)f(the)i(maxim)n(um)11 b(n)o(um)o(b)q(er)k(of)g(c)o -(haracters)i(that)f(can)g(b)q(e)g(consumed)g(b)q(efore)g(another)g(k)o -(ey)g(state)g(is)g(reac)o(hed.)-42 328 y(F)m(or)e(example,)e(Figure)i 
-(6)g(sho)o(ws)g(the)g(co)q(de)h(generated)g(for)f(state)h(6)e(in)h -(Figure)g(3.)21 378 y(A)h(set)g(of)f(k)o(ey)h(states)h(can)f(b)q(e)g -(determined)g(b)o(y)f(disco)o(v)o(ering)h(the)g(strongly-connected)h -(comp)q(onen)o(ts)e(\(SCCs\))i(of)e(the)-42 428 y(DF)-5 -b(A.)12 b(An)h(SCC)f(is)f(a)h(maxima)o(l)d(subset)k(of)f(states)h(suc)o -(h)g(that)f(there)h(exists)g(a)f(path)g(from)e(an)o(y)i(state)g(in)g -(the)h(subset)g(to)f(an)o(y)-42 477 y(other.)24 b(The)16 -b(set)g(of)f(k)o(ey)h(states)g(consists)h(of)e(all)f(of)h(the)h(states) -h(in)e(non-trivial)f(SCCs,)h(together)i(with)e(the)h(start)g(state.)-42 -527 y(Note)d(that)f(for)g(eac)o(h)g(SCC)g Fl(S)r Fr(,)h(w)o(e)f -(actually)g(only)f(ha)o(v)o(e)h(to)g(include)g(a)f(subset)j(of)d -(states)j(of)d Fl(S)k Fr(suc)o(h)e(that)f(when)g(the)h(subset)-42 -577 y(is)f(remo)o(v)o(ed,)f Fl(S)k Fr(b)q(ecomes)d(acyclic.)17 -b(Indeed,)c([17)o(])e(describ)q(es)j(a)e(simple)e(heuristic)j(for)e(c)o -(ho)q(osing)h(suc)o(h)g(a)g(subset.)18 b(Ho)o(w)o(ev)o(er,)-42 -627 y(since)c(in)f(practice)h(most)d(of)i(the)g(\(non-trivial\))f(SCCs) -h(encoun)o(tered)i(will)c(consist)j(of)e(a)h(single)g(state)g(the)h -(curren)o(t)g(v)o(ersion)-42 677 y(of)f(RE2C)g(simply)e(includes)j(all) -d(states)k(in)e(non-trivial)e(SCCs)943 662 y Fq(7)962 -677 y Fr(.)18 b(An)13 b(algorithm)e(giv)o(en)h(in)h([3)o(])g(w)o(as)g -(used)h(to)f(compute)g(the)-42 726 y(SCCs.)-42 843 y -Fj(3.4)56 b(Optimizations)-42 919 y Fr(Ev)o(en)17 b(go)q(o)q(d)f -(optimizing)e(C)i(compilers)f(can)i(b)q(e)g(coaxed)f(in)o(to)g -(generating)g(m)o(uc)o(h)f(smaller)g(and)h(sligh)o(tly)f(faster)i(co)q -(de)g(if)-42 969 y(some)c(transformations)g(are)h(\014rst)h(applied)e -(to)h(the)g(generated)i(co)q(de.)-42 1077 y Fh(3.4.1)48 -b(Eliminatin)o(g)13 b(Bac)o(ktrac)o(king)-42 1154 y Fr(Consider)19 -b(state)g(1)f(in)g(the)h(DF)-5 b(A)18 b(in)g(Figure)g(3.)31 -b(Note)19 b(that)f(since)h(all)e(of)h(the)h(transitions)f(from)e(state) -j(1)f(reac)o(h)h(only)-42 1203 
y(accepting)f(states,)h(bac)o(ktrac)o -(king)f(information)c(do)q(es)k(not)f(need)i(to)e(b)q(e)h(sa)o(v)o(ed)g -(if)e(the)i(co)q(de)g(for)f(the)h Fm(default)e Fr(case)j(is)-42 -1253 y(c)o(hanged)h(to)e(go)g(directly)h(to)g(the)g(co)q(de)h(asso)q -(ciated)f(with)f(state)i(1.)32 b(The)19 b(result)h(of)e(this)h -(optimization)d(is)i(sho)o(wn)h(in)-42 1303 y(Figure)d(7.)48 -b(More)17 b(generally)m(,)e(this)h(optimization)d(can)j(b)q(e)g -(applied)f(to)h(all)f(accepting)h(states)h(whic)o(h)f(ha)o(v)o(e)g -(transitions)647 1396 y Fm(L1:)49 b(++YYCURSOR;)647 1446 -y(M1:)g(switch\(*YYCURSOR\))p Fk(f)806 1495 y Fm(case)21 -b('a':)43 b(goto)21 b(L6;)849 1545 y Fl(:)7 b(:)g(:)806 -1595 y Fm(case)21 b('q':)43 b(goto)21 b(L6;)806 1645 -y(case)g('r':)43 b(goto)21 b(L2;)806 1695 y(case)g('s':)43 -b(goto)21 b(L6;)849 1744 y Fl(:)7 b(:)g(:)806 1794 y -Fm(case)21 b('z':)43 b(goto)21 b(L6;)806 1844 y(default:)42 -b(goto)21 b(A4;)762 1894 y Fk(g)419 1983 y Fr(Figure)14 -b(7:)k(Co)q(de)c(for)g(state)g(1)g(with)g(bac)o(ktrac)o(king)f -(eliminated.)-42 2083 y(only)h(to)f(accepting)i(states.)-42 -2191 y Fh(3.4.2)48 b(Optimizing)12 b Fm(switch)p Fh(es)-42 -2268 y Fr(Most)21 b(C)g(compilers)f(will)f(generate)j(either)f(a)g -(jump)e(table)h(or)h(a)g(set)g(of)f Fm(if)g Fr(statemen)o(ts)h(for)g(a) -f Fm(switch)g Fr(statemen)o(t)-42 2317 y(dep)q(ending)e(on)e(the)i -(distribution)e(of)g(the)i Fm(case)e Fr(lab)q(els.)26 -b(In)17 b(man)o(y)e(compilers)h(the)h(decision)g(as)g(to)g(whic)o(h)g -(metho)q(d)f(to)-42 2367 y(use)j(is)f(biased)f(to)o(w)o(ards)h -(generating)g(jump)e(tables)i(since)g(in)g(most)e(cases)j(this)f -(results)h(in)e(faster)h(alb)q(eit)f(larger)h(co)q(de.)-42 -2417 y(Ho)o(w)o(ev)o(er,)c(exp)q(erience)h(with)e(directly)h -(executable)h(scanners)g(has)e(sho)o(wn,)g(that)g(replacing)h(man)o(y)d -(of)i(these)h(jump)e(tables)p -42 2452 780 2 v 5 2478 -a Fn(5)22 2490 y Fo(If)f(the)g(input)f(con)o(tains)g(no)h(newlines,)f 
-(a)h(GLA)h(scanner)e(will)h(attempt)f(to)h(bu\013er)f(the)g(en)o(tire)g -(input)h(stream.)5 2518 y Fn(6)22 2530 y Fo(The)h(problem)d(of)i -(detecting)e(stac)o(k)h(o)o(v)o(er\015o)o(w)g(in)i(LR)f(parsers)f(is)i -(probably)d(b)q(est)i(left)f(to)h(hardw)o(are)f(mec)o(hanisms)f([12)o -(].)5 2558 y Fn(7)22 2569 y Fo(It)i(should)f(b)q(e)h(noted)f(that)g -(\014nding)f(the)i(minimal)e(set)i(of)g(states)f(to)h(remo)o(v)o(e)f -(from)g(an)h(SCC)h(in)f(order)f(to)h(render)f(it)h(acyclic)e(is)j -(equiv)n(alen)o(t)-42 2609 y(to)g(the)e(FEEDBA)o(CK)j(VER)m(TEX)h(SET)d -(problem)f(whic)o(h)h(is)g(NP-complete)e([6].)923 2748 -y Fr(7)p eop -%%Page: 8 8 -8 7 bop -42 195 a Fr(with)12 b Fm(if)g Fr(statemen)o(ts)g(results)h(in) -f(scanners)i(whic)o(h)d(are)i(m)o(uc)o(h)e(smaller,)f(and)i -(surprisingly)m(,)f(in)h(some)f(cases)i(sligh)o(tly)e(faster)-42 -245 y(as)h(w)o(ell)78 230 y Fq(8)96 245 y Fr(.)17 b(As)c(a)e(result,)h -(the)h(capabilit)o(y)d(of)h(replacing)g(a)h Fm(switch)e -Fr(statemen)o(t)i(with)f Fm(if)g Fr(statemen)o(ts)h(w)o(as)g(added)g -(to)f(RE2C.)21 295 y(RE2C)j(bases)j(its)e(decision)g(on)g(whether)i(to) -e(generate)i(a)d Fm(switch)g Fr(statemen)o(t)i(or)f(to)g(replace)h(it)f -(with)g Fm(if)p Fr(s)g(solely)g(on)-42 345 y(the)j(densit)o(y)162 -329 y Fq(9)198 345 y Fr(of)f(the)g Fm(switch)f Fr(statemen)o(t.)27 -b(It)18 b(is)e(surprising)i(that)f(suc)o(h)h(a)e(simple)g(heuristic)i -(w)o(orks)f(w)o(ell.)27 b(F)m(or)16 b(more)-42 394 y(esoteric)h -(applications)e(in)g(whic)o(h)h(the)g(input)f(alphab)q(et)h(is)f(not)h -(a)f(simple)f(in)o(terv)n(al)h(RE2C)g(has)g(the)i(adv)n(an)o(tage)d(in) -h(that)-42 444 y(there)f(is)e(no)g(pro)o(vision)f(for)h(don't)f(care)i -(en)o(tries)g(in)f(a)g Fm(switch)f Fr(statemen)o(t:)17 -b(if)11 b(no)h Fm(case)f Fr(matc)o(hes)h(none)g(of)f(the)i(statemen)o -(ts)-42 494 y(in)i(the)g Fm(switch)e Fr(m)o(ust)g(b)q(e)i(executed.)22 -b(Ho)o(w)o(ev)o(er,)15 b(for)f(the)h(examples)f(in)g(T)m(able)f(1)h -(this)h(is)f(not)h(so:)k(RE2C)14 
b(simply)f(do)q(es)i(a)-42 -544 y(b)q(etter)j(job)d(of)g(generating)g(co)q(de)i(for)e -Fm(switch)f Fr(statemen)o(ts)i(than)g(the)g(compiler.)21 -b([18)o(],)15 b([11)o(],)g(and)h([2)o(])f(also)g(address)i(the)-42 -594 y(problem)c(of)g(generating)h(go)q(o)q(d)g(co)q(de)h(for)e -Fm(switch)g Fr(statemen)o(ts.)-42 702 y Fh(Replacing)18 -b Fm(switch)p Fh(es)g(with)g Fm(if)p Fh(s)41 b Fr(When)17 -b(replacing)g(a)f Fm(switch)g Fr(statemen)o(t)h(with)g -Fm(if)f Fr(statemen)o(ts,)i(it)f(is)f(useful)i(to)-42 -751 y(sort)g(the)f Fm(case)p Fr(s)f(b)o(y)h(lab)q(el)f(and)g(then)h -(group)g(them)f(according)h(to)f(rule)h(in)o(to)f(subranges,)i(as)f -(illustrated)f(in)g(Figure)h(8.)-42 801 y(RE2C)c(replaces)h(a)f -Fm(switch)f Fr(with)g(either)i(a)f(linear)f(or)h(binary)g(searc)o(h,)h -(dep)q(ending)f(on)g(the)h(n)o(um)o(b)q(er)e(of)g(subranges)i(in)f(the) -467 894 y Fm(switch\(*YYCURSOR\))p Fk(f)511 944 y Fm(case)21 -b('\\)o(000':)49 b Fl(:)7 b(:)g(:)20 b Fm(case)h('/':)115 -b(goto)21 b(L11;)576 994 y(case)g('0':)431 b(goto)21 -b(L7;)576 1043 y(case)g('1':)49 b Fl(:)7 b(:)g(:)20 b -Fm(case)h('9':)115 b(goto)21 b(L8;)576 1093 y(case)g(':':)49 -b Fl(:)7 b(:)g(:)20 b Fm(case)h('`':)115 b(goto)21 b(L11;)576 -1143 y(case)g('a':)49 b Fl(:)7 b(:)g(:)20 b Fm(case)h('o':)115 -b(goto)21 b(L6;)576 1193 y(case)g('p':)431 b(goto)21 -b(L1;)576 1243 y(case)g('r':)49 b Fl(:)7 b(:)g(:)20 b -Fm(case)h('z':)115 b(goto)21 b(L6;)577 1292 y(case)g(')p -Fk(f)p Fm(':)49 b Fl(:)7 b(:)g(:)20 b Fm(case)h('\\377':)49 -b(goto)21 b(L11;)467 1342 y Fk(g)672 1432 y Fr(Figure)14 -b(8:)j Fm(switch)c Fr(for)g(state)i(0.)-42 1531 y Fm(switch)p -Fr(.)i(If)d(there)h(are)f(only)f(a)h(few)g(subranges)h(a)e(linear)h -(searc)o(h)h(is)f(generated;)g(otherwise,)h(a)e(binary)h(searc)o(h)h -(is)f(used.)21 1581 y(Figure)f(9)f(and)g(Figure)h(10)f(sho)o(w)h -(linear)f(and)h(binary)f(searc)o(hes,)i(resp)q(ectiv)o(ely)m(,)g(that)f -(could)f(b)q(e)h(used)h(to)e(replace)52 b(the)607 1674 -y Fm(if\(*YYCURSOR)19 
b(<=)i('/'\))g(goto)g(L11;)607 -1724 y(if\(*YYCURSOR)e(<=)i('0'\))g(goto)g(L7;)607 1773 -y(if\(*YYCURSOR)e(<=)i('9'\))g(goto)g(L8;)607 1823 y(if\(*YYCURSOR)e -(<=)i('`'\))g(goto)g(L11;)607 1873 y(if\(*YYCURSOR)e(==)i('p'\))g(goto) -g(L1;)607 1923 y(if\(*YYCURSOR)e(<=)i('z'\))g(goto)g(L6;)607 -1973 y(goto)g(L11;)477 2062 y Fr(Figure)14 b(9:)k(Linear)c(lo)q(okup)f -(co)q(de)h(sequence)i(for)e(state)h(0.)p -42 2147 780 -2 v 5 2173 a Fn(8)22 2185 y Fo(See)c(T)m(able)g(1)g(for)g(examples.)5 -2213 y Fn(9)22 2225 y Fo(The)h(n)o(um)o(b)q(er)d(of)i(distinct)f -(subranges)f(divided)g(b)o(y)i(the)g(total)f(n)o(um)o(b)q(er)g(of)h -Fg(case)p Fo(s.)923 2748 y Fr(8)p eop -%%Page: 9 9 -9 8 bop 563 188 a Fm(if\(*YYCURSOR)19 b(<=)j('`'\))p -Fk(f)650 238 y Fm(if\(*YYCURSOR)d(<=)j('/'\))f(goto)g(L11;)650 -288 y(if\(*YYCURSOR)e(<=)j('0'\))f(goto)g(L7;)650 338 -y(if\(*YYCURSOR)e(<=)j('9'\))f(goto)g(L8;)650 388 y(goto)g(L11;)563 -438 y Fk(g)h Fm(else)f Fk(f)650 487 y Fm(if\(*YYCURSOR)e(==)j('p'\))f -(goto)g(L1;)650 537 y(if\(*YYCURSOR)e(<=)j('z'\))f(goto)g(L6;)650 -587 y(goto)g(L11;)563 637 y Fk(g)463 726 y Fr(Figure)14 -b(10:)k(Binary)13 b(lo)q(okup)h(co)q(de)g(sequence)i(for)e(state)g(0.) 
--42 826 y Fm(switch)e Fr(in)g(Figure)h(8.)k(Note)d(in)e(particular)g -(the)i(comparison)d(for)i(the)g(\\)p Fm(p)p Fr(")f(in)g(Figure)h(9.)k -(This)c(optimization)d(eliminates)-42 876 y(a)k(comparison)f(eac)o(h)i -(time)d(it)i(is)g(applied.)k(Also)c(note)h(that)f(no)g(comparisons)f -(are)i(required)g(at)f(the)g(top)g(and)g(b)q(ottom)f(of)-42 -925 y(the)i(range.)-42 1033 y Fh(Simplifyi)o(ng)j Fm(switch)p -Fh(es)40 b Fr(As)19 b(a)f(general)g(rule,)i(b)q(etter)g(replacemen)o(t) -e(co)q(de)h(can)g(b)q(e)g(generated)h(for)e(a)g Fm(switch)f -Fr(if)h(it)-42 1083 y(con)o(tains)g(few)o(er)g(subranges.)31 -b(One)18 b(w)o(a)o(y)f(of)g(reducing)h(the)g(n)o(um)o(b)q(er)f(of)g -(subranges)i(in)e(a)g Fm(switch)p Fr(,)g(at)g(the)i(exp)q(ense)g(of)-42 -1133 y(some)c(sp)q(eed,)j(is)d(to)h(lo)q(cate)g(a)f Fi(b)n(ase)h -Fm(switch)f Fr(whic)o(h)g(is)h(v)o(ery)g(similar)d(and)j(then)g -(replace)h(the)f(co)q(de)h(for)e(all)f(cases)k(whic)o(h)-42 -1183 y(app)q(ear)d(iden)o(tically)d(in)i(the)g(base)h -Fm(switch)d Fr(with)i(a)f Fm(goto)g Fr(to)h(\(the)h(co)q(de)f -(generated)i(for\))d(the)i(base)f Fm(switch)p Fr(.)j(RE2C)c(uses)-42 -1233 y(this)18 b(optimization)d(to)i(go)q(o)q(d)g(adv)n(an)o(tage)g -(when)h(generating)g(co)q(de)g(in)f(the)h(transitions)g(of)f(states)i -(used)f(for)f(matc)o(hing)-42 1282 y(k)o(eyw)o(ords.)i(F)m(or)12 -b(example,)f(note)j(that)f(the)g Fm(switch)p Fr(es)f(for)h(states)h(1)f -(through)g(4)f(di\013er)h(from)f(the)h Fm(switch)f Fr(of)g(state)i(6)e -(only)-42 1332 y(on)i(\\)p Fm(r)p Fr(",)f(\\)p Fm(i)p -Fr(",)g(\\)p Fm(n)p Fr(",)f(and)i(\\)p Fm(t)p Fr(",)f(resp)q(ectiv)o -(ely)m(.)19 b(Figure)14 b(11)f(sho)o(ws)h(the)h(co)q(de)g(generated)g -(for)e(these)j(states.)37 b(Another)15 b(w)o(a)o(y)560 -1425 y Fm(L1:)49 b(++YYCURSOR;)560 1475 y(M1:)g(if\(*YYCURSOR)19 -b(!=)j('r'\))f(goto)g(M6;)560 1525 y(L2:)49 b(++YYCURSOR;)560 -1574 y(M2:)g(if\(*YYCURSOR)19 b(!=)j('i'\))f(goto)g(M6;)560 -1624 y(L3:)49 b(++YYCURSOR;)560 1674 y(M3:)g(if\(*YYCURSOR)19 
-b(!=)j('n'\))f(goto)g(M6;)560 1724 y(L4:)49 b(++YYCURSOR;)560 -1774 y(M4:)g(if\(*YYCURSOR)19 b(!=)j('t'\))f(goto)g(M6;)675 -1824 y(goto)g(L5;)444 1913 y Fr(Figure)14 b(11:)j(Co)q(de)d(for)g -(states)h(1{4)e(after)h(all)f(optimizations.)-42 2013 -y(of)h(implemen)o(ting)d(this)j(optimization)e(is)i(to)g(construct)i(a) -e(tunnel)g(automaton)f([9)o(])h(from)e(the)j(DF)-5 b(A,)14 -b(and)g(then)h(generate)-42 2062 y(co)q(de)g(from)d(the)j(tunnel)f -(automaton.)-42 2170 y Fh(Common)i(Sub)q(expression)d(Eliminatio)o(n)38 -b Fr(Man)o(y)14 b(compilers)g(will)f(miss)g(the)i(fact)f(that)h -Fm(*YYCURSOR)d Fr(in)i(Figures)h(9)-42 2220 y(and)e(10)f(should)h(b)q -(e)g(loaded)f(in)o(to)g(a)h(register.)19 b(Most)13 b(can)f(b)q(e)i -(coaxed)f(to)f(do)h(so)g(b)o(y)f(\014rst)i(assigning)e -Fm(*YYCURSOR)e Fr(to)j(a)f(lo)q(cal)-42 2270 y(v)n(ariable.)-42 -2407 y Fs(4)67 b(Exp)r(erimen)n(tal)25 b(Results)-42 -2498 y Fr(T)m(able)16 b(1)h(compares)f(t)o(w)o(o)g(RE2C-generated)h(C)g -(scanners)h(with)e(the)i(\(hand-co)q(ded\))f(lcc)g(scanner)h([5)o(])e -(and)h(comparable)-42 2548 y(GLA-)f(and)g(\015ex-generated)i(scanners)f -(on)f(a)g(v)n(ariet)o(y)g(of)f(platforms.)48 b(It)16 -b(rep)q(orts)i(the)e(times)f(in)h(seconds)i(required)f(b)o(y)-42 -2598 y(the)e(v)n(arious)e(scanners)i(to)e(scan)h(ab)q(out)g(170,000)e -(lines)h(of)g(C)h(source.)19 b(The)14 b(5,607,820)d(b)o(yte)j(source)g -(\014le)g(used)h(essen)o(tially)923 2748 y(9)p eop -%%Page: 10 10 -10 9 bop 707 452 a Fr(time)438 b(space)328 502 y(program)p -516 517 2 50 v 82 w(user)83 b(sys)76 b(total)p 975 517 -V 82 w(text)j(data)d(bss)f(total)p 292 519 1284 2 v 860 -554 a Fi(R4000)16 b(/)f(gc)n(c2.3.3)h(-O)p 292 570 V -317 605 a Fr(\015ex)e(-Cem)p 516 620 2 50 v 71 w(10.36)p -683 620 V 50 w(0.87)p 808 620 V 71 w(11.23)p 975 620 -V 71 w(5200)p 1130 620 V 71 w(4192)p 1285 620 V 92 w(48)p -1420 620 V 72 w(9440)357 655 y(\015ex)h(-Cf)p 516 670 -V 92 w(5.44)p 683 670 V 50 w(0.72)p 808 670 V 92 w(6.16)p -975 670 V 71 w(4688)p 1130 670 V 51 
w(64384)p 1285 670 -V 91 w(48)p 1420 670 V 51 w(69120)443 705 y(lcc)p 516 -720 V 93 w(3.19)p 683 720 V 50 w(0.67)p 808 720 V 92 -w(3.86)p 975 720 V 71 w(7328)p 1130 720 V 71 w(1216)p -1285 720 V 51 w(8256)p 1420 720 V 50 w(16800)438 754 -y(gla)p 516 769 V 92 w(2.89)p 683 769 V 50 w(0.63)p 808 -769 V 92 w(3.52)p 975 769 V 50 w(11552)p 1130 769 V 71 -w(3056)p 1285 769 V 72 w(144)p 1420 769 V 50 w(14752)417 -804 y(re2c)p 516 819 V 94 w(2.54)p 683 819 V 50 w(0.68)p -808 819 V 92 w(3.22)p 975 819 V 50 w(13264)p 1130 819 -V 92 w(512)p 1285 819 V 113 w(0)p 1420 819 V 51 w(13776)373 -854 y(re2c)g(-s)p 516 869 V 93 w(2.38)p 683 869 V 50 -w(0.67)p 808 869 V 92 w(3.05)p 975 869 V 50 w(11056)p -1130 869 V 71 w(4528)p 1285 869 V 113 w(0)p 1420 869 -V 51 w(15584)p 292 871 1284 2 v 737 906 a Fi(R4000)h(/)f(c)n(c2.11.2)g -(-O)g(-Olimit)e(5000)p 292 922 V 317 957 a Fr(\015ex)h(-Cem)p -516 972 2 50 v 92 w(9.97)p 683 972 V 50 w(0.89)p 808 -972 V 71 w(10.86)p 975 972 V 71 w(4704)p 1130 972 V 71 -w(4240)p 1285 972 V 92 w(32)p 1420 972 V 72 w(8976)357 -1007 y(\015ex)h(-Cf)p 516 1022 V 92 w(6.19)p 683 1022 -V 50 w(0.72)p 808 1022 V 92 w(6.91)p 975 1022 V 71 w(4256)p -1130 1022 V 51 w(64432)p 1285 1022 V 91 w(32)p 1420 1022 -V 51 w(68720)443 1057 y(lcc)p 516 1072 V 93 w(2.74)p -683 1072 V 50 w(0.72)p 808 1072 V 92 w(3.46)p 975 1072 -V 71 w(9664)p 1130 1072 V 92 w(864)p 1285 1072 V 51 w(8256)p -1420 1072 V 50 w(18784)438 1106 y(gla)p 516 1121 V 92 -w(2.46)p 683 1121 V 50 w(0.69)p 808 1121 V 92 w(3.15)p -975 1121 V 50 w(19232)p 1130 1121 V 71 w(2992)p 1285 -1121 V 72 w(128)p 1420 1121 V 50 w(22352)417 1156 y(re2c)p -516 1171 V 94 w(2.97)p 683 1171 V 50 w(0.63)p 808 1171 -V 92 w(3.60)p 975 1171 V 50 w(15088)p 1130 1171 V 92 -w(528)p 1285 1171 V 113 w(0)p 1420 1171 V 51 w(15616)373 -1206 y(re2c)g(-s)p 516 1221 V 93 w(2.94)p 683 1221 V -50 w(0.61)p 808 1221 V 92 w(3.55)p 975 1221 V 50 w(16080)p -1130 1221 V 51 w(11808)p 1285 1221 V 112 w(0)p 1420 1221 -V 51 w(27888)p 292 1223 1284 2 v 849 1258 
a Fi(SP)m(AR)o(C)g(/)g(gc)n -(c2.3.3)g(-O)p 292 1274 V 317 1309 a Fr(\015ex)f(-Cem)p -516 1324 2 50 v 71 w(16.03)p 683 1324 V 50 w(2.78)p 808 -1324 V 71 w(18.81)p 975 1324 V 71 w(8992)p 1130 1324 -V 113 w(24)p 1285 1324 V 92 w(48)p 1420 1324 V 72 w(9064)357 -1359 y(\015ex)h(-Cf)p 516 1374 V 92 w(7.84)p 683 1374 -V 50 w(2.69)p 808 1374 V 71 w(10.53)p 975 1374 V 71 w(6560)p -1130 1374 V 51 w(62232)p 1285 1374 V 91 w(48)p 1420 1374 -V 51 w(68840)443 1409 y(lcc)p 516 1424 V 93 w(4.46)p -683 1424 V 50 w(2.01)p 808 1424 V 92 w(6.47)p 975 1424 -V 71 w(7800)p 1130 1424 V 92 w(384)p 1285 1424 V 51 w(8256)p -1420 1424 V 50 w(16440)438 1459 y(gla)p 516 1473 V 92 -w(4.08)p 683 1473 V 50 w(1.56)p 808 1473 V 92 w(5.64)p -975 1473 V 50 w(10864)p 1130 1473 V 71 w(2168)p 1285 -1473 V 72 w(136)p 1420 1473 V 50 w(13168)417 1508 y(re2c)p -516 1523 V 94 w(3.67)p 683 1523 V 50 w(1.76)p 808 1523 -V 92 w(5.43)p 975 1523 V 50 w(13552)p 1130 1523 V 134 -w(0)p 1285 1523 V 113 w(0)p 1420 1523 V 51 w(13552)373 -1558 y(re2c)g(-s)p 516 1573 V 93 w(3.48)p 683 1573 V -50 w(1.70)p 808 1573 V 92 w(5.18)p 975 1573 V 50 w(15464)p -1130 1573 V 134 w(0)p 1285 1573 V 113 w(0)p 1420 1573 -V 51 w(15464)p 292 1575 1284 2 v 880 1610 a Fi(i486)g(/)g(gc)n(c2.4.5)g -(-O)p 292 1626 V 317 1661 a Fr(\015ex)f(-Cem)p 516 1676 -2 50 v 71 w(21.86)p 683 1676 V 50 w(1.26)p 808 1676 V -71 w(23.12)p 975 1676 V 71 w(8536)p 1130 1676 V 113 w(20)p -1285 1676 V 92 w(24)p 1420 1676 V 72 w(8580)357 1711 -y(\015ex)h(-Cf)p 516 1726 V 92 w(9.12)p 683 1726 V 50 -w(1.18)p 808 1726 V 71 w(10.30)p 975 1726 V 71 w(6200)p -1130 1726 V 51 w(62228)p 1285 1726 V 91 w(24)p 1420 1726 -V 51 w(68452)443 1761 y(lcc)p 516 1776 V 93 w(5.45)p -683 1776 V 50 w(1.22)p 808 1776 V 92 w(6.67)p 975 1776 -V 71 w(5924)p 1130 1776 V 92 w(384)p 1285 1776 V 51 w(8240)p -1420 1776 V 50 w(14548)438 1811 y(gla)p 516 1825 V 92 -w(5.11)p 683 1825 V 50 w(1.18)p 808 1825 V 92 w(6.29)p -975 1825 V 50 w(15496)p 1130 1825 V 71 w(2144)p 1285 -1825 V 72 w(108)p 1420 
1825 V 50 w(17748)417 1860 y(re2c)p -516 1875 V 94 w(4.73)p 683 1875 V 50 w(1.13)p 808 1875 -V 92 w(5.86)p 975 1875 V 71 w(9800)p 1130 1875 V 134 -w(0)p 1285 1875 V 113 w(0)p 1420 1875 V 72 w(9800)373 -1910 y(re2c)g(-s)p 516 1925 V 93 w(4.85)p 683 1925 V -50 w(1.17)p 808 1925 V 92 w(6.02)p 975 1925 V 50 w(12968)p -1130 1925 V 134 w(0)p 1285 1925 V 113 w(0)p 1420 1925 -V 51 w(12968)p 292 1927 1284 2 v 871 1962 a Fi(68020)h(/)f(gc)n(c1.40)h -(-O)p 292 1978 V 317 2013 a Fr(\015ex)e(-Cem)p 516 2028 -2 50 v 50 w(117.37)p 683 2028 V 50 w(5.89)p 808 2028 -V 51 w(123.26)p 975 2028 V 70 w(7700)p 1130 2028 V 113 -w(20)p 1285 2028 V 92 w(22)p 1420 2028 V 72 w(7742)357 -2063 y(\015ex)h(-Cf)p 516 2078 V 71 w(50.93)p 683 2078 -V 50 w(5.27)p 808 2078 V 71 w(56.20)p 975 2078 V 71 w(5388)p -1130 2078 V 51 w(62228)p 1285 2078 V 91 w(22)p 1420 2078 -V 51 w(67638)443 2113 y(lcc)p 516 2128 V 72 w(33.28)p -683 2128 V 50 w(6.28)p 808 2128 V 71 w(39.56)p 975 2128 -V 71 w(4956)p 1130 2128 V 92 w(384)p 1285 2128 V 51 w(8236)p -1420 2128 V 50 w(13576)438 2163 y(gla)p 516 2177 V 71 -w(33.80)p 683 2177 V 50 w(4.20)p 808 2177 V 71 w(38.00)p -975 2177 V 50 w(13904)p 1130 2177 V 71 w(2144)p 1285 -2177 V 72 w(106)p 1420 2177 V 50 w(16154)417 2212 y(re2c)p -516 2227 V 73 w(28.92)p 683 2227 V 50 w(2.91)p 808 2227 -V 71 w(31.83)p 975 2227 V 71 w(8556)p 1130 2227 V 134 -w(0)p 1285 2227 V 113 w(0)p 1420 2227 V 72 w(8556)373 -2262 y(re2c)g(-s)p 516 2277 V 72 w(30.72)p 683 2277 V -50 w(3.19)p 808 2277 V 71 w(33.91)p 975 2277 V 71 w(9856)p -1130 2277 V 134 w(0)p 1285 2277 V 113 w(0)p 1420 2277 -V 72 w(9856)513 2352 y(T)m(able)e(1:)18 b(Comparison)12 -b(of)h(generated)j(C)d(scanners.)913 2748 y(10)p eop -%%Page: 11 11 -11 10 bop -42 195 a Fr(consists)15 b(of)d(10)h(copies)h(of)e(the)i -(source)h(to)e(James)f(Clark's)h(SGML)f(parser,)i(sgmls)1263 -180 y Fq(10)1297 195 y Fr(.)k(The)c(times)e(rep)q(orted)i(are)g(a)o(v)o -(erages)-42 245 y(for)j(10)g(trials;)h(the)g(sizes)g(rep)q(orted)h 
-(include)e(ev)o(erything)h(but)g(C)f(library)f(co)q(de)1251 -230 y Fq(11)1287 245 y Fr(.)28 b(\015ex)18 b(pro)o(vides)f(a)g(n)o(um)o -(b)q(er)g(of)g(table)-42 295 y(compression)e(options)g(including)f -Fm(-Cem)g Fr(for)h(tables)g(optimized)f(for)h(space,)h(and)f -Fm(-Cf)f Fr(for)h(tables)g(optimized)f(for)g(sp)q(eed.)-42 -345 y(By)f(default,)e(RE2C)h(will)f(use)h(a)g(heuristic)h(to)f(decide)h -(if)e(a)h Fm(switch)f Fr(should)h(b)q(e)g(replaced)h(with)f -Fm(if)p Fr(s:)17 b(the)c Fm(-s)e Fr(option)h(forces)-42 -394 y(RE2C)i(to)g(alw)o(a)o(ys)e(generate)k Fm(switch)p -Fr(es.)21 444 y(T)m(o)21 b(mak)o(e)g(comparisons)g(more)g(meaningful,)h -(all)e(seman)o(tic)i(pro)q(cessing)h(co)q(de)g(w)o(as)f(remo)o(v)o(ed)g -(from)e(the)j(GLA-)-42 494 y(generated)d(and)f(lcc)f(scanners,)j(and)d -(co)q(de)i(to)e(pro)o(vide)g(line)g(and)g(column)f(n)o(um)o(b)q(er)h -(information)d(w)o(as)k(added)g(to)f(the)-42 544 y(RE2C)c(sp)q -(eci\014cation.)19 b(The)14 b(remaining)e(di\013erences)k(of)d(note)h -(b)q(et)o(w)o(een)i(the)e(scanners)i(include:)21 627 -y Fk(\017)k Fr(The)15 b(\015ex-generated)g(scanners)h(do)d(not)h(pro)o -(vide)g(line)f(or)h(column)e(n)o(um)o(b)q(er)i(information.)21 -710 y Fk(\017)20 b Fr(The)15 b(GLA-generated)f(scanner)i(assumes)d -(7-bit)h(input.)21 793 y(As)e(a)g(general)g(rule,)h(the)f -(RE2C-generated)h(scanners)h(w)o(ere)f(the)f(fastest,)h(follo)o(w)o(ed) -e(b)o(y)h(the)g(GLA-generated)h(scanner)-42 843 y(and)j(then)g(the)g -(lcc)g(scanner.)24 b(The)15 b(\015ex-generated)i(scanners)h(w)o(ere)e -(signi\014can)o(tly)e(slo)o(w)o(er.)23 b(Only)15 b(the)h -(space-optimized)-42 892 y(\015ex)i(scanner)h(w)o(as)e(smaller)f(than)h -(the)h(default)f(RE2C)g(scanner,)i(and)e(only)g(b)o(y)g(a)g(narro)o(w)g -(margin.)26 b(There)19 b(are)e(some)-42 942 y(arc)o(hitectures,)j -(notably)d(the)g(IBM)h(370,)f(on)g(whic)o(h)g(table)g(driv)o(en)g -(scanners)i(will)d(probably)h(pro)q(duce)h(b)q(etter)h(results:)-42 -992 
y(IBM)c(370)e(compilers)g(t)o(ypically)f(generate)j(p)q(o)q(or)f -(co)q(de)h(for)f(large)f(routines.)21 1042 y(The)22 b(v)n(arious)f -(scanners)j(and)d(input)h(\014les)g(used)h(for)e(the)i(tests)g(are)f(a) -o(v)n(ailable)e(for)h(anon)o(ymous)f(ftp)i(from)e Ff(csg.-)-42 -1092 y(u)o(w)o(aterlo)q(o.ca)10 b Fr(in)f Ff(/pub/p)q -(eter/re2c/sampler.ta)o(r.Z)p Fr(.)i(\015ex)f(is)f(a)o(v)n(ailable)e -(for)i(anon)o(ymous)e(ftp)i(from)f Ff(ftp.uu.net)i Fr(as)f -Ff(/pack)o(ages/-)-42 1142 y(gnu/\015ex-2.3.7.ta)o(r.Z)p -Fr(,)14 b(GLA)h(is)g(a)o(v)n(ailable)e(for)i(anon)o(ymous)f(ftp)h(from) -f Ff(ftp.cs.colo)o(rado.edu)i Fr(as)f(part)g(of)g(the)h(Eli)f(pac)o(k)n -(age)-42 1191 y Ff(/pub/cs/distribs/eli/Eli3.4.2.ta)o(r.Z)p -Fr(,)i(and)h(the)g(lcc)h(fron)o(t)e(end)i(is)f(a)o(v)n(ailable)e(for)h -(anon)o(ymous)g(ftp)g(from)g Ff(p)o(rinceton.edu)-42 -1241 y Fr(as)f Ff(/pub/lcc/lccfe-1.9.ta)o(r.Z)p Fr(.)g(An)f(alpha)g(v)o -(ersion)g(of)g(RE2C)g(will)f(so)q(on)i(b)q(e)g(made)e(a)o(v)n(ailable)f -(for)i(anon)o(ymous)f(ftp)h(from)-42 1291 y Ff(csg.u)o(w)o(aterlo)q -(o.ca)g Fr(as)f Ff(/pub/p)q(eter/re2c/re2c-0.5.ta)o(r.Z)p -Fr(.)-42 1428 y Fs(5)67 b(Related)23 b(W)-6 b(ork)-42 -1519 y Fr(The)18 b(k)o(ey)g(to)f(the)h(p)q(erformance)f(and)g -(\015exibilit)o(y)e(of)i(an)g(RE2C-generated)h(scanner)h(is)e(the)h -(approac)o(h)f(used)h(to)f(deter-)-42 1569 y(mine)e(when)i(the)g -(bu\013er)g(needs)g(\014lling.)23 b(In)o(terestingly)m(,)16 -b(the)h(lcc)g(scanner)g([5)o(])f(uses)h(a)f(similar)e(approac)o(h)i -(\(with)g(certain)-42 1619 y(concessions)g(to)e(k)o(eep)g(the)h(b)q(o)q -(okk)o(eeping)e(manageable.\))-42 1735 y Fj(5.1)56 b(Comparison)18 -b(With)g(GLA)-42 1812 y Fr(It)h(is)f(natural)f(to)h(compare)f(RE2C)h -(to)g(GLA)f([7])g(as)h(it)g(also)f(generates)j(directly)f(executable)g -(scanners.)32 b(RE2C)18 b(and)-42 1861 y(GLA)d(ha)o(v)o(e)g(man)o(y)e -(di\013erences)k(simply)c(b)q(ecause)j(they)g(are)f(targeted)h(for)e -(di\013eren)o(t)i(t)o(yp)q(es)g(of)e(users:)22 b(GLA)14 
-b(is)h(in)o(tended)-42 1911 y(for)g(p)q(eople)h(who)f(simply)e(wish)i -(to)f(lev)o(erage)i(their)f(e\013orts)i(with)d(existing)h(to)q(ols)g -(and)g(libraries;)g(RE2C)f(is)h(in)o(tended)h(for)-42 -1961 y(p)q(eople)d(that)g(ha)o(v)o(e)f(more)g(sp)q(ecialized)h(needs)h -(and)e(are)h(willing)d(to)j(pro)o(vide)f(their)h(o)o(wn)f(supp)q(ort)h -(routines.)18 b(F)m(or)12 b(example,)-42 2011 y(GLA)h(pro)o(vides)h(a)f -(go)q(o)q(d)f(bu\013ering)i(mec)o(hanism,)c(RE2C)j(users)i(m)o(ust)d -(supply)h(their)h(o)o(wn.)j(These)d(di\013erences,)i(ho)o(w)o(ev)o(er,) --42 2061 y(are)f(not)f(unique)f(to)h(GLA)g(and)f(ha)o(v)o(e)h(b)q(een)h -(addressed)h(for)d(the)i(most)e(part)h(in)f(previous)h(sections.)21 -2111 y(Of)j(more)f(in)o(terest)i(is)f(the)h(di\013erences)i(in)c(the)i -(co)q(de)g(that)f(RE2C)g(and)g(GLA)g(generate.)29 b(Scanners)18 -b(generated)h(b)o(y)-42 2160 y(RE2C)14 b(and)g(GLA)f(di\013er)h -(primarily)e(in)h(t)o(w)o(o)h(asp)q(ects:)19 b(ho)o(w)14 -b(they)g(determine)g(when)h(the)f(bu\013er)h(needs)g(\014lling,)d(and)i -(ho)o(w)-42 2210 y(they)h(generate)g(co)q(de)g(for)e -Fm(switch)p Fr(es.)21 2260 y(GLA)d(uses)i(the)f(ASCI)q(I)g(NUL)f(c)o -(haracter)i(as)f(the)g(sen)o(tinel)g(to)f(determine)h(when)g(the)g -(bu\013er)g(needs)h(\014lling.)j(T)m(o)10 b(impro)o(v)o(e)-42 -2310 y(the)j(sp)q(eed)h(and)e(reduce)i(the)f(size)g(of)e(the)i -(generated)g(scanners)h(GLA)e(bu\013ers)i(only)d(complete)h(lines)g -(and)g(restricts)i(tok)o(ens)-42 2360 y(to)d(those)g(that)f(do)g(not)g -(con)o(tain)g(newline)g(\(ASCI)q(I)h(LF\))f(c)o(haracters)1013 -2345 y Fq(12)1050 2360 y Fr(.)17 b(If)10 b(a)g(tok)o(en)g(with)g(an)g -(em)o(b)q(edded)h(newline)f(c)o(haracter)-42 2409 y(\(suc)o(h)k(as)f(a) -f(commen)o(t\))f(is)h(required)i(it)e(m)o(ust)f(b)q(e)j(recognized)g -(with)e(an)g(auxiliary)f(scanner)j(written)f(in)f(C.)g(This)h(co)q(de)g -(has)-42 2459 y(to)h(p)q(erform)f(the)i(bu\013ering-related)f(b)q(o)q -(okk)o(eeping)g(that)g(is)g(done)g(automatically)d(b)o(y)i 
-(GLA-generated)i(co)q(de.)p -42 2494 780 2 v -11 2521 -a Fn(10)22 2532 y Fo(Av)n(ailable)10 b(for)h(anon)o(ymous)d(ftp)j(from) -f Fe(ftp.uu.net)i Fo(as)f Fe(/pub/text-p)o(ro)q(cessing/sg)o(ml/sgmls-) -o(1.1.)o(ta)o(r.Z)o Fo(.)-11 2560 y Fn(11)22 2572 y Fo(The)h -(GLA-generated)c(scanner)i(sizes)h(also)f(do)h(not)g(include)f(the)g -(size)h(of)g(an)g(error)g(rep)q(orting)e(mo)q(dule)g -Fg(err.o)p Fo(.)-11 2600 y Fn(12)22 2612 y Fo(This)j(is)f(discussed)f -(in)h(more)f(detail)g(in)h(Section)f(3.3.1.)913 2748 -y Fr(11)p eop -%%Page: 12 12 -12 11 bop 21 195 a Fr(The)20 b(mec)o(hanism)e(RE2C)h(uses)i(to)f -(re\014ll)g(the)g(bu\013er)h(eliminates)d(these)k(restrictions)f(and)e -(y)o(et)i(allo)o(ws)d(RE2C)i(to)-42 245 y(generate)15 -b(faster)f(and)g(smaller)e(scanners.)19 b(RE2C)13 b(also)g(allo)o(ws)g -(b)q(oth)g(auxiliary)f(and)h(primary)f(scanners)j(to)f(b)q(e)g(sp)q -(eci\014ed)-42 295 y(using)g(regular)g(expressions.)20 -b(F)m(or)13 b(example,)f(App)q(endix)i(A)g(con)o(tains)g(an)g -(auxiliary)e(scanner)j(for)f(commen)o(ts.)21 345 y(Lik)o(e)i(RE2C,)g -(GLA)h(usually)f(replaces)i Fm(switch)p Fr(es)e(with)h -Fm(if)p Fr(s.)27 b(Unlik)o(e)16 b(RE2C,)g(GLA)g(do)q(es)i(not)f(use)g -(a)g Fm(case)p Fr(-based)-42 394 y(heuristic)f(to)f(decide)h(whic)o(h)f -Fm(switch)p Fr(es)g(to)g(replace:)21 b(rather,)16 b(it)f(alw)o(a)o(ys)f -(generates)j(a)e Fm(switch)e Fr(for)i(the)h(start)g(state)g(and)-42 -444 y(uses)g Fm(if)p Fr(s)d(for)h(the)g(rest.)20 b(GLA)13 -b(replaces)i Fm(switch)p Fr(es)f(with)f(co)q(de)i(sequences)h(of)e(the) -g(form:)138 518 y Fm(if\(*YYCURSOR)20 b Fi(in)i Fl(S)505 -524 y Fq(1)524 518 y Fm(\))f(goto)g(L)698 524 y Fq(1)717 -518 y Fm(;)226 562 y(.)226 579 y(.)226 595 y(.)138 645 -y(if\(*YYCURSOR)f Fi(in)i Fl(S)505 651 y Fd(n)528 645 -y Fm(\))f(goto)g(L)702 651 y Fd(n)725 645 y Fm(;)-42 -737 y Fr(Bit)12 b(v)o(ectors)h(are)f(used)h(for)e(all)f(mem)o(b)q -(ership)g(tests)k(in)o(v)o(olving)9 b(sets)k(with)e(more)g(than)g(one)h -(elemen)o(t.)17 b(As)12 
b(an)g(optimization,)-42 787 -y(if)k(a)f(state)i(has)f(a)g(transition)g(to)g(itself)f(the)i(test)g -(as)f(to)g(whether)h(to)f(remain)f(in)g(the)i(same)e(state)i(or)f(not)g -(is)g(p)q(erformed)-42 837 y(\014rst.)25 b(F)m(or)16 -b(example,)e(Figure)i(12)f(sho)o(ws)i(the)f(GLA-generated)h(co)q(de)f -(for)g(state)g(8)g(in)f(Figure)h(2)1506 822 y Fq(13)1541 -837 y Fr(.)73 b(Note)16 b(the)g(use)h(of)291 926 y Fm(static)j -(unsigned)g(char)h(yytable[])f(=)i Fk(f)291 976 y Fm(0x00,)e(0x00,)h -(0x00,)g(0x00,)g(/*)65 b(0.)g(1.)g(2.)f(3.)22 b(*/)291 -1025 y Fl(:)7 b(:)g(:)291 1075 y Fm(0x00,)20 b(0x00,)h(0x00,)g(0x00,)g -(/*)43 b(,)g(-)h(.)f(/)22 b(*/)291 1125 y(0x01,)e(0x01,)h(0x01,)g -(0x01,)g(/*)43 b(0)g(1)h(2)f(3)22 b(*/)291 1175 y(0x01,)e(0x01,)h -(0x01,)g(0x01,)g(/*)43 b(4)g(5)h(6)f(7)22 b(*/)291 1225 -y(0x01,)e(0x01,)h(0x00,)g(0x00,)g(/*)43 b(8)g(9)h(:)f(;)22 -b(*/)291 1275 y(0x00,)e(0x00,)h(0x00,)g(0x00,)g(/*)43 -b(<)g(=)h(>)f(?)22 b(*/)291 1324 y Fl(:)7 b(:)g(:)291 -1374 y Fm(0x00,)20 b(0x00,)h(0x00,)g(0x00)g Fk(g)p Fm(;)g(/*)43 -b(|)h Fk(g)108 b Fm(127.)21 b(*/)378 1419 y(.)378 1435 -y(.)378 1452 y(.)291 1502 y(L8:)g(if\(yytable[\(*YYC)o(URSOR)o(++\)+0)o -(])e(&)j(1<<0\))e(goto)h(L8;--YYCURSOR;)291 1552 y(goto)g(A3;)527 -1631 y Fr(Figure)14 b(12:)j(GLA)d(co)q(de)h(for)e(state)i(8)e(in)h -(Figure)g(2.)-42 1731 y(128)i(elemen)o(t)f(en)o(tries)i(for)e(the)i -(bit)f(v)o(ectors)h(to)e(reduce)j(the)e(scanner)i(size:)k(A)16 -b(GLA-generated)h(scanner)g(will)e(crash)h(or)-42 1781 -y(otherwise)f(b)q(eha)o(v)o(e)g(unpredictably)f(if)f(a)h(non-ASCI)q(I)f -(c)o(haracter)j(app)q(ears)e(in)g(the)g(source)1395 1766 -y Fq(14)1432 1781 y Fr(.)21 1831 y(In)k(some)g(sense)j(the)e(results)h -(of)e(Section)h(4)f(are)i(a)e(bit)g(misleading:)26 b(the)19 -b(GLA)f(sp)q(eci\014cation)i(that)e(w)o(as)h(used)h(to)-42 -1880 y(obtain)15 b(the)g(\014gures)h(in)e(T)m(able)g(1)h(is)g(not)f(a)h -(t)o(ypical)f(GLA)g(sp)q(eci\014cation.)22 b(Usually)14 -b(scanners)j(implem)o(en)o(ted)c(using)i(GLA)-42 
1930 -y(will)h(handle)i(k)o(eyw)o(ords)f(as)h(iden)o(ti\014ers)g(as)g(GLA)f -(has)g(b)q(een)i(optimized)d(for)h(this)g([7)o(].)29 -b(T)m(able)16 b(2)h(presen)o(ts)j(a)d(more)f(fair)-42 -1980 y(comparison:)h(the)e(k)o(eyw)o(ord)f(matc)o(hing)e(rules)j(w)o -(ere)g(remo)o(v)o(ed)e(from)f(b)q(oth)j(the)f(GLA)g(and)g(RE2C)f(sp)q -(eci\014cations.)39 b(The)-42 2030 y(RE2C-generated)18 -b(scanners)g(w)o(ere)f(still)e(faster)i(and)g(smaller)d(except)k(on)e -(the)h(MIPS)g(R4000,)e(where)i(the)g(cc-compiled)-42 -2080 y(GLA)d(scanner)h(w)o(as)f(sligh)o(tly)f(faster.)21 -2130 y(Note)k(ho)o(w)o(ev)o(er,)g(that)g(the)g(RE2C)g(sp)q -(eci\014cation)g(can)g(b)q(e)g(substan)o(tially)f(sp)q(ed)i(up)f(b)o(y) -f(using)h(a)f(tec)o(hnique)i(akin)e(to)-42 2179 y(lo)q(op)e(unrolling.) -i(Replacing)d(the)i(original)d(k)o(eyw)o(ord)h(matc)o(hing)f(rule)j(in) -e(the)i(RE2C)e(sp)q(eci\014cation)1548 2164 y Fq(15)138 -2252 y Fm(L)22 b(I*)435 b Fk(f)22 b Fm(RET\(ID\);)e Fk(g)-42 -2345 y Fr(with)14 b(the)h(follo)o(wing)c(rules)p -42 -2380 780 2 v -11 2407 a Fn(13)22 2419 y Fo(Actually)m(,)f(GLA)i(w)o -(ould)f(generate)e(a)i Fg(while)f Fo(statemen)o(t.)j(Most)e(compilers)e -(will)j(generate)d(the)i(same)f(ob)r(ject)g(co)q(de)g(for)h(b)q(oth.) 
--11 2446 y Fn(14)22 2458 y Fo(No)h(c)o(hec)o(ks)e(are)h(made)f(to)h -(ensure)f(that)g(only)h(7-bit)f(c)o(haracters)f(app)q(ear)h(in)h(the)g -(input.)-11 2486 y Fn(15)22 2498 y Fg(L)e Fo(=)i Fg([a-zA-Z)p -215 2498 11 2 v 10 w(])g Fo(and)g Fg(I)e Fo(=)i Fg([a-zA-Z)p -514 2498 V 10 w(0-9])n Fo(.)913 2748 y Fr(12)p eop -%%Page: 13 13 -13 12 bop 727 776 a Fr(time)387 b(space)380 826 y(program)p -557 841 2 50 v 61 w(user)72 b(sys)65 b(total)p 974 841 -V 62 w(text)58 b(data)e(bss)64 b(total)p 355 842 1158 -2 v 850 877 a Fi(R4000)16 b(/)f(gc)n(c2.3.3)g(-O)p 355 -894 V 479 929 a Fr(gla)p 557 944 2 50 v 71 w(2.63)p 703 -944 V 51 w(0.58)p 828 944 V 71 w(3.21)p 974 944 V 50 -w(5040)p 1109 944 V 50 w(2496)p 1243 944 V 51 w(144)p -1357 944 V 71 w(7680)458 979 y(re2c)p 557 994 V 73 w(2.50)p -703 994 V 51 w(0.65)p 828 994 V 71 w(3.15)p 974 994 V -50 w(6448)p 1109 994 V 71 w(512)p 1243 994 V 92 w(0)p -1357 994 V 72 w(6960)414 1028 y(re2c)g(-s)p 557 1043 -V 72 w(2.49)p 703 1043 V 51 w(0.67)p 828 1043 V 71 w(3.16)p -974 1043 V 50 w(4976)p 1109 1043 V 50 w(4224)p 1243 1043 -V 92 w(0)p 1357 1043 V 72 w(9200)382 1078 y(re2c)g(-s)f -Fk(y)p 557 1093 V 72 w Fr(2.08)p 703 1093 V 51 w(0.59)p -828 1093 V 71 w(2.67)p 974 1093 V 50 w(5792)p 1109 1093 -V 50 w(4224)p 1243 1093 V 92 w(0)p 1357 1093 V 52 w(10016)p -355 1095 1158 2 v 726 1130 a Fi(R4000)i(/)f(c)n(c2.11.2)g(-O)g(-Olimit) -e(5000)p 355 1146 V 479 1181 a Fr(gla)p 557 1196 2 50 -v 71 w(2.43)p 703 1196 V 51 w(0.64)p 828 1196 V 71 w(3.07)p -974 1196 V 50 w(6512)p 1109 1196 V 50 w(2416)p 1243 1196 -V 51 w(128)p 1357 1196 V 71 w(9056)458 1231 y(re2c)p -557 1246 V 73 w(2.93)p 703 1246 V 51 w(0.67)p 828 1246 -V 71 w(3.60)p 974 1246 V 50 w(8048)p 1109 1246 V 71 w(528)p -1243 1246 V 92 w(0)p 1357 1246 V 72 w(8576)414 1281 y(re2c)i(-s)p -557 1296 V 72 w(3.04)p 703 1296 V 51 w(0.64)p 828 1296 -V 71 w(3.68)p 974 1296 V 50 w(9952)p 1109 1296 V 50 w(2208)p -1243 1296 V 92 w(0)p 1357 1296 V 52 w(12160)p 355 1297 -1158 2 v 838 1332 a 
Fi(SP)m(AR)o(C)g(/)g(gc)n(c2.3.3)g(-O)p -355 1349 V 479 1384 a Fr(gla)p 557 1399 2 50 v 71 w(4.08)p -703 1399 V 51 w(1.65)p 828 1399 V 71 w(5.73)p 974 1399 -V 50 w(5472)p 1109 1399 V 50 w(1656)p 1243 1399 V 51 -w(136)p 1357 1399 V 71 w(7264)458 1434 y(re2c)p 557 1449 -V 73 w(3.77)p 703 1449 V 51 w(1.67)p 828 1449 V 71 w(5.44)p -974 1449 V 50 w(7008)p 1109 1449 V 113 w(0)p 1243 1449 -V 92 w(0)p 1357 1449 V 72 w(7008)414 1483 y(re2c)g(-s)p -557 1498 V 72 w(3.66)p 703 1498 V 51 w(2.37)p 828 1498 -V 71 w(6.03)p 974 1498 V 50 w(9112)p 1109 1498 V 113 -w(0)p 1243 1498 V 92 w(0)p 1357 1498 V 72 w(9112)p 355 -1500 1158 2 v 869 1535 a Fi(i486)g(/)g(gc)n(c2.4.5)g(-O)p -355 1551 V 479 1586 a Fr(gla)p 557 1601 2 50 v 71 w(5.04)p -703 1601 V 51 w(1.15)p 828 1601 V 71 w(6.19)p 974 1601 -V 50 w(5368)p 1109 1601 V 50 w(1632)p 1243 1601 V 51 -w(108)p 1357 1601 V 71 w(7108)458 1636 y(re2c)p 557 1651 -V 73 w(4.75)p 703 1651 V 51 w(1.17)p 828 1651 V 71 w(5.92)p -974 1651 V 50 w(5448)p 1109 1651 V 113 w(0)p 1243 1651 -V 92 w(0)p 1357 1651 V 72 w(5448)414 1686 y(re2c)g(-s)p -557 1701 V 72 w(5.06)p 703 1701 V 51 w(1.13)p 828 1701 -V 71 w(6.19)p 974 1701 V 50 w(8248)p 1109 1701 V 113 -w(0)p 1243 1701 V 92 w(0)p 1357 1701 V 72 w(8248)p 355 -1703 1158 2 v 860 1737 a Fi(68020)h(/)f(gc)n(c1.40)h(-O)p -355 1754 V 479 1789 a Fr(gla)p 557 1804 2 50 v 51 w(32.69)p -703 1804 V 50 w(3.37)p 828 1804 V 50 w(36.06)p 974 1804 -V 50 w(4772)p 1109 1804 V 50 w(1632)p 1243 1804 V 51 -w(106)p 1357 1804 V 71 w(6510)458 1839 y(re2c)p 557 1854 -V 53 w(29.86)p 703 1854 V 50 w(3.74)p 828 1854 V 50 w(33.60)p -974 1854 V 50 w(4468)p 1109 1854 V 113 w(0)p 1243 1854 -V 92 w(0)p 1357 1854 V 72 w(4468)414 1889 y(re2c)f(-s)p -557 1904 V 52 w(28.77)p 703 1904 V 50 w(3.55)p 828 1904 -V 50 w(32.32)p 974 1904 V 50 w(5616)p 1109 1904 V 113 -w(0)p 1243 1904 V 92 w(0)p 1357 1904 V 72 w(5616)108 -1978 y(T)m(able)i(2:)24 b(Scanner)18 b(p)q(erformance)f(with)g(k)o(eyw) -o(ords)g(treated)h(as)f(iden)o(ti\014ers.)29 b Fk(y)17 -b 
Fr(uses)h(an)f(\\unrolled")108 2028 y(sp)q(eci\014cation.)913 -2748 y(13)p eop -%%Page: 14 14 -14 13 bop 138 185 a Fm(L)501 b Fk(f)22 b Fm(RET\(ID\);)e -Fk(g)138 235 y Fm(L)i(I)457 b Fk(f)22 b Fm(RET\(ID\);)e -Fk(g)138 284 y Fm(L)i(I)g(I)413 b Fk(f)22 b Fm(RET\(ID\);)e -Fk(g)138 334 y Fm(L)i(I)g(I)f(I)370 b Fk(f)22 b Fm(RET\(ID\);)e -Fk(g)138 384 y Fm(L)i(I)g(I)f(I)h(I)326 b Fk(f)22 b Fm(RET\(ID\);)e -Fk(g)138 434 y Fm(L)i(I)g(I)f(I)h(I)f(I)283 b Fk(f)22 -b Fm(RET\(ID\);)e Fk(g)138 484 y Fm(L)i(I)g(I)f(I)h(I)f(I)h(I)239 -b Fk(f)22 b Fm(RET\(ID\);)e Fk(g)138 533 y Fm(L)i(I)g(I)f(I)h(I)f(I)h -(I)f(I)196 b Fk(f)22 b Fm(RET\(ID\);)e Fk(g)138 583 y -Fm(L)i(I*)435 b Fk(f)22 b Fm(RET\(ID\);)e Fk(g)-42 677 -y Fr(reduces)f(the)e(n)o(um)o(b)q(er)f(of)g(end-of-bu\013er)h(c)o(hec)o -(ks)h(and)e(results)i(in)e(a)g(signi\014can)o(t)g(sp)q(eed)i(impro)o(v) -o(emen)o(t)13 b(o)o(v)o(er)k(the)g(GLA-)-42 726 y(generated)f(scanner.) --42 864 y Fs(6)67 b(Summary)24 b(and)f(F)-6 b(urther)25 -b(W)-6 b(ork)-42 955 y Fr(This)16 b(pap)q(er)h(has)f(describ)q(ed)h -(RE2C,)e(a)h(to)q(ol)f(for)g(creating)h(lexical)f(analyzers.)24 -b(Unlik)o(e)16 b(other)g(suc)o(h)h(to)q(ols,)e(RE2C)g(con-)-42 -1004 y(cen)o(trates)i(solely)d(on)g(generating)g(e\016cien)o(t)h(co)q -(de)g(for)f(matc)o(hing)f(regular)h(expressions.)21 b(Not)14 -b(only)g(do)q(es)h(this)g(singleness)-42 1054 y(of)h(purp)q(ose)i(mak)o -(e)c(RE2C)i(more)g(suitable)g(for)g(a)g(wider)g(v)n(ariet)o(y)g(of)g -(applications,)f(it)h(allo)o(ws)f(it)h(to)g(generate)i(scanners)-42 -1104 y(whic)o(h)c(approac)o(h)g(hand-crafted)h(scanners)g(in)f(terms)g -(of)f(size)i(and)e(sp)q(eed.)20 b(Compared)13 b(to)h(scanners)i -(generated)f(b)o(y)f(\015ex,)-42 1154 y(and)g(GLA,)f(RE2C-generated)i -(scanners)g(are)g(faster)f(and)g(in)f(man)o(y)f(cases)k(smaller)c(as)i -(w)o(ell.)21 1204 y(While)g(RE2C-generated)h(scanners)h(p)q(erform)e(w) -o(ell,)g(there)i(is)e(still)g(ro)q(om)f(for)h(impro)o(v)o(emen)o(t.)k -(Near)d(term)f(impro)o(v)o(e-)-42 1254 
y(men)o(ts)g(include)g(using)f -(GLA's)h(bit)f(v)o(ectors)i(to)f(simplify)d(some)i Fm(switch)p -Fr(es)h(and)f(adding)g(a)h(state)h(unrolling)d(op)q(erator.)21 -1303 y(In)i(the)g(longer)g(term,)f(inline)g(actions)h(will)e(b)q(e)i -(added)h(to)e(RE2C.)g(F)m(or)h(example,)e(a)h(sp)q(eci\014cation)i(lik) -o(e)138 1386 y Fm(D)22 b Fk(f)p Fm(c)f(=)h($)p Fk(g)f -Fm(\(D)h Fk(f)p Fm(c)f(=)h(10*c)e(+)i($)p Fk(g)p Fm(\)*)-42 -1469 y Fr(migh)o(t)15 b(b)q(e)i(used)g(to)f(obtain)g(the)h(v)n(alue)f -(of)g(a)g(previously)g(scanned)i(in)o(teger.)26 b(T)o(ypically)m(,)14 -b(these)k(sorts)g(of)d(sp)q(eci\014cations)-42 1519 y(w)o(ould)f(b)q(e) -g(used)h(as)f(an)f(action)h(in)f(some)g(other)i(sp)q(eci\014cation.)-42 -1656 y Fs(7)67 b(Ac)n(kno)n(wledgmen)n(ts)-42 1747 y -Fr(The)15 b(authors)f(thank)g(the)g(referees)j(for)c(their)h(man)o(y)e -(v)n(aluable)h(commen)o(ts)f(and)i(suggestions.)913 2748 -y(14)p eop -%%Page: 15 15 -15 14 bop -42 195 a Fs(A)68 b(C)22 b(Scanner)-42 276 -y Fg(#define)16 b(BSIZE)192 b(8192)-42 315 y(#define)16 -b(RET\(i\))174 b({s->cur)15 b(=)i(cursor;)e(return)h(i;})-42 -394 y(#define)g(YYCTYPE)156 b(uchar)-42 433 y(#define)16 -b(YYCURSOR)138 b(cursor)-42 473 y(#define)16 b(YYLIMIT)156 -b(s->lim)-42 512 y(#define)16 b(YYMARKER)138 b(s->ptr)-42 -552 y(#define)16 b(YYFILL\(n\))120 b({cursor)15 b(=)i(fill\(s,)e -(cursor\);})-42 631 y(typedef)h(struct)f(Scanner)g({)29 -670 y(int)299 b(fd;)29 709 y(uint)281 b(line;)29 749 -y(uchar)263 b(*bot,)16 b(*tok,)g(*ptr,)f(*cur,)h(*pos,)g(*lim,)g(*top,) -g(*eof;)-42 788 y(})i(Scanner;)-42 867 y(uchar)e(*fill\(Scann)o(er)e -(*s,)j(uchar)f(*cursor\){)29 907 y(if\(!s->eof)o(\){)100 -946 y(uint)g(cnt)g(=)i(s->tok)d(-)i(s->bot;)100 985 y(if\(cnt\){)d(/*)j -(move)g(partial)e(token)h(to)g(bottom)g(*/)170 1025 y(memcpy\(s->b)o -(ot,)e(s->tok,)h(s->lim)g(-)j(s->tok\);)d(s->tok)g(=)i(s->bot;)170 -1064 y(s->ptr)f(-=)h(cnt;)f(cursor)f(-=)i(cnt;)f(s->pos)g(-=)h(cnt;)f -(s->lim)g(-=)h(cnt;)100 1104 y(})100 1143 y(if\(\(s->to)o(p)e(-)i 
-(s->lim\))e(<)i(BSIZE\){)f(/*)g(buffer)g(needs)g(to)h(be)g(expanded)d -(*/)170 1183 y(uchar)i(*buf)g(=)i(\(uchar*\))c(malloc\(\(\(s-)o(>li)o -(m)h(-)i(s->bot\))e(+)i(BSIZE\)*size)o(of\()o(uch)o(ar\))o(\);)170 -1222 y(memcpy\(buf,)d(s->tok,)h(s->lim)h(-)h(s->tok\);)e(s->tok)g(=)i -(buf;)170 1262 y(s->ptr)f(=)h(&buf[s->pt)o(r)e(-)i(s->bot];)e(cursor)g -(=)j(&buf[curs)o(or)c(-)k(s->bot];)170 1301 y(s->pos)e(=)h(&buf[s->po)o -(s)e(-)i(s->bot];)e(s->lim)g(=)j(&buf[s->l)o(im)c(-)k(s->bot];)170 -1340 y(s->top)e(=)h(&s->lim[BS)o(IZE)o(];)170 1380 y(free\(s->bot)o -(\);)d(s->bot)i(=)h(buf;)100 1419 y(})100 1459 y(if\(\(cnt)e(=)i -(read\(s->fd)o(,)e(\(char*\))g(s->lim,)g(BSIZE\)\))g(!=)i(BSIZE\){)e -(/*)i(EOF)g(*/)170 1498 y(s->eof)f(=)h(&s->lim[cn)o(t];)d(*\(s->eof\)+) -o(+)h(=)i('\\n';)100 1538 y(})100 1577 y(s->lim)e(+=)i(cnt;)29 -1616 y(})29 1656 y(return)f(cursor;)-42 1695 y(})-42 -1774 y(int)h(scan\(Scann)o(er)d(*s\){)100 1814 y(uchar)h(*cursor)h(=)h -(s->cur;)-42 1853 y(std:)70 b(s->tok)15 b(=)i(cursor;)-42 -1892 y(/*!re2c)100 1932 y("/*")157 b({)17 b(goto)f(comment;)f(})-42 -2013 y Fc(:)6 b(:)g(:)11 b Fb(mor)n(e)j(rules)e Fc(:)6 -b(:)g(:)100 2086 y Fg([)17 b(\\t\\v\\f]+)50 b({)17 b(goto)f(std;)h(}) -100 2125 y("\\n")157 b({)17 b(if\(cursor)e(==)i(s->eof\))e(RET\(EOI\);) -f(s->pos)i(=)h(cursor;)e(s->line++;)364 2164 y(goto)h(std;)h(})100 -2204 y([\\000-\\37)o(7])32 b({)17 b(printf\("une)o(xpe)o(cte)o(d)d -(character:)h('\045c'\\n",)f(*s->tok\);)364 2243 y(goto)i(std;)h(})-42 -2283 y(*/)-42 2322 y(comment:)-42 2362 y(/*!re2c)100 -2401 y("*/")157 b({)17 b(goto)f(std;)h(})100 2440 y("\\n")157 -b({)17 b(if\(cursor)e(==)i(s->eof\))e(RET\(EOI\);)f(s->tok)i(=)h -(s->pos)e(=)j(cursor;)d(s->line++;)364 2480 y(goto)h(comment;)f(})100 -2519 y([\\000-\\37)o(7])32 b({)17 b(goto)f(comment;)f(})-42 -2559 y(*/)-42 2598 y(})913 2748 y Fr(15)p eop -%%Page: 16 16 -16 15 bop -42 195 a Fs(References)-21 286 y Fr([1])20 -b Fa(Aho,)d(A.)g(V.,)f(Sethi,)h(R.,)h(and)f(Ullman,)h(J.)e(D.)21 -b 
Fi(Compilers:)e(principles,)c(te)n(chniques,)h(and)g(to)n(ols)p -Fr(.)k(Addison-)44 336 y(W)m(esley)m(,)13 b(1988.)k(Reprin)o(ted)d -(with)f(corrections.)-21 419 y([2])20 b Fa(Bernstein,)c(R.)f(L.)k -Fr(Pro)q(ducing)13 b(go)q(o)q(d)g(co)q(de)i(for)e(the)h(case)g -(statemen)o(t.)j Fi(Softwar)n(e{Pr)n(actic)n(e)d(and)h(Exp)n(erienc)n -(e)g(15)p Fr(,)44 469 y(10)e(\(Octob)q(er)j(1985\),)c(1021{1024.)-21 -552 y([3])20 b Fa(DeRemer,)e(F.,)f(and)g(Pennello,)i(T.)h -Fr(E\016cien)o(t)15 b(computation)e(of)h Fl(LALR)p Fr(\(1\))h(lo)q -(ok-ahead)f(sets.)21 b Fi(A)o(CM)15 b(T)m(r)n(ans-)44 -602 y(actions)g(on)g(Pr)n(o)n(gr)n(amming)g(L)n(anguages)h(and)f -(Systems)g(4)p Fr(,)f(4)g(\(Octob)q(er)h(1982\),)e(615{649.)-21 -685 y([4])20 b Fa(Ellis,)c(M.,)g(and)g(Str)o(oustr)o(up,)h(B.)i -Fi(The)c(A)o(nnotate)n(d)h(C++)e(R)n(efer)n(enc)n(e)h(Manual)p -Fr(.)j(Addison-W)m(esley)m(,)13 b(1990.)-21 768 y([5])20 -b Fa(Fraser,)c(C.)f(W.,)h(and)g(Hanson,)h(D.)f(R.)i Fr(A)c -(retargetable)g(compiler)e(for)h(ANSI)h(C.)j Fi(SIGPLAN)e(Notic)n(es)f -(26)p Fr(,)g(10)44 817 y(\(Octob)q(er)h(1991\),)e(29{43.)-21 -900 y([6])20 b Fa(Garey,)g(M.)d(R.,)i(and)g(Johnson,)h(D.)e(S.)25 -b Fi(Computers)16 b(and)h(Intr)n(actability:)k(A)c(Guide)g(to)f(the)h -(The)n(ory)f(of)h(NP-)44 950 y(Completeness)p Fr(.)h(W.)13 -b(H.)g(F)m(reeman)g(and)h(Compan)o(y)m(,)d(1991.)-21 -1033 y([7])20 b Fa(Gra)m(y,)14 b(R.)e(W.)g Fl(\015)r -Fr(-GLA)e(-)g(A)g(generator)g(for)g(lexical)f(analyzers)h(that)g -(programmers)e(can)i(use.)i Fi(USENIX)g(Confer)n(enc)n(e)44 -1083 y(Pr)n(o)n(c)n(e)n(e)n(dings)17 b Fr(\(June)e(1988\),)d(147{160.) 
--21 1166 y([8])20 b Fa(Gra)m(y,)c(R.)f(W.,)g(Heuring,)g(V.)f(P.,)h -(Levi,)h(S.)f(P.,)g(Slo)o(ane,)i(A.)d(M.,)h(and)g(W)-5 -b(aite,)15 b(W.)g(M.)h Fr(Eli:)h(A)c(complete,)44 1216 -y(\015exible)h(compiler)e(construction)j(system.)j Fi(Communic)n -(ations)d(of)g(the)g(A)o(CM)f(35)p Fr(,)g(2)g(\(F)m(ebruary)g(1992\),)f -(121{131.)-21 1299 y([9])20 b Fa(Gr)o(osch,)e(J.)i Fr(E\016cien)o(t)15 -b(generation)g(of)f(lexical)f(analysers.)21 b Fi(Softwar)n(e{Pr)n -(actic)n(e)15 b(and)h(Exp)n(erienc)n(e)g(19)p Fr(,)f(11)f(\(1989\),)44 -1349 y(1089{1103.)-42 1432 y([10])20 b Fa(Harrison,)c(M.)g(A.)j -Fi(Intr)n(o)n(duction)c(to)g(F)m(ormal)f(L)n(anguage)i(The)n(ory)p -Fr(.)i(Addison-W)m(esley)m(,)13 b(1978.)-42 1515 y([11])20 -b Fa(Hennessy,)15 b(J.)e(L.,)i(and)f(Mendelsohn,)i(N.)f -Fr(Compilation)8 b(of)j(the)h(Pascal)g(case)g(statemen)o(t.)i -Fi(Softwar)n(e{Pr)n(actic)n(e)44 1565 y(and)h(Exp)n(erienc)n(e)h(12)p -Fr(,)d(9)h(\(Septem)o(b)q(er)h(1982\),)d(879{882.)-42 -1648 y([12])20 b Fa(Horspool,)c(R.)g(N.,)f(and)g(Whitney,)h(M.)h -Fr(Ev)o(en)d(faster)g(LR)e(parsing.)k Fi(Softwar)n(e{Pr)n(actic)n(e)d -(and)i(Exp)n(erienc)n(e)g(20)p Fr(,)44 1697 y(6)e(\(1990\),)g(515{535.) 
--42 1780 y([13])20 b Fa(Ja)o(cobson,)d(V.)i Fr(T)m(uning)14 -b(UNIX)g(Lex)h(or)f(it's)f(NOT)i(true)g(what)f(they)h(sa)o(y)f(ab)q -(out)g(Lex.)19 b(In)14 b Fi(USENIX)i(Confer)n(enc)n(e)44 -1830 y(Pr)n(o)n(c)n(e)n(e)n(dings)h Fr(\(W)m(ashington,)12 -b(DC,)h(Win)o(ter)h(1987\),)f(pp.)g(163{164.)j(Abstract)f(only)m(.)-42 -1913 y([14])20 b Fa(Kernighan,)h(B.)e(W.,)h(and)g(Ritchie,)f(D.)h(M.)28 -b Fi(The)18 b(C)f(Pr)n(o)n(gr)n(amming)g(L)n(anguage,)i(2nd)g(Ed.)28 -b Fr(Pren)o(tice-Hall,)44 1963 y(Inc.,)13 b(1988.)-42 -2046 y([15])20 b Fa(Lesk,)25 b(M.)e(E.)38 b Fr(LEX)20 -b({)g(a)g(lexical)g(analyzer)g(generator.)38 b(Computing)18 -b(Science)k(T)m(ec)o(hnical)e(Rep)q(ort)h(39,)g(Bell)44 -2096 y(T)m(elephone)14 b(Lab)q(oratories,)g(Murra)o(y)g(Hill,)e(NJ,)h -(1975.)-42 2179 y([16])20 b Fa(P)l(axson,)e(V.)h Fr(\015ex)14 -b({)g(man)e(pages,)i(1988.)j(In)d Ff(\015ex-2.3.7.ta)o(r.Z)p -Fr(.)e(Av)n(ailable)g(for)i(anon)o(ymous)e(ftp)i(from)e -Ff(ftp.uu.net)j Fr(in)44 2229 y Ff(/pack)o(ages/gnu)p -Fr(.)-42 2312 y([17])20 b Fa(Pennello,)k(T.)d(J.)33 b -Fr(V)m(ery)19 b(fast)g(LR)f(parsing.)33 b(In)19 b Fi(Pr)n(o)n(c)n(e)n -(e)n(dings)g(of)g(the)h(A)o(CM)f(SIGPLAN'86)h(Symp)n(osium)g(on)44 -2362 y(Compiler)14 b(Construction)j Fr(\(July)c(1986\),)g(A)o(CM.)-42 -2445 y([18])20 b Fa(Sale,)g(A.)25 b Fr(The)17 b(implemen)o(tatio)o(n)c -(of)j(case)h(statemen)o(ts)g(in)e(Pascal.)25 b Fi(Softwar)n(e{Pr)n -(actic)n(e)16 b(and)i(Exp)n(erienc)n(e)f(11)p Fr(,)g(9)44 -2494 y(\(Septem)o(b)q(er)d(1981\),)f(929{942.)913 2748 -y(16)p eop -%%Trailer -end -userdict /end-hook known{end-hook}if -%%EOF diff --git a/tools/re2c/doc/sample.bib b/tools/re2c/doc/sample.bib deleted file mode 100644 index 1f34ab13c..000000000 --- a/tools/re2c/doc/sample.bib +++ /dev/null @@ -1,48 +0,0 @@ -@Article{Bumbulis94, - author = {Peter Bumbulis and Donald D. 
Cowan}, - title = {RE2C -- A More Versatile Scanner Generator}, - journal = "ACM Letters on Programming Languages and Systems", - volume = 2, - number = "1--4", - year = 1994, - abstract = { - It is usually claimed that lexical analysis routines are still coded by - hand, despite the widespread availability of scanner generators, for - efficiency reasons. While efficiency is a consideration, there exist - freely available scanner generators such as GLA \cite{Gray88} that can - generate scanners that are faster than most hand-coded ones. However, - most generated scanners are tailored for a particular environment, and - retargetting these scanners to other environments, if possible, is - usually complex enough to make a hand-coded scanner more appealing. In - this paper we describe RE2C, a scanner generator that not only generates - scanners which are faster (and usually smaller) than those produced by - any other scanner generator known to the authors, including GLA, but - also adapt easily to any environment. - } -} -@Article{Gray88, - author = {Robert W. Gray}, - title = {{$\gamma$-GLA} - {A} Generator for Lexical Analyzers That - Programmers Can Use}, - journal = {USENIX Conference Proceedings}, - year = {1988}, - month = {June}, - pages = {147-160}, - abstract = {Writing an efficient lexical analyzer for even a simple - language is not a trivial task, and should not be done by hand. We - describe GLA, a tool that generates very efficient scanners. These - scanners do not use the conventional transition matrix, but instead - use a few 128 element vectors. Scanning time is only slightly - greater than the absolute minimum --- the time it takes to look at - each character in a file. The GLA language allows simple, concise - specification of scanners. Augmenting regular expressions with - auxiliary scanners easily handles nasty problems such as C comments - and C literal constants. 
We formalize the connection between token - scanning and token processing by associating a processor with - appropriate patterns. A library of canned descriptions simplifies the - specification of commonly used language pieces --- such as, - C\_IDENTIFIERS, C\_STRINGS, PASCAL\_COMMENTS, etc. Finally, carefully - tuned lexical analysis support modules are provided for error - handling, input buffering, storing identifiers in hash tables and - manipulating denotations.} -} diff --git a/tools/re2c/examples/001_upn_calculator/README b/tools/re2c/examples/001_upn_calculator/README new file mode 100644 index 000000000..81377d75a --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/README @@ -0,0 +1,83 @@ +re2c lesson 001_upn_calculator, (c) M. Boerger 2006 + +This lesson gets you started with re2c. In the end you will have an easy RPN +(reverse polish notation) calculator for use at command line. + +You will learn about the basic interface of re2c when scanning input strings. +How to detect the end of the input and use that to stop scanning in order to +avoid problems. + +Once you have successfully installed re2c you can use it to generate *.c files +from the *.re files presented in this lesson. Actually the expected *.c files +are already present. So you should name them *.cc or something alike or just +give them a different name like test.c. To do so you simply change into the +directory and execute the following command: + + re2c calc_001.re > test.c + +Then use your compiler to compile that code and run it. If you are using gcc +you simply do the following: + + gcc -o test.o test.c + ./test.o + +If you are using windows you might want to read till the end of this lesson. + +When you want to debug the code it helps to make re2c generate working #line +information. 
To do so you simply specify the output file using the -o switch +followed by the output filename: + + re2c -o test.c calc_001.re + +The input files *.re each contain basic step by comments that explain what is +going on and what you can see in the examples. + +In order to optimize the generated code we will use the -s command line switch +of re2c. This tells re2c to generate code that uses if statements rather +then endless switch/case expressions where appropriate. Note that the file name +extension is actually '.s.re' to tell the test system to use the -s switch. To +invoke re2 you do the following: + + re2c -s -o test.c calc_006.s.re + +Finally we use the -b switch to have the code use a decision table. The -b +switch also contains the -s behavior. + + re2c -b -o test.c calc_007.b.re + + + +------------------------------------------------------------------------------- + +For windows users Lynn Allan provided some additional stuff to get you started +in the Microsoft world. This addon resides in the windows subdirectory and +gives you something to expereiment with. The code in that directory is based +on the first step and has the following changes: + +* vc6 .dsp/.dsw and vc7/vc8 .sln/.vcproj project files that have "Custom Build +Steps" that can tell when main.re changes, and know how to generate main.c +from main.re. They assume that you unpacked the zip package and have re2c +itself build or installed in Release and Release-2005 directory respectively. +If re2c cannot be found you need to modify the custom build step and correct +the path to re2c. + +* BuildAndRun.bat to do command line rec2 and then cl and then run the +executable (discontinues with message if errors). + +* built-in cppunit-like test to confirm it worked as expected. + +* array of test strings "fed" to scan rather than file contents to facilitate +testing and also reduce the newbie learning curve. + +* HiResTimer output for 10,000 loops and 100,000 loops. 
While this might be +excessive for this lesson, it illustrates how to do it for subsequent lessons +and your own stuff using windows. Also it shows that Release build is as fast +as strncmp for this test and can probably be made significantly faster. + +* If you want to build the other steps of this lesson using windows tools +simply copy the *.re files into the windows directory as main.re and rebuild. + + +------------------------------------------------------------------------------- +Sidenote: UPN is the german translation of RPN, somehow hardcoded into the +authors brain :-) diff --git a/tools/re2c/examples/001_upn_calculator/calc_001.re b/tools/re2c/examples/001_upn_calculator/calc_001.re new file mode 100644 index 000000000..fe8d3ae1b --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_001.re @@ -0,0 +1,84 @@ +/* re2c lesson 001_upn_calculator, calc_001, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- basic interface for string reading + + . We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL + . YYCTYPE is the type re2c operates on or in other words the type that + it generates code for. While it is not a big difference when we were + using 'unsigned char' here we would need to run re2c with option -w + to fully support types with sieof() > 1. + . YYCURSOR is used internally and holds the current scanner position. In + expression handlers, the code blocks after re2c expressions, this can be + used to identify the end of the token. + . YYMARKER is not always being used so we set an initial value to avoid + a compiler warning. Here we could also omit it compleley. + . YYLIMIT stores the end of the input. Unfortunatley we have to use strlen() + in this lesson. In the next example we see one way to get rid of it. + . We use a 'for(;;)'-loop around the scanner block. We could have used a + 'while(1)'-loop instead but some compilers generate a warning for it. + . 
To make the output more readable we use 're2c:indent:top' scanner + configuration that configures re2c to prepend a single tab (the default) + to the beginning of each output line. + . The following lines are expressions and for each expression we output the + token name and continue the scanner loop. + . The second last token detects the end of our input, the terminating zero in + our input string. In other scanners detecting the end of input may vary. + For example binary code may contain \0 as valid input. + . The last expression accepts any input character. It tells re2c to accept + the opposit of the empty range. This includes numbers and our tokens but + as re2c goes from top to botton when evaluating the expressions this is no + problem. + . The first three rules show that re2c actually prioritizes the expressions + from top to bottom. Octal number require a starting "0" and the actual + number. Normal numbers start with a digit greater 0. And zero is finally a + special case. A single "0" is detected by the last rule of this set. And + valid ocal number is already being detected by the first rule. This even + includes multi "0" sequences that in octal notation also means zero. + Another way would be to only use two rules: + "0" [0-9]+ + "0" | ( [1-9] [0-9]* ) + A full description of re2c rule syntax can be found in the manual. 
+*/ + +#include +#include +#include + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l) +#define YYMARKER q +#define YYFILL(n) + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + "0"[0-9]+ { printf("Oct\n"); continue; } + [1-9][0-9]* { printf("Num\n"); continue; } + "0" { printf("Num\n"); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("-\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1], strlen(argv[1])); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_002.re b/tools/re2c/examples/001_upn_calculator/calc_002.re new file mode 100644 index 000000000..417e9f315 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_002.re @@ -0,0 +1,69 @@ +/* re2c lesson 001_upn_calculator, calc_002, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- making use of YYFILL + + . Here we modified the scanner to not require strlen() on the call. Instead + we compute limit on the fly. That is whenever more input is needed we + search for the terminating \0 in the next n chars the scanner needs. + . If there is not enough input we quit the scanner. + . Note that in lesson_001 YYLIMIT was a character pointer computed only once. + Here is of course also of type YYCTYPE but a variable that gets reevaluated + by YYFILL(). + . To make the code smaller we take advantage of the fact that our loop has no + break so far. This allows us to use break here and have the code that is + used for YYFILL() not contain the printf in every occurence. That way the + generated code gets smaller. 
+ +*/ + +#include +#include +#include + +int fill(char *p, int n, char **l) +{ + while (*++p && n--) ; + * l = p; + return n <= 0; +} + +int scan(char *s) +{ + char *p = s; + char *l = s; + char *q = 0; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT l +#define YYMARKER q +#define YYFILL(n) { if (!fill(p, n, &l)) break; } + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + "0"[0-9]+ { printf("Oct\n"); continue; } + [1-9][0-9]* { printf("Num\n"); continue; } + "0" { printf("Num\n"); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("+\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } + printf("OOD\n"); return 2; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1]); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_003.re b/tools/re2c/examples/001_upn_calculator/calc_003.re new file mode 100644 index 000000000..e48aec928 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_003.re @@ -0,0 +1,61 @@ +/* re2c lesson 001_upn_calculator, calc_003, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- making use of YYFILL + + . Again provide the length of the input to generate the limit only once. Now + we can use YYFILL() to detect the end and simply return since YYFILL() is + only being used if the next scanner run might use more chars then YYLIMIT + allows. + . Note that we now use (s+l+2) instead of (s+l) as we did in lesson_001. In + the first lesson we did not quit from YYFILL() and used a special rule to + detect the end of input. Here we use the fact that we know the exact end + of input and that this length does not include the terminating zero. Since + YYLIMIT points to the first character behind the used buffer we use "+ 2". + If we would use "+1" we could drop the "\000" rule but could no longer + distinguish between end of input and out of data. 
+ +*/ + +#include +#include +#include + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l+2) +#define YYMARKER q +#define YYFILL(n) { printf("OOD\n"); return 2; } + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + "0"[0-9]+ { printf("Oct\n"); continue; } + [1-9][0-9]* { printf("Num\n"); continue; } + "0" { printf("Num\n"); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("+\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } + return 0; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1], strlen(argv[1])); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_004.re b/tools/re2c/examples/001_upn_calculator/calc_004.re new file mode 100644 index 000000000..977e438bf --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_004.re @@ -0,0 +1,78 @@ +/* re2c lesson 001_upn_calculator, calc_004, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- making use of definitions + . We provide complex rules as definitions. We can even have definitions made + up from other definitions. And we could also use definitions as part of + rules and not only as full rules as shown in this lesson. + +- showing the tokens + . re2c does not store the beginning of a token on its own but we can easily + do this by providing variable, in our case t, that is set to YYCURSOR on + every loop. If we were not using a loop here the token, we could have used + s instead of a new variable instead. + . As we use the token for an output function that requires a terminating zero + we copy the token. Alternatively we could store the end of the token, then + replace it with a zero character and replace it after the token has been + used. However that approach is not always acceptable. 
+ +*/ + +#include +#include +#include + +char * tokendup(const char *t, const char *l) +{ + size_t n = l -t + 1; + char *r = (char*)malloc(n); + + memmove(r, t, n-1); + r[n] = '\0'; + return r; +} + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; + char *t; +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l+2) +#define YYMARKER q +#define YYFILL(n) { printf("OOD\n"); return 2; } + + for(;;) + { + t = p; +/*!re2c + re2c:indent:top = 2; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + + OCT { t = tokendup(t, p); printf("Oct: %s\n", t); free(t); continue; } + INT { t = tokendup(t, p); printf("Num: %s\n", t); free(t); continue; } + "+" { printf("+\n"); continue; } + "-" { printf("+\n"); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); return 1; } +*/ + } + return 0; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(argv[1], strlen(argv[1])); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_005.re b/tools/re2c/examples/001_upn_calculator/calc_005.re new file mode 100644 index 000000000..6ae2a484c --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_005.re @@ -0,0 +1,144 @@ +/* re2c lesson 001_upn_calculator, calc_005, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- turning this lesson into an easy calculator + . We are going to write an UPN calculator so we need an additional rule to + ignore white space. + . Then we need to store the scanned input somewhere and do our math on it. + . Also we need to scan all arguments since the main c code gets the input + split up into chunks. + . In contrast to what we did before we now add a variable res that holds the + scanner state. We initialize that variable to 0 and quit the loop when it + is non zero. This will also be our return value so that we can use it in + function main to generate error information. + . 
To support operating systems where ' and " get passed in program arguments + we check for them being first and last input character. If so we correct + input pointer and input length. Since now our scanner might not see a + terminating zero we change YYLIMIT again and drop the special zero rule. +*/ + +#include +#include +#include + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const char *t, const char *l, int radix) +{ + int num = 0; + + if (depth >= sizeof(stack)) + { + return 3; + } + + --t; + while(++t < l) + { + num = num * radix + (*t - '0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + return 0; +} + +int scan(char *s, int l) +{ + char *p = s; + char *q = 0; + char *t; + int res = 0; + +#define YYCTYPE char +#define YYCURSOR p +#define YYLIMIT (s+l+1) +#define YYMARKER q +#define YYFILL(n) { return depth == 1 ? 
0 : 2; } + + while(!res) + { + t = p; +/*!re2c + re2c:indent:top = 2; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = argv[argp]; + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + ++inp; + len -=2; + } + res = scan(inp, len); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_006.s.re b/tools/re2c/examples/001_upn_calculator/calc_006.s.re new file mode 100644 index 000000000..10da31cd1 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_006.s.re @@ -0,0 +1,162 @@ +/* re2c lesson 001_upn_calculator, calc_006, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- avoiding YYFILL() + . We use the inplace configuration re2c:yyfill to suppress generation of + YYFILL() blocks. This of course means we no longer have to provide the + macro. + . We also drop the YYMARKER stuff since we know that re2c does not generate + it for this example. + . Since re2c does no longer check for out of data situations we must do this. + For that reason we first reintroduce our zero rule and second we need to + ensure that the scanner does not take more than one bytes in one go. 
+ + In the example suppose "0" is passed. The scanner reads the first "0" and + then is in an undecided state. The scanner can earliest decide on the next + char what the token is. In case of a zero the input ends and it was a + number, 0 to be precise. In case of a digit it is an octal number and the + next character needs to be read. In case of any other character the scanner + will detect an error with the any rule [^]. + + Now the above shows that the scanner may read two characters directly. But + only if the first is a "0". So we could easily check that if the first char + is "0" and the next char is a digit then yet another charcter is present. + But we require our inut to be zero terminated. And that means we do not + have to check anything for this scanner. + + However with other rule sets re2c might read more then one character in a + row. In those cases it is normally hard to impossible to avoid YYFILL. + +- optimizing the generated code by using -s command line switch of re2c + . This tells re2c to generate code that uses if statements rather + then endless switch/case expressions where appropriate. Note that the + generated code now requires the input to be unsigned char rather than char + due to the way comparisons are generated. 
+*/ + +#include +#include +#include + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const unsigned char *t, const unsigned char *l, int radix) +{ + int num = 0; + + if (depth >= sizeof(stack)) + { + return 3; + } + + --t; + while(++t < l) + { + num = num * radix + (*t - (unsigned char)'0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + DEBUG(printf("-\n")); + return 0; +} + +int scan(char *s) +{ + unsigned char *p = (unsigned char*)s; + unsigned char *t; + int res = 0; + +#define YYCTYPE unsigned char +#define YYCURSOR p + + while(!res) + { + t = p; +/*!re2c + re2c:indent:top = 2; + re2c:yyfill:enable = 0; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + "\000" { res = depth == 1 ? 
0 : 2; break; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = strdup(argv[argp]); + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + inp[len - 1] = '\0'; + ++inp; + } + res = scan(inp); + free(inp); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_007.b.re b/tools/re2c/examples/001_upn_calculator/calc_007.b.re new file mode 100644 index 000000000..523819654 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_007.b.re @@ -0,0 +1,135 @@ +/* re2c lesson 001_upn_calculator, calc_007, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- optimizing the generated code by using -b command line switch of re2c + . This tells re2c to generate code that uses a decision table. The -b switch + also contains the -s behavior. And -b also requires the input to be + unsigned chars. 
+*/ + +#include +#include +#include + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const unsigned char *t, const unsigned char *l, int radix) +{ + int num = 0; + + if (depth >= sizeof(stack)) + { + return 3; + } + + --t; + while(++t < l) + { + num = num * radix + (*t - (unsigned char)'0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int scan(char *s) +{ + unsigned char *p = (unsigned char*)s; + unsigned char *t; + int res = 0; + +#define YYCTYPE unsigned char +#define YYCURSOR p + + while(!res) + { + t = p; +/*!re2c + re2c:indent:top = 2; + re2c:yyfill:enable = 0; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + "\000" { res = depth == 1 ? 
0 : 2; break; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = strdup(argv[argp]); + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + inp[len - 1] = '\0'; + ++inp; + } + res = scan(inp); + free(inp); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/calc_008.b.re b/tools/re2c/examples/001_upn_calculator/calc_008.b.re new file mode 100644 index 000000000..ed1a088e4 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/calc_008.b.re @@ -0,0 +1,158 @@ +/* re2c lesson 001_upn_calculator, calc_008, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- using -b with signed character input + . Since the code is being generated with -b switch re2c requires the internal + character variable yych to use an unsigned character type. For that reason + the previous lessons had a conversion at the beginning of their scan() + function. Other re2c generated code often have the scanners work completely + on unsigned input. Thus requesting a conversion. + + To avoid the conversion on input, re2c allows to do the conversion when + reading the internal yych variable. To enable that conversion you need to + use the implace configuration 're2c:yych:conversion' and set it to 1. This + will change the generated code to insert conversions to YYCTYPE whenever + yych is being read. + +- More inplace configurations for better/nicer code + . 
re2c allows to overwrite the generation of any define, label or variable + used in the generated code. For example we overwrite the 'yych' variable + name to 'curr' using inplace configuration 're2c:variable:yych = curr;'. + + . We further more use inplace configurations instead of defines. This allows + to use correct conversions to 'unsigned char' instead of having to convert + to 'YYCTYPE' when placing 're2c:define:YYCTYPE = "unsigned char";' infront + of 're2c:yych:conversion'. Note that we have to use apostrophies for the + first setting as it contains a space. + + . Last but not least we use 're2c:labelprefix = scan' to change the prefix + of generated labels. +*/ + +#include +#include +#include + +#define DEBUG(stmt) stmt + +int stack[4]; +int depth = 0; + +int push_num(const char *t, const char *l, int radix) +{ + int num = 0; + + if (depth >= sizeof(stack)) + { + return 3; + } + + --t; + while(++t < l) + { + num = num * radix + (*t - '0'); + } + DEBUG(printf("Num: %d\n", num)); + + stack[depth++] = num; + return 0; +} + +int stack_add() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] + stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int stack_sub() +{ + if (depth < 2) return 4; + + --depth; + stack[depth-1] = stack[depth-1] - stack[depth]; + DEBUG(printf("+\n")); + return 0; +} + +int scan(char *p) +{ + char *t; + int res = 0; + + while(!res) + { + t = p; +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = p; + re2c:variable:yych = curr; + re2c:indent:top = 2; + re2c:yyfill:enable = 0; + re2c:yych:conversion = 1; + re2c:labelprefix = scan; + + DIGIT = [0-9] ; + OCT = "0" DIGIT+ ; + INT = "0" | ( [1-9] DIGIT* ) ; + WS = [ \t]+ ; + + WS { continue; } + OCT { res = push_num(t, p, 8); continue; } + INT { res = push_num(t, p, 10); continue; } + "+" { res = stack_add(); continue; } + "-" { res = stack_sub(); continue; } + "\000" { res = depth == 1 ? 
0 : 2; break; } + [^] { res = 1; continue; } +*/ + } + return res; +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + char *inp; + int res = 0, argp = 0, len; + + while(!res && ++argp < argc) + { + inp = strdup(argv[argp]); + len = strlen(inp); + if (inp[0] == '\"' && inp[len-1] == '\"') + { + inp[len - 1] = '\0'; + ++inp; + } + res = scan(inp); + free(inp); + } + switch(res) + { + case 0: + printf("Result: %d\n", stack[0]); + return 0; + case 1: + fprintf(stderr, "Illegal character in input.\n"); + return 1; + case 2: + fprintf(stderr, "Premature end of input.\n"); + return 2; + case 3: + fprintf(stderr, "Stack overflow.\n"); + return 3; + case 4: + fprintf(stderr, "Stack underflow.\n"); + return 4; + } + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 0; + } +} diff --git a/tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h b/tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h new file mode 100644 index 000000000..585a1d98f --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/windows/HiResTimer.h @@ -0,0 +1,54 @@ +/** + * @file HiResTimer.h + * @brief + * @note + */ + +#ifndef _HI_RES_TIMER_H_ +#define _HI_RES_TIMER_H_ + +#ifdef WIN32 +#include // probably already done in stdafx.h +static LARGE_INTEGER start; +static LARGE_INTEGER stop; +static LARGE_INTEGER freq; +static _int64 elapsedCounts; +static double elapsedMillis; +static double elapsedMicros; +static HANDLE processHandle; +static DWORD prevPriorityClass; + +void HrtInit() +{ + processHandle = GetCurrentProcess(); + prevPriorityClass = GetPriorityClass(processHandle); + QueryPerformanceFrequency(&freq); +} + +void HrtStart() +{ + QueryPerformanceCounter(&start); +} + +void HrtSetPriority(DWORD priority) +{ + int flag; + prevPriorityClass = GetPriorityClass(processHandle); + flag = SetPriorityClass(processHandle, priority); +} + +void HrtResetPriority(void) +{ + int flag = SetPriorityClass(processHandle, prevPriorityClass); +} + +double HrtElapsedMillis() +{ + 
QueryPerformanceCounter(&stop); + elapsedCounts = (stop.QuadPart - start.QuadPart); + elapsedMillis = ((elapsedCounts * 1000.0) / freq.QuadPart); + return elapsedMillis; +} + +#endif +#endif \ No newline at end of file diff --git a/tools/re2c/examples/001_upn_calculator/windows/main.b.re b/tools/re2c/examples/001_upn_calculator/windows/main.b.re new file mode 100644 index 000000000..1600b83d2 --- /dev/null +++ b/tools/re2c/examples/001_upn_calculator/windows/main.b.re @@ -0,0 +1,291 @@ +/* re2c lesson 001_upn_calculator, main.b.re, (c) M. Boerger, L. Allan 2006 */ +/*!ignore:re2c + +- basic interface for string reading + + . We define the macros YYCTYPE, YYCURSOR, YYLIMIT, YYMARKER, YYFILL + . YYCTYPE is the type re2c operates on or in other words the type that + it generates code for. While it is not a big difference when we were + using 'unsigned char' here we would need to run re2c with option -w + to fully support types with sieof() > 1. + . YYCURSOR is used internally and holds the current scanner position. In + expression handlers, the code blocks after re2c expressions, this can be + used to identify the end of the token. + . YYMARKER is not always being used so we set an initial value to avoid + a compiler warning. + . YYLIMIT stores the end of the input. Unfortunatley we have to use strlen() + in this lesson. In the next example we see one way to get rid of it. + . We use a 'for(;;)'-loop around the scanner block. We could have used a + 'while(1)'-loop instead but some compilers generate a warning for it. + . To make the output more readable we use 're2c:indent:top' scanner + configuration that configures re2c to prepend a single tab (the default) + to the beginning of each output line. + . The following lines are expressions and for each expression we output the + token name and continue the scanner loop. + . The second last token detects the end of our input, the terminating zero in + our input string. 
In other scanners detecting the end of input may vary. + For example binary code may contain \0 as valid input. + . The last expression accepts any input character. It tells re2c to accept + the opposite of the empty range. This includes numbers and our tokens but + as re2c goes from top to bottom when evaluating the expressions this is no + problem. + . The first three rules show that re2c actually prioritizes the expressions + from top to bottom. Octal numbers require a starting "0" and the actual + number. Normal numbers start with a digit greater than 0. And zero is finally a + special case. A single "0" is detected by the last rule of this set. And + a valid octal number is already being detected by the first rule. This even + includes multi "0" sequences that in octal notation also mean zero. + Another way would be to only use two rules: + "0" [0-9]+ + "0" | ( [1-9] [0-9]* ) + A full description of re2c rule syntax can be found in the manual. +*/ + +#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers + +#if _MSC_VER > 1200 +#define WINVER 0x0400 // Change this to the appropriate value to target Windows 98 and Windows 2000 or later. 
+#endif // Prevents warning from vc7.1 complaining about redefinition + +#include +#include +#include +#include +#include +#include "HiResTimer.h" + +static char gTestBuf[1000] = ""; + +/** + * @brief Setup HiResolution timer and confirm it is working ok + */ +void InitHiResTimerAndVerifyWorking(void) +{ + double elapsed; + HrtInit(); + HrtSetPriority(ABOVE_NORMAL_PRIORITY_CLASS); + HrtStart(); + Sleep(100); + elapsed = HrtElapsedMillis(); + if ((elapsed < 90) || (elapsed > 110)) { + printf("HiResTimer misbehaving: %f\n", elapsed); + exit(2); + } +} + +/** + * @brief Scan for numbers in different formats + */ +int ScanFullSpeed(char *pzStrToScan, size_t lenStrToScan) +{ + unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan; + unsigned char *pzBacktrackInfo = 0; +#define YYCTYPE unsigned char +#define YYCURSOR pzCurScanPos +#define YYLIMIT (pzStrToScan+lenStrToScan) +#define YYMARKER pzBacktrackInfo +#define YYFILL(n) + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + [1-9][0-9]* { continue; } + [0][0-9]+ { continue; } + "+" { continue; } + "-" { continue; } + "\000" { return 0; } + [^] { return 1; } +*/ + } +} + +/** + * @brief Scan for numbers in different formats + */ +int scan(char *pzStrToScan, size_t lenStrToScan) +{ + unsigned char *pzCurScanPos = (unsigned char*)pzStrToScan; + unsigned char *pzBacktrackInfo = 0; +#define YYCTYPE unsigned char +#define YYCURSOR pzCurScanPos +#define YYLIMIT (pzStrToScan+lenStrToScan) +#define YYMARKER pzBacktrackInfo +#define YYFILL(n) + + for(;;) + { +/*!re2c + re2c:indent:top = 2; + [1-9][0-9]* { printf("Num\n"); strcat(gTestBuf, "Num "); continue; } + [0][0-9]+ { printf("Oct\n"); strcat(gTestBuf, "Oct "); continue; } + "+" { printf("+\n"); strcat(gTestBuf, "+ "); continue; } + "-" { printf("-\n"); strcat(gTestBuf, "- "); continue; } + "\000" { printf("EOF\n"); return 0; } + [^] { printf("ERR\n"); strcat(gTestBuf, "ERR "); return 1; } +*/ + } +} + +/** + * @brief Show high resolution elapsed time for 10,000 and 100,000 
loops + */ +void DoTimingsOfStrnCmp(void) +{ + char testStr[] = "Hello, world"; + int totLoops = 10000; + int totFoundCount = 0; + int foundCount = 0; + int loop; + int rc; + const int progressAnd = 0xFFFFF000; + double elapsed; + + printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); + + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + rc = strncmp(testStr, "Hello", 5); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); + + totLoops = 100000; + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + rc = strncmp(testStr, "Hello", 5); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nstrncmp Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); +} + +/** + * @brief Show high resolution elapsed time for 10,000 and 100,000 loops + */ +void DoTimingsOfRe2c(void) +{ + char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" }; + const int testCount = sizeof(testStrings) / sizeof(testStrings[0]); + int i; + int totLoops = 10000 / testCount; // Doing more than one per loop + int totFoundCount = 0; + int foundCount = 0; + int loop; + int rc; + const int progressAnd = 0xFFFFF000; + double elapsed; + + printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); + + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + strcpy(gTestBuf, ""); + for (i = 
0; i < testCount; ++i) { + char* pzCurStr = testStrings[i]; + size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string? + rc = ScanFullSpeed(pzCurStr, len); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); + + totLoops = 100000 / testCount; + printf("\n\n%d loops with * every %d loops to confirm\n", totLoops, ((~progressAnd) + 1)); + + HrtStart(); + for (loop = 0; loop < totLoops; ++loop) { + foundCount = 0; + strcpy(gTestBuf, ""); + for (i = 0; i < testCount; ++i) { + char* pzCurStr = testStrings[i]; + size_t len = strlen(pzCurStr); // Calc of strlen slows things down ... std::string? + rc = ScanFullSpeed(pzCurStr, len); + if (rc == 0) { + foundCount++; + totFoundCount++; + if ((totFoundCount & progressAnd) == totFoundCount) { + printf("*"); + } + } + } + } + elapsed = HrtElapsedMillis(); + printf("\nRe2c Elapsed for %7d loops milliseconds: %7.3f\n", totLoops, elapsed); + printf("FoundCount each loop: %d\n", foundCount); + printf("TotalFoundCount for all loops: %d\n", totFoundCount); +} + +/** + * @brief Entry point for console app + */ +int main(int argc, char **argv) +{ + char testStr_A[] = "123"; + char* testStr_B = "456"; + char* testStrings[] = { "123", "1234", "+123", "01234", "-04321", "abc", "123abc" }; + const int testCount = sizeof(testStrings) / sizeof(testStrings[0]); + int i; + + int rc = scan(testStr_A, 3); + printf("rc: %d\n", rc); + + rc = scan(testStr_B, 3); + printf("rc: %d\n", rc); + + rc = scan("789", 3); + printf("rc: %d\n", rc); + + strcpy(gTestBuf, ""); + for (i = 0; i < testCount; ++i) { + char* pzCurStr = testStrings[i]; + size_t len = strlen(pzCurStr); + scan(pzCurStr, len); + } + printf("%s\n", 
gTestBuf); + rc = strcmp(gTestBuf, "Num Num + Num Oct - Oct ERR Num ERR "); + if (rc == 0) { + printf("Success\n"); + } + else { + printf("Failure\n"); + } + assert(0 == rc); // Doesn't work with Release build + + InitHiResTimerAndVerifyWorking(); + + DoTimingsOfStrnCmp(); + + DoTimingsOfRe2c(); + + return 0; +} diff --git a/tools/re2c/examples/002_strip_comments/README b/tools/re2c/examples/002_strip_comments/README new file mode 100644 index 000000000..353d66904 --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/README @@ -0,0 +1,21 @@ +re2c lesson 002_strip_comments, (c) M. Boerger 2006 + +In this lesson you will learn how to use multiple scanner blocks and how to +read the input from a file instead of a zero terminated string. In the end you +will have a scanner that filters comments out of c source files but keeps re2c +comments. + +The first scanner can be generated with: + + re2c -s -o t.c strip_001.s.re + +In the second step we will learn about YYMARKER that stores backtracking +information. + + re2c -s -0 t.c strip_002.b.re + +The third step brings trailing contexts that are stored in YYCTXMARKER. We also +change to use -b instead of -s option since the scanner gets more and more +complex. + + re2c -b -0 t.c strip_002.b.re diff --git a/tools/re2c/examples/002_strip_comments/strip_001.s.re b/tools/re2c/examples/002_strip_comments/strip_001.s.re new file mode 100644 index 000000000..5525ae3c9 --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/strip_001.s.re @@ -0,0 +1,147 @@ +/* re2c lesson 002_strip_comments, strip_001.s, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- basic interface for file reading + . This scanner will read chunks of input from a file. The easiest way would + be to read the whole file into a memory buffer and use that a zero + terminated string. + . Instead we want to read input chunks of a reasonable size as they are neede + by the scanner. Thus we basically need YYFILL(n) to call fread(n). + . 
Before we provide a buffer that we constantly reallocate we instead use + one buffer that we get from the stack or global memory just once. When we + reach the end of the buffer we simply move the beginning of our input + that is somewhere in our buffer to the beginning of our buffer and then + append the next chunk of input to the correct end inside our buffer. + . As re2c scanners might read more than one character we need to ensure our + buffer is long enough. We can use re2c to inform about the maximum size + by placing a "!max:re2c" comment somewhere. This gets translated to a + "#define YYMAXFILL <n>" line where <n> is the maximum length value. This + define can be used as a preprocessor condition. + +- multiple scanner blocks + . We use a main scanner block that outputs every input character unless the + input is two /s or a / followed by a *. In the latter two cases we switch + to a special c++ comment and a comment block respectively. + . Both special blocks simply detect their end and ignore any other character. + . The c++ block is a bit special, since the terminating new line needs to + be output and that can either be a new line or a carriage return followed + by a new line. + . In order to ensure that we do not read beyond our buffer we reset the token + pointer to the cursor on every scanner run. 
+*/ + +#include +#include +#include + +/*!max:re2c */ +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE unsigned char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + unsigned char *cur, *tok, *lim, *eof; + unsigned char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->cur = s->tok = s->lim = s->buffer; + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memcpy(s->buffer, s->tok, s->lim - s->tok); + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + } + cnt = BSIZE - cnt; + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +int scan(FILE *fp) +{ + int res = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; +/*!re2c + re2c:indent:top = 2; + + NL = "\r"? "\n" ; + ANY = [^] ; + + "/" "/" { goto cppcomment; } + "/" "*" { goto comment; } + ANY { fputc(*s.tok, stdout); continue; } +*/ +comment: + s.tok = s.cur; +/*!re2c + "*" "/" { continue; } + ANY { goto comment; } +*/ +cppcomment: + s.tok = s.cur; +/*!re2c + NL { fwrite(s.tok, 1, s.cur - s.tok, stdout); continue; } + ANY { goto cppcomment; } +*/ + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? 
stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/002_strip_comments/strip_002.s.re b/tools/re2c/examples/002_strip_comments/strip_002.s.re new file mode 100644 index 000000000..3c2a6cf8c --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/strip_002.s.re @@ -0,0 +1,162 @@ +/* re2c lesson 002_strip_comments, strip_002.s, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- complexity + . When a comment is preceeded by a new line and followed by whitespace and a + new line then we can drop the trailing whitespace and new line. + . Additional to what we strip out already what about two consequtive comment + blocks? When two comments are only separated by whitespace we want to drop + both. In other words when detecting the end of a comment block we need to + check whether it is followed by only whitespace and the a new comment in + which case we continure ignoring the input. If it is followed only by white + space and a new line we strip out the new white space and new line. In any + other case we start outputting all that follows. + But we cannot simply use the following two rules: + "*" "/" WS* "/" "*" { continue; } + "*" "/" WS* NL { continue; } + The main problem is that WS* can get bigger then our buffer, so we need a + new scanner. + . Meanwhile our scanner gets a bit more complex and we have to add two more + things. First the scanner code now uses a YYMARKER to store backtracking + information. + +- backtracking information + . When the scanner has two rules that can have the same beginning but a + different ending then it needs to store the position that identifies the + common part. This is called backtracking. As mentioned above re2c expects + you to provide compiler define YYMARKER and a pointer variable. + . When shifting buffer contents as done in our fill function the marker needs + to be corrected, too. 
+ +*/ + +#include +#include +#include + +/*!max:re2c */ +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE unsigned char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYMARKER s.mrk +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + unsigned char *cur, *tok, *lim, *eof, *mrk; + unsigned char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->cur = s->tok = s->lim = s->mrk = s->buffer; + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memcpy(s->buffer, s->tok, s->lim - s->tok); + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + s->mrk -= cnt; + } + cnt = BSIZE - cnt; + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +void echo(Scanner *s) +{ + fwrite(s->tok, 1, s->cur - s->tok, stdout); +} + +int scan(FILE *fp) +{ + int res = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; +/*!re2c + re2c:indent:top = 2; + + NL = "\r"? "\n" ; + WS = [\r\n\t ] ; + ANY = [^] ; + + "/" "/" { goto cppcomment; } + "/" "*" { goto comment; } + ANY { fputc(*s.tok, stdout); continue; } +*/ +comment: + s.tok = s.cur; +/*!re2c + "*" "/" { goto commentws; } + ANY { goto comment; } +*/ +commentws: + s.tok = s.cur; +/*!re2c + NL { echo(&s); continue; } + WS { goto commentws; } + ANY { echo(&s); continue; } +*/ +cppcomment: + s.tok = s.cur; +/*!re2c + NL { echo(&s); continue; } + ANY { goto cppcomment; } +*/ + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? 
stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/002_strip_comments/strip_003.b.re b/tools/re2c/examples/002_strip_comments/strip_003.b.re new file mode 100644 index 000000000..a7b1a5c72 --- /dev/null +++ b/tools/re2c/examples/002_strip_comments/strip_003.b.re @@ -0,0 +1,179 @@ +/* re2c lesson 002_strip_comments, strip_003.b, (c) M. Boerger 2006 - 2007 */ +/*!ignore:re2c + +- more complexity + . Right now we strip out trailing white space and new lines after a comment + block. This can be a problem when the comment block was not preceeded by + a new line. + . The solution is to use trailing contexts. + +- trailing contexts + . Re2c allows to check for a portion of input and only recognize it when it + is followed by another portion. This is called a trailing context. + . The trailing context is not part of the identified input. That means that + it follows exactly at the cursor. A consequence is that the scanner has + already read more input and on the next run you need to restore begining + of input, in our case s.tok, from the cursor, here s.cur, rather then + restoring to the beginning of the buffer. This way the scanner can reuse + the portion it has already read. + . The position of the trailing context is stored in YYCTXMARKER for which + a pointer variable needs to be provided. + . As with YYMARKER the corrsponding variable needs to be corrected if we + shift in some buffer. + . Still this is not all we need to solve the problem. What is left is that + the information whether we detected a trailing context was detected has to + be stored somewhere. This is done by the new variable nlcomment. + +- formatting + . Until now we only used single line expression code and we always had the + opening { on the same line as the rule itself. If we have multiline rule + code and care for formatting we can no longer rely on re2c. Now we have + to indent the rule code ourself. 
Also we need to take care of the opening + {. If we keep it on the same line as the rule then re2c will indent it + correctly and the emitted #line informations will be correct. If we place + it on the next line then the #line directive will also point to that line + and not to the rule. +*/ + +#include +#include +#include + +/*!max:re2c */ +#define BSIZE 128 + +#if BSIZE < YYMAXFILL +# error BSIZE must be greater YYMAXFILL +#endif + +#define YYCTYPE unsigned char +#define YYCURSOR s.cur +#define YYLIMIT s.lim +#define YYMARKER s.mrk +#define YYCTXMARKER s.ctx +#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; } + +typedef struct Scanner +{ + FILE *fp; + unsigned char *cur, *tok, *lim, *eof, *ctx, *mrk; + unsigned char buffer[BSIZE]; +} Scanner; + +int fill(Scanner *s, int len) +{ + if (!len) + { + s->cur = s->tok = s->lim = s->mrk = s->buffer; + s->eof = 0; + } + if (!s->eof) + { + int got, cnt = s->tok - s->buffer; + + if (cnt > 0) + { + memcpy(s->buffer, s->tok, s->lim - s->tok); + s->tok -= cnt; + s->cur -= cnt; + s->lim -= cnt; + s->mrk -= cnt; + s->ctx -= cnt; + } + cnt = BSIZE - cnt; + if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt) + { + s->eof = &s->lim[got]; + } + s->lim += got; + } + else if (s->cur + len > s->eof) + { + return 0; /* not enough input data */ + } + return -1; +} + +void echo(Scanner *s) +{ + fwrite(s->tok, 1, s->cur - s->tok, stdout); +} + +int scan(FILE *fp) +{ + int res = 0; + int nlcomment = 0; + Scanner s; + + if (!fp) + { + return 1; /* no file was opened */ + } + + s.fp = fp; + + fill(&s, 0); + + for(;;) + { + s.tok = s.cur; +/*!re2c + re2c:indent:top = 2; + + NL = "\r"? "\n" ; + WS = [\r\n\t ] ; + ANY = [^] ; + + "/" "/" { goto cppcomment; } + NL / "/""*" { echo(&s); nlcomment = 1; continue; } + "/" "*" { goto comment; } + ANY { fputc(*s.tok, stdout); continue; } +*/ +comment: + s.tok = s.cur; +/*!re2c + "*" "/" { goto commentws; } + ANY { goto comment; } +*/ +commentws: + s.tok = s.cur; +/*!re2c + NL? 
"/" "*" { goto comment; } + NL { + if (!nlcomment) + { + echo(&s); + } + nlcomment = 0; + continue; + } + WS { goto commentws; } + ANY { echo(&s); nlcomment = 0; continue; } +*/ +cppcomment: + s.tok = s.cur; +/*!re2c + NL { echo(&s); continue; } + ANY { goto cppcomment; } +*/ + } + + if (fp != stdin) + { + fclose(fp); /* close only if not stdin */ + } + return res; /* return result */ +} + +int main(int argc, char **argv) +{ + if (argc > 1) + { + return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r")); + } + else + { + fprintf(stderr, "%s \n", argv[0]); + return 1; + } +} diff --git a/tools/re2c/examples/basemmap.c b/tools/re2c/examples/basemmap.c deleted file mode 100644 index 3e5b037ad..000000000 --- a/tools/re2c/examples/basemmap.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include -#include -#include -#include - -#ifndef MAP_NORESERVE -#define MAP_NORESERVE 0 -#endif - -volatile char ch; - -main(){ - struct stat statbuf; - uchar *buf; - fstat(0, &statbuf); - buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE, - 0, 0); - if(buf != (uchar*)(-1)){ - uchar *cur, *lim = &buf[statbuf.st_size]; - for(cur = buf; buf != lim; ++cur){ - ch = *cur; - } - munmap(buf, statbuf.st_size); - } -} diff --git a/tools/re2c/examples/cmmap.re b/tools/re2c/examples/cmmap.re deleted file mode 100644 index bc4d498af..000000000 --- a/tools/re2c/examples/cmmap.re +++ /dev/null @@ -1,267 +0,0 @@ -#include -#include -#include -#include -#include - -#define ADDEQ 257 -#define ANDAND 258 -#define ANDEQ 259 -#define ARRAY 260 -#define ASM 261 -#define AUTO 262 -#define BREAK 263 -#define CASE 264 -#define CHAR 265 -#define CONST 266 -#define CONTINUE 267 -#define DECR 268 -#define DEFAULT 269 -#define DEREF 270 -#define DIVEQ 271 -#define DO 272 -#define DOUBLE 273 -#define ELLIPSIS 274 -#define ELSE 275 -#define ENUM 276 -#define EQL 277 -#define EXTERN 278 -#define FCON 279 -#define FLOAT 280 -#define FOR 281 -#define FUNCTION 282 -#define GEQ 283 -#define GOTO 
284 -#define ICON 285 -#define ID 286 -#define IF 287 -#define INCR 288 -#define INT 289 -#define LEQ 290 -#define LONG 291 -#define LSHIFT 292 -#define LSHIFTEQ 293 -#define MODEQ 294 -#define MULEQ 295 -#define NEQ 296 -#define OREQ 297 -#define OROR 298 -#define POINTER 299 -#define REGISTER 300 -#define RETURN 301 -#define RSHIFT 302 -#define RSHIFTEQ 303 -#define SCON 304 -#define SHORT 305 -#define SIGNED 306 -#define SIZEOF 307 -#define STATIC 308 -#define STRUCT 309 -#define SUBEQ 310 -#define SWITCH 311 -#define TYPEDEF 312 -#define UNION 313 -#define UNSIGNED 314 -#define VOID 315 -#define VOLATILE 316 -#define WHILE 317 -#define XOREQ 318 -#define EOI 319 - -typedef unsigned int unint; -typedef unsigned char uchar; - -#define YYCTYPE uchar -#define YYCURSOR cursor -#define YYLIMIT s->lim -#define YYMARKER s->ptr -#define YYFILL(n) {cursor = fill(s, cursor);} - -#define RET(i) {s->cur = cursor; return i;} - -typedef struct Scanner { - uchar *tok, *ptr, *cur, *pos, *lim, *eof; - unint line; -} Scanner; - -uchar *fill(Scanner *s, uchar *cursor){ - if(!s->eof){ - unint cnt = s->lim - s->tok; - uchar *buf = malloc((cnt + 1)*sizeof(uchar)); - memcpy(buf, s->tok, cnt); - cursor = &buf[cursor - s->tok]; - s->pos = &buf[s->pos - s->tok]; - s->ptr = &buf[s->ptr - s->tok]; - s->lim = &buf[cnt]; - s->eof = s->lim; *(s->eof)++ = '\n'; - s->tok = buf; - } - return cursor; -} - -int scan(Scanner *s){ - uchar *cursor = s->cur; -std: - s->tok = cursor; -/*!re2c -any = [\000-\377]; -O = [0-7]; -D = [0-9]; -L = [a-zA-Z_]; -H = [a-fA-F0-9]; -E = [Ee] [+-]? 
D+; -FS = [fFlL]; -IS = [uUlL]*; -ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); -*/ - -/*!re2c - "/*" { goto comment; } - - "auto" { RET(AUTO); } - "break" { RET(BREAK); } - "case" { RET(CASE); } - "char" { RET(CHAR); } - "const" { RET(CONST); } - "continue" { RET(CONTINUE); } - "default" { RET(DEFAULT); } - "do" { RET(DO); } - "double" { RET(DOUBLE); } - "else" { RET(ELSE); } - "enum" { RET(ENUM); } - "extern" { RET(EXTERN); } - "float" { RET(FLOAT); } - "for" { RET(FOR); } - "goto" { RET(GOTO); } - "if" { RET(IF); } - "int" { RET(INT); } - "long" { RET(LONG); } - "register" { RET(REGISTER); } - "return" { RET(RETURN); } - "short" { RET(SHORT); } - "signed" { RET(SIGNED); } - "sizeof" { RET(SIZEOF); } - "static" { RET(STATIC); } - "struct" { RET(STRUCT); } - "switch" { RET(SWITCH); } - "typedef" { RET(TYPEDEF); } - "union" { RET(UNION); } - "unsigned" { RET(UNSIGNED); } - "void" { RET(VOID); } - "volatile" { RET(VOLATILE); } - "while" { RET(WHILE); } - - L (L|D)* { RET(ID); } - - ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | - (['] (ESC|any\[\n\\'])* [']) - { RET(ICON); } - - (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) - { RET(FCON); } - - (["] (ESC|any\[\n\\"])* ["]) - { RET(SCON); } - - "..." { RET(ELLIPSIS); } - ">>=" { RET(RSHIFTEQ); } - "<<=" { RET(LSHIFTEQ); } - "+=" { RET(ADDEQ); } - "-=" { RET(SUBEQ); } - "*=" { RET(MULEQ); } - "/=" { RET(DIVEQ); } - "%=" { RET(MODEQ); } - "&=" { RET(ANDEQ); } - "^=" { RET(XOREQ); } - "|=" { RET(OREQ); } - ">>" { RET(RSHIFT); } - "<<" { RET(LSHIFT); } - "++" { RET(INCR); } - "--" { RET(DECR); } - "->" { RET(DEREF); } - "&&" { RET(ANDAND); } - "||" { RET(OROR); } - "<=" { RET(LEQ); } - ">=" { RET(GEQ); } - "==" { RET(EQL); } - "!=" { RET(NEQ); } - ";" { RET(';'); } - "{" { RET('{'); } - "}" { RET('}'); } - "," { RET(','); } - ":" { RET(':'); } - "=" { RET('='); } - "(" { RET('('); } - ")" { RET(')'); } - "[" { RET('['); } - "]" { RET(']'); } - "." { RET('.'); } - "&" { RET('&'); } - "!" 
{ RET('!'); } - "~" { RET('~'); } - "-" { RET('-'); } - "+" { RET('+'); } - "*" { RET('*'); } - "/" { RET('/'); } - "%" { RET('%'); } - "<" { RET('<'); } - ">" { RET('>'); } - "^" { RET('^'); } - "|" { RET('|'); } - "?" { RET('?'); } - - - [ \t\v\f]+ { goto std; } - - "\n" - { - if(cursor == s->eof) RET(EOI); - s->pos = cursor; s->line++; - goto std; - } - - any - { - printf("unexpected character: %c\n", *s->tok); - goto std; - } -*/ - -comment: -/*!re2c - "*/" { goto std; } - "\n" - { - if(cursor == s->eof) RET(EOI); - s->tok = s->pos = cursor; s->line++; - goto comment; - } - any { goto comment; } -*/ -} - -#ifndef MAP_NORESERVE -#define MAP_NORESERVE 0 -#endif - -main(){ - Scanner in; - struct stat statbuf; - uchar *buf; - fstat(0, &statbuf); - buf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED|MAP_NORESERVE, - 0, 0); - if(buf != (uchar*)(-1)){ - int t; - in.lim = &(in.cur = buf)[statbuf.st_size]; - in.pos = NULL; - in.eof = NULL; - while((t = scan(&in)) != EOI){ -/* - printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); - printf("%d\n", t); -*/ - } - munmap(buf, statbuf.st_size); - } -} diff --git a/tools/re2c/examples/cnokw.re b/tools/re2c/examples/cnokw.re deleted file mode 100644 index bdc127932..000000000 --- a/tools/re2c/examples/cnokw.re +++ /dev/null @@ -1,239 +0,0 @@ -#include -#include -#include - -#define ADDEQ 257 -#define ANDAND 258 -#define ANDEQ 259 -#define ARRAY 260 -#define ASM 261 -#define AUTO 262 -#define BREAK 263 -#define CASE 264 -#define CHAR 265 -#define CONST 266 -#define CONTINUE 267 -#define DECR 268 -#define DEFAULT 269 -#define DEREF 270 -#define DIVEQ 271 -#define DO 272 -#define DOUBLE 273 -#define ELLIPSIS 274 -#define ELSE 275 -#define ENUM 276 -#define EQL 277 -#define EXTERN 278 -#define FCON 279 -#define FLOAT 280 -#define FOR 281 -#define FUNCTION 282 -#define GEQ 283 -#define GOTO 284 -#define ICON 285 -#define ID 286 -#define IF 287 -#define INCR 288 -#define INT 289 -#define LEQ 290 -#define LONG 291 -#define LSHIFT 
292 -#define LSHIFTEQ 293 -#define MODEQ 294 -#define MULEQ 295 -#define NEQ 296 -#define OREQ 297 -#define OROR 298 -#define POINTER 299 -#define REGISTER 300 -#define RETURN 301 -#define RSHIFT 302 -#define RSHIFTEQ 303 -#define SCON 304 -#define SHORT 305 -#define SIGNED 306 -#define SIZEOF 307 -#define STATIC 308 -#define STRUCT 309 -#define SUBEQ 310 -#define SWITCH 311 -#define TYPEDEF 312 -#define UNION 313 -#define UNSIGNED 314 -#define VOID 315 -#define VOLATILE 316 -#define WHILE 317 -#define XOREQ 318 -#define EOI 319 - -typedef unsigned int uint; -typedef unsigned char uchar; - -#define BSIZE 8192 - -#define YYCTYPE uchar -#define YYCURSOR cursor -#define YYLIMIT s->lim -#define YYMARKER s->ptr -#define YYFILL(n) {cursor = fill(s, cursor);} - -#define RET(i) {s->cur = cursor; return i;} - -typedef struct Scanner { - int fd; - uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; - uint line; -} Scanner; - -uchar *fill(Scanner *s, uchar *cursor){ - if(!s->eof){ - uint cnt = s->tok - s->bot; - if(cnt){ - memcpy(s->bot, s->tok, s->lim - s->tok); - s->tok = s->bot; - s->ptr -= cnt; - cursor -= cnt; - s->pos -= cnt; - s->lim -= cnt; - } - if((s->top - s->lim) < BSIZE){ - uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); - memcpy(buf, s->tok, s->lim - s->tok); - s->tok = buf; - s->ptr = &buf[s->ptr - s->bot]; - cursor = &buf[cursor - s->bot]; - s->pos = &buf[s->pos - s->bot]; - s->lim = &buf[s->lim - s->bot]; - s->top = &s->lim[BSIZE]; - free(s->bot); - s->bot = buf; - } - if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ - s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; - } - s->lim += cnt; - } - return cursor; -} - -int scan(Scanner *s){ - uchar *cursor = s->cur; -std: - s->tok = cursor; -/*!re2c -any = [\000-\377]; -O = [0-7]; -D = [0-9]; -L = [a-zA-Z_]; -H = [a-fA-F0-9]; -E = [Ee] [+-]? 
D+; -FS = [fFlL]; -IS = [uUlL]*; -ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); -*/ - -/*!re2c - "/*" { goto comment; } - - L (L|D)* { RET(ID); } - - ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | - (['] (ESC|any\[\n\\'])* [']) - { RET(ICON); } - - (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) - { RET(FCON); } - - (["] (ESC|any\[\n\\"])* ["]) - { RET(SCON); } - - "..." { RET(ELLIPSIS); } - ">>=" { RET(RSHIFTEQ); } - "<<=" { RET(LSHIFTEQ); } - "+=" { RET(ADDEQ); } - "-=" { RET(SUBEQ); } - "*=" { RET(MULEQ); } - "/=" { RET(DIVEQ); } - "%=" { RET(MODEQ); } - "&=" { RET(ANDEQ); } - "^=" { RET(XOREQ); } - "|=" { RET(OREQ); } - ">>" { RET(RSHIFT); } - "<<" { RET(LSHIFT); } - "++" { RET(INCR); } - "--" { RET(DECR); } - "->" { RET(DEREF); } - "&&" { RET(ANDAND); } - "||" { RET(OROR); } - "<=" { RET(LEQ); } - ">=" { RET(GEQ); } - "==" { RET(EQL); } - "!=" { RET(NEQ); } - ";" { RET(';'); } - "{" { RET('{'); } - "}" { RET('}'); } - "," { RET(','); } - ":" { RET(':'); } - "=" { RET('='); } - "(" { RET('('); } - ")" { RET(')'); } - "[" { RET('['); } - "]" { RET(']'); } - "." { RET('.'); } - "&" { RET('&'); } - "!" { RET('!'); } - "~" { RET('~'); } - "-" { RET('-'); } - "+" { RET('+'); } - "*" { RET('*'); } - "/" { RET('/'); } - "%" { RET('%'); } - "<" { RET('<'); } - ">" { RET('>'); } - "^" { RET('^'); } - "|" { RET('|'); } - "?" 
{ RET('?'); } - - - [ \t\v\f]+ { goto std; } - - "\n" - { - if(cursor == s->eof) RET(EOI); - s->pos = cursor; s->line++; - goto std; - } - - any - { - printf("unexpected character: %c\n", *s->tok); - goto std; - } -*/ - -comment: -/*!re2c - "*/" { goto std; } - "\n" - { - if(cursor == s->eof) RET(EOI); - s->tok = s->pos = cursor; s->line++; - goto comment; - } - any { goto comment; } -*/ -} - -main(){ - Scanner in; - int t; - memset((char*) &in, 0, sizeof(in)); - in.fd = 0; - while((t = scan(&in)) != EOI){ -/* - printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); - printf("%d\n", t); -*/ - } - close(in.fd); -} diff --git a/tools/re2c/examples/cunroll.re b/tools/re2c/examples/cunroll.re deleted file mode 100644 index dd9d80542..000000000 --- a/tools/re2c/examples/cunroll.re +++ /dev/null @@ -1,258 +0,0 @@ -#include -#include -#include - -#define ADDEQ 257 -#define ANDAND 258 -#define ANDEQ 259 -#define ARRAY 260 -#define ASM 261 -#define AUTO 262 -#define BREAK 263 -#define CASE 264 -#define CHAR 265 -#define CONST 266 -#define CONTINUE 267 -#define DECR 268 -#define DEFAULT 269 -#define DEREF 270 -#define DIVEQ 271 -#define DO 272 -#define DOUBLE 273 -#define ELLIPSIS 274 -#define ELSE 275 -#define ENUM 276 -#define EQL 277 -#define EXTERN 278 -#define FCON 279 -#define FLOAT 280 -#define FOR 281 -#define FUNCTION 282 -#define GEQ 283 -#define GOTO 284 -#define ICON 285 -#define ID 286 -#define IF 287 -#define INCR 288 -#define INT 289 -#define LEQ 290 -#define LONG 291 -#define LSHIFT 292 -#define LSHIFTEQ 293 -#define MODEQ 294 -#define MULEQ 295 -#define NEQ 296 -#define OREQ 297 -#define OROR 298 -#define POINTER 299 -#define REGISTER 300 -#define RETURN 301 -#define RSHIFT 302 -#define RSHIFTEQ 303 -#define SCON 304 -#define SHORT 305 -#define SIGNED 306 -#define SIZEOF 307 -#define STATIC 308 -#define STRUCT 309 -#define SUBEQ 310 -#define SWITCH 311 -#define TYPEDEF 312 -#define UNION 313 -#define UNSIGNED 314 -#define VOID 315 -#define VOLATILE 316 -#define 
WHILE 317 -#define XOREQ 318 -#define EOI 319 - -typedef unsigned int uint; -typedef unsigned char uchar; - -#define BSIZE 8192 - -#define YYCTYPE uchar -#define YYCURSOR cursor -#define YYLIMIT s->lim -#define YYMARKER s->ptr -#define YYFILL(n) {cursor = fill(s, cursor);} - -#define RET(i) {s->cur = cursor; return i;} - -typedef struct Scanner { - int fd; - uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; - uint line; -} Scanner; - -uchar *fill(Scanner *s, uchar *cursor){ - if(!s->eof){ - uint cnt = s->tok - s->bot; - if(cnt){ - memcpy(s->bot, s->tok, s->lim - s->tok); - s->tok = s->bot; - s->ptr -= cnt; - cursor -= cnt; - s->pos -= cnt; - s->lim -= cnt; - } - if((s->top - s->lim) < BSIZE){ - uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BSIZE)*sizeof(uchar)); - memcpy(buf, s->tok, s->lim - s->tok); - s->tok = buf; - s->ptr = &buf[s->ptr - s->bot]; - cursor = &buf[cursor - s->bot]; - s->pos = &buf[s->pos - s->bot]; - s->lim = &buf[s->lim - s->bot]; - s->top = &s->lim[BSIZE]; - free(s->bot); - s->bot = buf; - } - if((cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE){ - s->eof = &s->lim[cnt]; *(s->eof)++ = '\n'; - } - s->lim += cnt; - } - return cursor; -} - -int scan(Scanner *s){ - uchar *cursor = s->cur; -std: - s->tok = cursor; -/*!re2c -any = [\000-\377]; -O = [0-7]; -D = [0-9]; -L = [a-zA-Z_]; -I = L|D; -H = [a-fA-F0-9]; -E = [Ee] [+-]? D+; -FS = [fFlL]; -IS = [uUlL]*; -ESC = [\\] ([abfnrtv?'"\\] | "x" H+ | O+); -X = any\[*/]; -*/ - -/*!re2c - "/*" { goto comment; } - - - L { RET(ID); } - L I { RET(ID); } - L I I { RET(ID); } - L I I I { RET(ID); } - L I I I I { RET(ID); } - L I I I I I { RET(ID); } - L I I I I I I { RET(ID); } - L I I I I I I I { RET(ID); } - L I* { RET(ID); } - - ("0" [xX] H+ IS?) | ("0" D+ IS?) | (D+ IS?) | - (['] (ESC|any\[\n\\'])* [']) - { RET(ICON); } - - (D+ E FS?) | (D* "." D+ E? FS?) | (D+ "." D* E? FS?) - { RET(FCON); } - - (["] (ESC|any\[\n\\"])* ["]) - { RET(SCON); } - - "..." 
{ RET(ELLIPSIS); } - ">>=" { RET(RSHIFTEQ); } - "<<=" { RET(LSHIFTEQ); } - "+=" { RET(ADDEQ); } - "-=" { RET(SUBEQ); } - "*=" { RET(MULEQ); } - "/=" { RET(DIVEQ); } - "%=" { RET(MODEQ); } - "&=" { RET(ANDEQ); } - "^=" { RET(XOREQ); } - "|=" { RET(OREQ); } - ">>" { RET(RSHIFT); } - "<<" { RET(LSHIFT); } - "++" { RET(INCR); } - "--" { RET(DECR); } - "->" { RET(DEREF); } - "&&" { RET(ANDAND); } - "||" { RET(OROR); } - "<=" { RET(LEQ); } - ">=" { RET(GEQ); } - "==" { RET(EQL); } - "!=" { RET(NEQ); } - ";" { RET(';'); } - "{" { RET('{'); } - "}" { RET('}'); } - "," { RET(','); } - ":" { RET(':'); } - "=" { RET('='); } - "(" { RET('('); } - ")" { RET(')'); } - "[" { RET('['); } - "]" { RET(']'); } - "." { RET('.'); } - "&" { RET('&'); } - "!" { RET('!'); } - "~" { RET('~'); } - "-" { RET('-'); } - "+" { RET('+'); } - "*" { RET('*'); } - "/" { RET('/'); } - "%" { RET('%'); } - "<" { RET('<'); } - ">" { RET('>'); } - "^" { RET('^'); } - "|" { RET('|'); } - "?" { RET('?'); } - - - [ \t\v\f]+ { goto std; } - - "\n" - { - if(cursor == s->eof) RET(EOI); - s->pos = cursor; s->line++; - goto std; - } - - any - { - printf("unexpected character: %c\n", *s->tok); - goto std; - } -*/ - -comment: -/*!re2c - "*/" { goto std; } - "\n" - { - if(cursor == s->eof) RET(EOI); - s->tok = s->pos = cursor; s->line++; - goto comment; - } - X { goto comment; } - X X { goto comment; } - X X X { goto comment; } - X X X X { goto comment; } - X X X X X { goto comment; } - X X X X X X { goto comment; } - X X X X X X X { goto comment; } - X X X X X X X X { goto comment; } - any { goto comment; } -*/ -} - -main(){ - Scanner in; - int t; - memset((char*) &in, 0, sizeof(in)); - in.fd = 0; - while((t = scan(&in)) != EOI){ -/* - printf("%d\t%.*s\n", t, in.cur - in.tok, in.tok); - printf("%d\n", t); -*/ - } - close(in.fd); -} diff --git a/tools/re2c/examples/input_custom/fixed.re b/tools/re2c/examples/input_custom/fixed.re new file mode 100644 index 000000000..51f3b2b00 --- /dev/null +++ 
b/tools/re2c/examples/input_custom/fixed.re @@ -0,0 +1,35 @@ +// Build with "--input custom" re2c switch. +// +// This is an example of handling fixed-length buffer with "--input custom": +// on each YYPEEK we check for the end of input, thus YYFILL generation +// can be safely suppressed. +// +// Note that YYLIMIT points not to terminating NULL, but to the previous +// character: we emulate the case when input has no terminating NULL. +// +// For a real-life example see https://github.com/sopyer/mjson +// or mjson.re from re2c test collection. + +bool lex (const char * cursor, const char * const limit) +{ + const char * marker; + const char * ctxmarker; +# define YYCTYPE char +# define YYPEEK() (cursor >= limit ? 0 : *cursor) +# define YYSKIP() ++cursor +# define YYBACKUP() marker = cursor +# define YYBACKUPCTX() ctxmarker = cursor +# define YYRESTORE() cursor = marker +# define YYRESTORECTX() cursor = ctxmarker + /*!re2c + re2c:yyfill:enable = 0; + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + char buffer [] = "int buffer [1024]"; + return !lex (buffer, buffer + sizeof (buffer) - 1); +} diff --git a/tools/re2c/examples/input_custom/simple/README b/tools/re2c/examples/input_custom/simple/README new file mode 100644 index 000000000..c0c4d955a --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/README @@ -0,0 +1,20 @@ +Build with "--input custom" re2c switch. + +These are three examples of "--input custom" usage: + +- input_custom_default.re: + implements default re2c input model (pointers to plain buffer) + +- input_custom_fgetc: + implements C-style file input (using ) + +- input_custom_fgetc: + implements std::istringstream input + +Note that these examples are very simple and don't need +to implement YYFILL; the only reason they don't use +"re2c:yyfill:enable = 0;" is to keep YYLESSTHAN and YYLIMIT +(for the sake of example). 
+ +In real-life programs one will need to care for correct +end-of-input handling. diff --git a/tools/re2c/examples/input_custom/simple/default.re b/tools/re2c/examples/input_custom/simple/default.re new file mode 100644 index 000000000..94cde7cd6 --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/default.re @@ -0,0 +1,24 @@ +bool lex (const char * cursor, const char * const limit) +{ + const char * marker; + const char * ctxmarker; +# define YYCTYPE char +# define YYPEEK() *cursor +# define YYSKIP() ++cursor +# define YYBACKUP() marker = cursor +# define YYBACKUPCTX() ctxmarker = cursor +# define YYRESTORE() cursor = marker +# define YYRESTORECTX() cursor = ctxmarker +# define YYLESSTHAN(n) limit - cursor < n +# define YYFILL(n) {} + /*!re2c + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + char buffer [] = "int buffer [1024]"; + return !lex (buffer, buffer + sizeof (buffer)); +} diff --git a/tools/re2c/examples/input_custom/simple/fgetc.re b/tools/re2c/examples/input_custom/simple/fgetc.re new file mode 100644 index 000000000..d2dffd9a5 --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/fgetc.re @@ -0,0 +1,43 @@ +#include + +char peek (FILE * f) +{ + char c = fgetc (f); + ungetc (c, f); + return c; +} + +bool lex (FILE * f, const long limit) +{ + long marker; + long ctxmarker; +# define YYCTYPE char +# define YYPEEK() peek (f) +# define YYSKIP() fgetc (f) +# define YYBACKUP() marker = ftell (f) +# define YYBACKUPCTX() ctxmarker = ftell (f) +# define YYRESTORE() fseek (f, marker, SEEK_SET) +# define YYRESTORECTX() fseek (f, ctxmarker, SEEK_SET) +# define YYLESSTHAN(n) limit - ftell (f) < n +# define YYFILL(n) {} + /*!re2c + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + const char buffer [] = "int buffer [1024]"; + const char fn [] = "input.txt"; + + FILE * f = fopen (fn, "w"); + fwrite (buffer, 1, sizeof (buffer), f); + fclose (f); + + f = fopen 
(fn, "rb"); + int result = !lex (f, sizeof (buffer)); + fclose (f); + + return result; +} diff --git a/tools/re2c/examples/input_custom/simple/istringstream.re b/tools/re2c/examples/input_custom/simple/istringstream.re new file mode 100644 index 000000000..5d702291e --- /dev/null +++ b/tools/re2c/examples/input_custom/simple/istringstream.re @@ -0,0 +1,27 @@ +#include + +bool lex (std::istringstream & is, const std::streampos limit) +{ + std::streampos marker; + std::streampos ctxmarker; +# define YYCTYPE char +# define YYPEEK() is.peek () +# define YYSKIP() is.ignore () +# define YYBACKUP() marker = is.tellg () +# define YYBACKUPCTX() ctxmarker = is.tellg () +# define YYRESTORE() is.seekg (marker) +# define YYRESTORECTX() is.seekg (ctxmarker) +# define YYLESSTHAN(n) limit - is.tellg () < n +# define YYFILL(n) {} + /*!re2c + "int buffer " / "[" [0-9]+ "]" { return true; } + * { return false; } + */ +} + +int main () +{ + const char buffer [] = "int buffer [1024]"; + std::istringstream is (buffer); + return !lex (is, sizeof (buffer)); +} diff --git a/tools/re2c/examples/c.re b/tools/re2c/examples/langs/c.re similarity index 100% rename from tools/re2c/examples/c.re rename to tools/re2c/examples/langs/c.re diff --git a/tools/re2c/examples/modula.re b/tools/re2c/examples/langs/modula.re similarity index 98% rename from tools/re2c/examples/modula.re rename to tools/re2c/examples/langs/modula.re index 0468ba4e1..186b0cc13 100644 --- a/tools/re2c/examples/modula.re +++ b/tools/re2c/examples/langs/modula.re @@ -11,13 +11,14 @@ typedef unsigned char uchar; #define YYCURSOR cursor #define YYLIMIT s->lim #define YYMARKER s->ptr +#define YYCTXMARKER s->ctx #define YYFILL {cursor = fill(s, cursor);} #define RETURN(i) {s->cur = cursor; return i;} typedef struct Scanner { int fd; - uchar *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; + uchar *bot, *tok, *ptr, *ctx, *cur, *pos, *lim, *top, *eof; uint line; } Scanner; diff --git a/tools/re2c/examples/rexx/rexx.l 
b/tools/re2c/examples/langs/rexx.re similarity index 100% rename from tools/re2c/examples/rexx/rexx.l rename to tools/re2c/examples/langs/rexx.re diff --git a/tools/re2c/examples/push.re b/tools/re2c/examples/push_model/push.re similarity index 99% rename from tools/re2c/examples/push.re rename to tools/re2c/examples/push_model/push.re index a76b7aec3..5ad6e7ac8 100644 --- a/tools/re2c/examples/push.re +++ b/tools/re2c/examples/push_model/push.re @@ -226,14 +226,14 @@ public: start: - /*!re2c - + /*!re2c + re2c:startlabel = 1; eol = "\n"; eof = "\000"; digit = [0-9]; integer = digit+; alpha = [A-Za-z_]; - any = [\000-\0377]; + any = [\000-\377]; space = [ \h\t\v\f\r]; "if" { SEND(kIf); } diff --git a/tools/re2c/examples/repeater.re b/tools/re2c/examples/repeater.re deleted file mode 100644 index 05c4c8842..000000000 --- a/tools/re2c/examples/repeater.re +++ /dev/null @@ -1,44 +0,0 @@ -#include -#include -#include - -#define RET(n) printf("%d\n", n); return n - -int scan(char *s, int l){ -char *p = s; -char *q; -#define YYCTYPE char -#define YYCURSOR p -#define YYLIMIT (s+l) -#define YYMARKER q -#define YYFILL(n) -/*!re2c - 'a'{1}"\n" {RET(1);} - 'a'{2,3}"\n" {RET(2);} - 'a'{6}"\n" {RET(4);} - 'a'{4,}"\n" {RET(3);} - [^aq]|"\n" {RET(0);} -*/ -} - -#define do_scan(str) scan(str, strlen(str)) - -main() -{ - do_scan("a\n"); - do_scan("aa\n"); - do_scan("aaa\n"); - do_scan("aaaa\n"); - do_scan("q"); - do_scan("a"); - do_scan("A\n"); - do_scan("AA\n"); - do_scan("aAa\n"); - do_scan("AaaA\n"); - do_scan("Q"); - do_scan("AaaAa\n"); - do_scan("AaaAaA\n"); - do_scan("A"); - do_scan("\n"); - do_scan("0"); -} diff --git a/tools/re2c/examples/rexx/README b/tools/re2c/examples/rexx/README deleted file mode 100644 index 2af0178d5..000000000 --- a/tools/re2c/examples/rexx/README +++ /dev/null @@ -1 +0,0 @@ -Replacement modules for an existing REXX interpreter. Not standalone. 
diff --git a/tools/re2c/examples/rexx/scanio.c b/tools/re2c/examples/rexx/scanio.c deleted file mode 100644 index de6898dfc..000000000 --- a/tools/re2c/examples/rexx/scanio.c +++ /dev/null @@ -1,41 +0,0 @@ -uchar *ScanFill(uchar *cursor){ - unsigned cnt = s->tok - s->bot; - s->pos += cursor - s->mrk; - if(cnt){ - if(s->eot){ - unsigned len = s->eot - s->tok; - memcpy(s->bot, s->tok, len); - s->eot = &s->bot[len]; - if((len = s->lim - cursor) != 0) - memcpy(s->eot, cursor, len); - cursor = s->eot; - s->lim = &cursor[len]; - } else { - memcpy(s->bot, s->tok, s->lim - s->tok); - cursor -= cnt; - s->lim -= cnt; - } - s->tok = s->bot; - s->ptr -= cnt; - } - if((s->top - s->lim) < 512){ - uchar *buf = (uchar*) malloc(((s->lim - s->bot) + 512)*sizeof(uchar)); - memcpy(buf, s->bot, s->lim - s->bot); - s->tok = buf; - s->ptr = &buf[s->ptr - s->bot]; - if(s->eot) - s->eot = &buf[s->eot - s->bot]; - cursor = &buf[cursor - s->bot]; - s->lim = &buf[s->lim - s->bot]; - s->top = &s->lim[512]; - free(s->bot); - s->bot = buf; - } - s->mrk = cursor; - if(ScanCBIO.file){ - if((cnt = read(ScanCBIO.u.f.fd, (char*) s->lim, 512)) != 512) - memset(&s->lim[cnt], 0, 512 - cnt); - s->lim += 512; - } - return cursor; -} diff --git a/tools/re2c/examples/sample.re b/tools/re2c/examples/sample.re deleted file mode 100644 index 2f497a3b5..000000000 --- a/tools/re2c/examples/sample.re +++ /dev/null @@ -1,7 +0,0 @@ -/*!re2c - "print" {return PRINT;} - [a-z]+ {return ID;} - [0-9]+ {return DEC;} - "0x" [0-9a-f]+ {return HEX;} - [\000-\377] {return ERR;} -*/ diff --git a/tools/re2c/examples/simple.re b/tools/re2c/examples/simple.re deleted file mode 100644 index 5fd8891fd..000000000 --- a/tools/re2c/examples/simple.re +++ /dev/null @@ -1,13 +0,0 @@ -#define NULL ((char*) 0) -char *scan(char *p){ -char *q; -#define YYCTYPE char -#define YYCURSOR p -#define YYLIMIT p -#define YYMARKER q -#define YYFILL(n) -/*!re2c - [0-9]+ {return YYCURSOR;} - [\000-\377] {return NULL;} -*/ -} diff --git 
a/tools/re2c/globals.h b/tools/re2c/globals.h deleted file mode 100644 index 341d0aac3..000000000 --- a/tools/re2c/globals.h +++ /dev/null @@ -1,73 +0,0 @@ -/* $Id: globals.h 713 2007-04-29 15:33:47Z helly $ */ -#ifndef _globals_h -#define _globals_h - -#include "basics.h" -#include -#include -#include -#include "stream_lc.h" -#include "code_names.h" - -namespace re2c -{ - -extern file_info sourceFileInfo; -extern file_info outputFileInfo; - -extern bool bFlag; -extern bool dFlag; -extern bool eFlag; -extern bool fFlag; -extern bool gFlag; -extern bool iFlag; -extern bool sFlag; -extern bool uFlag; -extern bool wFlag; - -extern bool bNoGenerationDate; - -extern bool bSinglePass; -extern bool bFirstPass; -extern bool bLastPass; - -extern bool bUsedYYAccept; -extern bool bUsedYYMaxFill; -extern bool bUsedYYMarker; - -extern bool bUseStartLabel; -extern std::string startLabelName; -extern std::string labelPrefix; -extern std::string yychConversion; -extern uint maxFill; -extern uint next_label; -extern uint cGotoThreshold; - -/* configurations */ -extern uint topIndent; -extern std::string indString; -extern bool yybmHexTable; -extern bool bUseStateAbort; -extern bool bUseStateNext; -extern bool bWroteGetState; -extern bool bUseYYFill; -extern bool bUseYYFillParam; - -extern uint asc2ebc[256]; -extern uint ebc2asc[256]; - -extern uint *xlat, *talx; - -extern uint next_fill_index; -extern uint last_fill_index; -extern std::set vUsedLabels; -extern re2c::CodeNames mapCodeName; - -extern uint nRealChars; - -extern char octCh(uint c); -extern char hexCh(uint c); - -} // end namespace re2c - -#endif diff --git a/tools/re2c/ins.h b/tools/re2c/ins.h deleted file mode 100644 index a2e379585..000000000 --- a/tools/re2c/ins.h +++ /dev/null @@ -1,56 +0,0 @@ -/* $Id: ins.h 535 2006-05-25 13:36:14Z helly $ */ -#ifndef _ins_h -#define _ins_h - -#include "basics.h" - -namespace re2c -{ - -typedef unsigned short Char; - -const uint CHAR = 0; -const uint GOTO = 1; -const uint FORK = 
2; -const uint TERM = 3; -const uint CTXT = 4; - -union Ins { - - struct - { - byte tag; - byte marked; - void *link; - } - - i; - - struct - { - ushort value; - ushort bump; - void *link; - } - - c; -}; - -inline bool isMarked(Ins *i) -{ - return i->i.marked != 0; -} - -inline void mark(Ins *i) -{ - i->i.marked = true; -} - -inline void unmark(Ins *i) -{ - i->i.marked = false; -} - -} // end namespace re2c - -#endif diff --git a/tools/re2c/main.cc b/tools/re2c/main.cc deleted file mode 100644 index 1116b9772..000000000 --- a/tools/re2c/main.cc +++ /dev/null @@ -1,351 +0,0 @@ -/* $Id: main.cc 691 2007-04-22 15:07:39Z helly $ */ -#ifdef HAVE_CONFIG_H -#include "config.h" -#elif defined(_WIN32) -#include "config_w32.h" -#endif - -#include -#include -#include -#include - -#include "globals.h" -#include "parser.h" -#include "dfa.h" -#include "mbo_getopt.h" - -namespace re2c -{ - -file_info sourceFileInfo; -file_info outputFileInfo; - -bool bFlag = false; -bool dFlag = false; -bool eFlag = false; -bool fFlag = false; -bool gFlag = false; -bool iFlag = false; -bool sFlag = false; -bool uFlag = false; -bool wFlag = false; - -bool bNoGenerationDate = false; - -bool bSinglePass = false; -bool bFirstPass = true; -bool bLastPass = false; - -bool bUsedYYAccept = false; -bool bUsedYYMaxFill = false; -bool bUsedYYMarker = true; - -bool bUseStartLabel = false; -bool bUseStateNext = false; -bool bUseYYFill = true; -bool bUseYYFillParam = true; - -std::string startLabelName; -std::string labelPrefix("yy"); -std::string yychConversion(""); -uint maxFill = 1; -uint next_label = 0; -uint cGotoThreshold = 9; - -uint topIndent = 0; -std::string indString("\t"); -bool yybmHexTable = false; -bool bUseStateAbort = false; -bool bWroteGetState = false; - -uint nRealChars = 256; - -uint next_fill_index = 0; -uint last_fill_index = 0; -std::set vUsedLabels; -re2c::CodeNames mapCodeName; - -free_list RegExp::vFreeList; -free_list Range::vFreeList; - -using namespace std; - -static char *opt_arg 
= NULL; -static int opt_ind = 1; - -static const mbo_opt_struct OPTIONS[] = -{ - mbo_opt_struct('?', 0, "help"), - mbo_opt_struct('b', 0, "bit-vectors"), - mbo_opt_struct('d', 0, "debug-output"), - mbo_opt_struct('e', 0, "ecb"), - mbo_opt_struct('f', 0, "storable-state"), - mbo_opt_struct('g', 0, "computed-gotos"), - mbo_opt_struct('h', 0, "help"), - mbo_opt_struct('i', 0, "no-debug-info"), - mbo_opt_struct('o', 1, "output"), - mbo_opt_struct('s', 0, "nested-ifs"), - mbo_opt_struct('u', 0, "unicode"), - mbo_opt_struct('v', 0, "version"), - mbo_opt_struct('V', 0, "vernum"), - mbo_opt_struct('w', 0, "wide-chars"), - mbo_opt_struct('1', 0, "single-pass"), - mbo_opt_struct(10, 0, "no-generation-date"), - mbo_opt_struct('-', 0, NULL) /* end of args */ -}; - -static void usage() -{ - cerr << "usage: re2c [-bdefghisvVw1] [-o file] file\n" - "\n" - "-? -h --help Display this info.\n" - "\n" - "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n" - " coax better code out of the compiler. Most useful for\n" - " specifications with more than a few keywords (e.g. for\n" - " most programming languages).\n" - "\n" - "-d --debug-output Creates a parser that dumps information during\n" - " about the current position and in which state the\n" - " parser is.\n" - "\n" - "-e --ecb Cross-compile from an ASCII platform to\n" - " an EBCDIC one.\n" - "\n" - "-f --storable-state Generate a scanner that supports storable states.\n" - "\n" - "-g --computed-gotos Implies -b. Generate computed goto code (only useable\n" - " with gcc).\n" - "\n" - "-i --no-debug-info Do not generate '#line' info (usefull for versioning).\n" - "\n" - "-o --output=output Specify the output file instead of stdout\n" - " This cannot be used together with -e switch.\n" - "\n" - "-s --nested-ifs Generate nested ifs for some switches. 
Many compilers\n" - " need this assist to generate better code.\n" - "\n" - "-u --unicode Implies -w but supports the full Unicode character set.\n" - "\n" - "-v --version Show version information.\n" - "\n" - "-V --vernum Show version as one number.\n" - "\n" - "-w --wide-chars Create a parser that supports wide chars (UCS-2). This\n" - " implies -s and cannot be used together with -e switch.\n" - "\n" - "-1 --single-pass Force single pass generation, this cannot be combined\n" - " with -f and disables YYMAXFILL generation prior to last\n" - " re2c block.\n" - "\n" - "--no-generation-date Suppress date output in the generated output so that it\n" - " only shows the re2c version.\n" - ; -} - -} // end namespace re2c - -using namespace re2c; - -int main(int argc, char *argv[]) -{ - int c; - const char *sourceFileName = 0; - const char *outputFileName = 0; - - if (argc == 1) - { - usage(); - return 2; - } - - while ((c = mbo_getopt(argc, argv, OPTIONS, &opt_arg, &opt_ind, 0)) != -1) - { - switch (c) - { - - case 'b': - bFlag = true; - sFlag = true; - break; - - case 'e': - xlat = asc2ebc; - talx = ebc2asc; - eFlag = true; - break; - - case 'd': - dFlag = true; - break; - - case 'f': - fFlag = true; - break; - - case 'g': - gFlag = true; - bFlag = true; - sFlag = true; - break; - - case 'i': - iFlag = true; - break; - - case 'o': - outputFileName = opt_arg; - break; - - case 's': - sFlag = true; - break; - - case '1': - bSinglePass = true; - break; - - case 'v': - cout << "re2c " << PACKAGE_VERSION << "\n"; - return 2; - - case 'V': { - string vernum(PACKAGE_VERSION); - - if (vernum[1] == '.') - { - vernum.insert(0, "0"); - } - vernum.erase(2, 1); - if (vernum[3] == '.') - { - vernum.insert(2, "0"); - } - vernum.erase(4, 1); - if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9') - { - vernum.insert(4, "0"); - } - vernum.resize(6); - cout << vernum << endl; - return 2; - } - - case 'w': - nRealChars = (1<<16); /* 0x10000 */ - sFlag = true; - wFlag = true; - 
break; - - case 'u': - nRealChars = 0x110000; /* 17 times w-Flag */ - sFlag = true; - uFlag = true; - break; - - case 'h': - case '?': - default: - usage(); - return 2; - - case 10: - bNoGenerationDate = true; - break; - } - } - - if ((bFlag || fFlag) && bSinglePass) { - std::cerr << "re2c: error: Cannot combine -1 and -b or -f switch\n"; - return 1; - } - - if (wFlag && eFlag) - { - std::cerr << "re2c: error: Cannot combine -e with -w or -u switch\n"; - return 2; - } - if (wFlag && uFlag) - { - std::cerr << "re2c: error: Cannot combine -u with -w switch\n"; - return 2; - } - - if (uFlag) - { - wFlag = true; - } - - if (argc == opt_ind + 1) - { - sourceFileName = argv[opt_ind]; - } - else - { - usage(); - return 2; - } - - // set up the source stream - re2c::ifstream_lc source; - - if (sourceFileName[0] == '-' && sourceFileName[1] == '\0') - { - if (fFlag) - { - std::cerr << "re2c: error: multiple /*!re2c stdin is not acceptable when -f is specified\n"; - return 1; - } - sourceFileName = ""; - source.open(stdin); - } - else if (!source.open(sourceFileName).is_open()) - { - cerr << "re2c: error: cannot open " << sourceFileName << "\n"; - return 1; - } - - // set up the output stream - re2c::ofstream_lc output; - - if (outputFileName == 0 || (sourceFileName[0] == '-' && sourceFileName[1] == '\0')) - { - outputFileName = ""; - output.open(stdout); - } - else if (!output.open(outputFileName).is_open()) - { - cerr << "re2c: error: cannot open " << outputFileName << "\n"; - return 1; - } - Scanner scanner(sourceFileName, source, output); - sourceFileInfo = file_info(sourceFileName, &scanner); - outputFileInfo = file_info(outputFileName, &output); - - if (!bSinglePass) - { - bUsedYYMarker = false; - - re2c::ifstream_lc null_source; - - if (!null_source.open(sourceFileName).is_open()) - { - cerr << "re2c: error: cannot re-open " << sourceFileName << "\n"; - return 1; - } - - null_stream null_dev; - Scanner null_scanner(sourceFileName, null_source, null_dev); - 
parse(null_scanner, null_dev); - next_label = 0; - next_fill_index = 0; - bWroteGetState = false; - bUsedYYMaxFill = false; - bFirstPass = false; - } - - bLastPass = true; - parse(scanner, output); - return 0; -} diff --git a/tools/re2c/mbo_getopt.cc b/tools/re2c/mbo_getopt.cc deleted file mode 100644 index 637627926..000000000 --- a/tools/re2c/mbo_getopt.cc +++ /dev/null @@ -1,210 +0,0 @@ -/* - Author: Marcus Boerger -*/ - -/* $Id: mbo_getopt.cc 698 2007-04-23 21:06:56Z helly $ */ - -#include -#include -#include -#include -#include "mbo_getopt.h" -#define OPTERRCOLON (1) -#define OPTERRNF (2) -#define OPTERRARG (3) - -namespace re2c -{ - -static int mbo_opt_error(int, char * const *argv, int oint, int optchr, int err, int show_err) -{ - if (show_err) - { - fprintf(stderr, "Error in argument %d, char %d: ", oint, optchr + 1); - - switch (err) - { - - case OPTERRCOLON: - fprintf(stderr, ": in flags\n"); - break; - - case OPTERRNF: - fprintf(stderr, "option not found %c\n", argv[oint][optchr]); - break; - - case OPTERRARG: - fprintf(stderr, "no argument for option %c\n", argv[oint][optchr]); - break; - - default: - fprintf(stderr, "unknown\n"); - break; - } - } - - return ('?'); -} - -int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct *opts, char **optarg, int *optind, int show_err) -{ - static int optchr = 0; - static int dash = 0; /* have already seen the - */ - int arg_start = 2; - - int opts_idx = -1; - - if (*optind >= argc) - { - return (EOF); - } - - if (!dash) - { - if ((argv[*optind][0] != '-')) - { - return (EOF); - } - else - { - if (!argv[*optind][1]) - { - /* - * use to specify stdin. 
Need to let pgm process this and - * the following args - */ - return (EOF); - } - } - } - - if ((argv[*optind][0] == '-') && (argv[*optind][1] == '-')) - { - /* '--' indicates end of args if not followed by a known long option name */ - if (argv[*optind][2] == '\0') { - (*optind)++; - return(EOF); - } - - while (1) - { - opts_idx++; - - if (opts[opts_idx].opt_char == '-') - { - (*optind)++; - return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err)); - } - else if (opts[opts_idx].opt_name && !strcmp(&argv[*optind][2], opts[opts_idx].opt_name)) - { - break; - } - } - - optchr = 0; - dash = 0; - arg_start = 2 + strlen(opts[opts_idx].opt_name); - } - - else - { - if (!dash) - { - dash = 1; - optchr = 1; - } - - /* Check if the guy tries to do a -: kind of flag */ - if (argv[*optind][optchr] == ':') - { - dash = 0; - (*optind)++; - return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRCOLON, show_err)); - } - arg_start = 1 + optchr; - } - - if (opts_idx < 0) - { - while (1) - { - opts_idx++; - - if (opts[opts_idx].opt_char == '-') - { - int errind = *optind; - int errchr = optchr; - - if (!argv[*optind][optchr + 1]) - { - dash = 0; - (*optind)++; - } - else - { - optchr++; - arg_start++; - } - - return (mbo_opt_error(argc, argv, errind, errchr, OPTERRNF, show_err)); - } - else if (argv[*optind][optchr] == opts[opts_idx].opt_char) - { - break; - } - } - } - - if (opts[opts_idx].need_param) - { - /* Check for cases where the value of the argument - is in the form - or in the form - */ - dash = 0; - - if (!argv[*optind][arg_start]) - { - (*optind)++; - - if (*optind == argc) - { - return (mbo_opt_error(argc, argv, *optind - 1, optchr, OPTERRARG, show_err)); - } - - *optarg = argv[(*optind)++]; - } - else - { - *optarg = &argv[*optind][arg_start]; - (*optind)++; - } - - return opts[opts_idx].opt_char; - } - else - { - if (arg_start >= 2 && !((argv[*optind][0] == '-') && (argv[*optind][1] == '-'))) - { - if (!argv[*optind][optchr + 1]) - { - dash 
= 0; - (*optind)++; - } - else - { - optchr++; - } - } - else - { - (*optind)++; - } - - return opts[opts_idx].opt_char; - } - - assert(0); - return (0); /* never reached */ -} - -} // end namespace re2c - diff --git a/tools/re2c/mbo_getopt.h b/tools/re2c/mbo_getopt.h deleted file mode 100644 index ca72f0db7..000000000 --- a/tools/re2c/mbo_getopt.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - Author: Marcus Boerger -*/ - -/* $Id: mbo_getopt.h 539 2006-05-25 13:37:38Z helly $ */ - -/* Define structure for one recognized option (both single char and long name). - * If short_open is '-' this is the last option. - */ - -#ifndef RE2C_MBO_GETOPT_H_INCLUDE_GUARD_ -#define RE2C_MBO_GETOPT_H_INCLUDE_GUARD_ - -namespace re2c -{ - -struct mbo_opt_struct -{ - mbo_opt_struct(char _opt_char, int _need_param, const char * _opt_name) - : opt_char(_opt_char), need_param(_need_param), opt_name(_opt_name) - { - } - - const char opt_char; - const int need_param; - const char * opt_name; -}; - -int mbo_getopt(int argc, char* const *argv, const mbo_opt_struct *opts, char **optarg, int *optind, int show_err); - -} // end namespace re2c - -#endif - diff --git a/tools/re2c/parser.cc b/tools/re2c/parser.cc deleted file mode 100644 index ad1bf74ec..000000000 --- a/tools/re2c/parser.cc +++ /dev/null @@ -1,1807 +0,0 @@ -/* A Bison parser, made by GNU Bison 2.3. */ - -/* Skeleton implementation for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* C LALR(1) parser skeleton written by Richard Stallman, by - simplifying the original so-called "semantic" parser. */ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "2.3" - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 0 - -/* Using locations. */ -#define YYLSP_NEEDED 0 - - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. 
*/ - enum yytokentype { - CLOSESIZE = 258, - CLOSE = 259, - ID = 260, - CODE = 261, - RANGE = 262, - STRING = 263, - CONFIG = 264, - VALUE = 265, - NUMBER = 266 - }; -#endif -/* Tokens. */ -#define CLOSESIZE 258 -#define CLOSE 259 -#define ID 260 -#define CODE 261 -#define RANGE 262 -#define STRING 263 -#define CONFIG 264 -#define VALUE 265 -#define NUMBER 266 - - - - -/* Copy the first part of user declarations. */ -#line 1 "./parser.y" - - -/* $Id: parser.y 674 2007-04-16 21:39:11Z helly $ */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include - -#include "globals.h" -#include "parser.h" -#include "basics.h" - -#define YYMALLOC malloc -#define YYFREE free - -using namespace re2c; - -extern "C" -{ -int yylex(); -void yyerror(const char*); -} - -static re2c::uint accept; -static RegExp *spec; -static Scanner *in = NULL; - -/* Bison version 1.875 emits a definition that is not working - * with several g++ version. Hence we disable it here. - */ -#if defined(__GNUC__) -#define __attribute__(x) -#endif - -/* strdup() isn't standard C, so if we don't have it, we'll create our - * own version - */ -#if !defined(HAVE_STRDUP) -static char* strdup(const char* s) -{ - char* rv = (char*)malloc(strlen(s) + 1); - if (rv == NULL) - return NULL; - strcpy(rv, s); - return rv; -} -#endif - - - -/* Enabling traces. */ -#ifndef YYDEBUG -# define YYDEBUG 0 -#endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -/* Enabling the token table. */ -#ifndef YYTOKEN_TABLE -# define YYTOKEN_TABLE 0 -#endif - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef union YYSTYPE -#line 58 "./parser.y" -{ - re2c::Symbol *symbol; - re2c::RegExp *regexp; - re2c::Token *token; - char op; - int number; - re2c::ExtOp extop; - re2c::Str *str; -} -/* Line 187 of yacc.c. 
*/ -#line 183 "parser.cc" - YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - - - -/* Copy the second part of user declarations. */ - - -/* Line 216 of yacc.c. */ -#line 196 "parser.cc" - -#ifdef short -# undef short -#endif - -#ifdef YYTYPE_UINT8 -typedef YYTYPE_UINT8 yytype_uint8; -#else -typedef unsigned char yytype_uint8; -#endif - -#ifdef YYTYPE_INT8 -typedef YYTYPE_INT8 yytype_int8; -#elif (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -typedef signed char yytype_int8; -#else -typedef short int yytype_int8; -#endif - -#ifdef YYTYPE_UINT16 -typedef YYTYPE_UINT16 yytype_uint16; -#else -typedef unsigned short int yytype_uint16; -#endif - -#ifdef YYTYPE_INT16 -typedef YYTYPE_INT16 yytype_int16; -#else -typedef short int yytype_int16; -#endif - -#ifndef YYSIZE_T -# ifdef __SIZE_TYPE__ -# define YYSIZE_T __SIZE_TYPE__ -# elif defined size_t -# define YYSIZE_T size_t -# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# else -# define YYSIZE_T unsigned int -# endif -#endif - -#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) - -#ifndef YY_ -# if YYENABLE_NLS -# if ENABLE_NLS -# include /* INFRINGES ON USER NAME SPACE */ -# define YY_(msgid) dgettext ("bison-runtime", msgid) -# endif -# endif -# ifndef YY_ -# define YY_(msgid) msgid -# endif -#endif - -/* Suppress unused-variable warnings by "using" E. */ -#if ! defined lint || defined __GNUC__ -# define YYUSE(e) ((void) (e)) -#else -# define YYUSE(e) /* empty */ -#endif - -/* Identity function, used to suppress warnings about constant conditions. 
*/ -#ifndef lint -# define YYID(n) (n) -#else -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static int -YYID (int i) -#else -static int -YYID (i) - int i; -#endif -{ - return i; -} -#endif - -#if ! defined yyoverflow || YYERROR_VERBOSE - -/* The parser invokes alloca or malloc; define the necessary symbols. */ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# elif defined __BUILTIN_VA_ARG_INCR -# include /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca -# elif defined _MSC_VER -# include /* INFRINGES ON USER NAME SPACE */ -# define alloca _alloca -# else -# define YYSTACK_ALLOC alloca -# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's `empty if-body' warning. */ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) -# ifndef YYSTACK_ALLOC_MAXIMUM - /* The OS might guarantee only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - invoke alloca (N) if N exceeds 4096. Use a slightly smaller number - to allow for a few compiler-allocated temporary stack slots. */ -# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ -# endif -# else -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# ifndef YYSTACK_ALLOC_MAXIMUM -# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM -# endif -# if (defined __cplusplus && ! defined _STDLIB_H \ - && ! 
((defined YYMALLOC || defined malloc) \ - && (defined YYFREE || defined free))) -# include /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# ifndef YYFREE -# define YYFREE free -# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void free (void *); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - - -#if (! defined yyoverflow \ - && (! defined __cplusplus \ - || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - yytype_int16 yyss; - YYSTYPE yyvs; - }; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. */ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) - -/* Copy COUNT objects from FROM to TO. The source and destination do - not overlap. */ -# ifndef YYCOPY -# if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) -# else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ - while (YYID (0)) -# endif -# endif - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. 
Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack, Stack, yysize); \ - Stack = &yyptr->Stack; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (YYID (0)) - -#endif - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 2 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 37 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 19 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 11 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 26 -/* YYNRULES -- Number of states. */ -#define YYNSTATES 40 - -/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 266 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ -static const yytype_uint8 yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 17, 18, 2, 2, 2, 2, 2, 14, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 13, - 2, 12, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 15, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11 -}; - -#if YYDEBUG -/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in - YYRHS. */ -static const yytype_uint8 yyprhs[] = -{ - 0, 0, 3, 4, 7, 10, 15, 20, 25, 30, - 34, 35, 38, 40, 44, 46, 50, 52, 55, 57, - 60, 63, 65, 68, 70, 72, 74 -}; - -/* YYRHS -- A `-1'-separated list of the rules' RHS. */ -static const yytype_int8 yyrhs[] = -{ - 20, 0, -1, -1, 20, 22, -1, 20, 21, -1, - 5, 12, 24, 13, -1, 5, 12, 24, 14, -1, - 9, 12, 10, 13, -1, 9, 12, 11, 13, -1, - 24, 23, 6, -1, -1, 14, 24, -1, 25, -1, - 24, 15, 25, -1, 26, -1, 25, 16, 26, -1, - 27, -1, 26, 27, -1, 29, -1, 29, 28, -1, - 29, 3, -1, 4, -1, 28, 4, -1, 5, -1, - 7, -1, 8, -1, 17, 24, 18, -1 -}; - -/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ -static const yytype_uint8 yyrline[] = -{ - 0, 84, 84, 86, 88, 91, 95, 97, 99, 103, - 108, 109, 113, 115, 119, 121, 128, 130, 134, 136, - 150, 156, 158, 162, 166, 168, 170 -}; -#endif - -#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE -/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. 
- First, the terminals, then, starting at YYNTOKENS, nonterminals. */ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "CLOSESIZE", "CLOSE", "ID", "CODE", - "RANGE", "STRING", "CONFIG", "VALUE", "NUMBER", "'='", "';'", "'/'", - "'|'", "'\\\\'", "'('", "')'", "$accept", "spec", "decl", "rule", "look", - "expr", "diff", "term", "factor", "close", "primary", 0 -}; -#endif - -# ifdef YYPRINT -/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to - token YYLEX-NUM. */ -static const yytype_uint16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 61, 59, 47, 124, 92, 40, 41 -}; -# endif - -/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const yytype_uint8 yyr1[] = -{ - 0, 19, 20, 20, 20, 21, 21, 21, 21, 22, - 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, - 27, 28, 28, 29, 29, 29, 29 -}; - -/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ -static const yytype_uint8 yyr2[] = -{ - 0, 2, 0, 2, 2, 4, 4, 4, 4, 3, - 0, 2, 1, 3, 1, 3, 1, 2, 1, 2, - 2, 1, 2, 1, 1, 1, 3 -}; - -/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state - STATE-NUM when YYTABLE doesn't specify something else to do. Zero - means the default is an error. */ -static const yytype_uint8 yydefact[] = -{ - 2, 0, 1, 23, 24, 25, 0, 0, 4, 3, - 10, 12, 14, 16, 18, 0, 0, 23, 0, 0, - 0, 0, 0, 17, 20, 21, 19, 0, 0, 0, - 26, 11, 13, 9, 15, 22, 5, 6, 7, 8 -}; - -/* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int8 yydefgoto[] = -{ - -1, 1, 8, 9, 21, 10, 11, 12, 13, 26, - 14 -}; - -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -#define YYPACT_NINF -13 -static const yytype_int8 yypact[] = -{ - -13, 1, -13, -5, -13, -13, 0, -3, -13, -13, - 9, 13, -3, -13, 22, -3, 17, -13, -2, -3, - -3, 11, -3, -13, -13, -13, 26, 6, 18, 19, - -13, 20, 13, -13, -3, -13, -13, -13, -13, -13 -}; - -/* YYPGOTO[NTERM-NUM]. 
*/ -static const yytype_int8 yypgoto[] = -{ - -13, -13, -13, -13, -13, -4, 14, 15, -12, -13, - -13 -}; - -/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule which - number is the opposite. If zero, do what YYDEFACT says. - If YYTABLE_NINF, syntax error. */ -#define YYTABLE_NINF -1 -static const yytype_uint8 yytable[] = -{ - 23, 2, 17, 18, 4, 5, 3, 15, 4, 5, - 6, 27, 16, 20, 7, 31, 30, 33, 7, 36, - 37, 20, 23, 19, 20, 24, 25, 28, 29, 22, - 35, 38, 39, 0, 32, 20, 0, 34 -}; - -static const yytype_int8 yycheck[] = -{ - 12, 0, 5, 7, 7, 8, 5, 12, 7, 8, - 9, 15, 12, 15, 17, 19, 18, 6, 17, 13, - 14, 15, 34, 14, 15, 3, 4, 10, 11, 16, - 4, 13, 13, -1, 20, 15, -1, 22 -}; - -/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. */ -static const yytype_uint8 yystos[] = -{ - 0, 20, 0, 5, 7, 8, 9, 17, 21, 22, - 24, 25, 26, 27, 29, 12, 12, 5, 24, 14, - 15, 23, 16, 27, 3, 4, 28, 24, 10, 11, - 18, 24, 25, 6, 26, 4, 13, 14, 13, 13 -}; - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -/* Like YYERROR except do call yyerror. This remains here temporarily - to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ - -#define YYFAIL goto yyerrlab - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - yytoken = YYTRANSLATE (yychar); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ - yyerror (YY_("syntax error: cannot back up")); \ - YYERROR; \ - } \ -while (YYID (0)) - - -#define YYTERROR 1 -#define YYERRCODE 256 - - -/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. 
- If N is 0, then set CURRENT to the empty location which ends - the previous symbol: RHS[0] (always defined). */ - -#define YYRHSLOC(Rhs, K) ((Rhs)[K]) -#ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - do \ - if (YYID (N)) \ - { \ - (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC (Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC (Rhs, 0).last_column; \ - } \ - while (YYID (0)) -#endif - - -/* YY_LOCATION_PRINT -- Print the location on the stream. - This macro was not mandated originally: define only if we know - we won't break user code: when these are the locations we know. */ - -#ifndef YY_LOCATION_PRINT -# if YYLTYPE_IS_TRIVIAL -# define YY_LOCATION_PRINT(File, Loc) \ - fprintf (File, "%d.%d-%d.%d", \ - (Loc).first_line, (Loc).first_column, \ - (Loc).last_line, (Loc).last_column) -# else -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -# endif -#endif - - -/* YYLEX -- calling `yylex' with the right arguments. */ - -#ifdef YYLEX_PARAM -# define YYLEX yylex (YYLEX_PARAM) -#else -# define YYLEX yylex () -#endif - -/* Enable debugging if requested. */ -#if YYDEBUG - -# ifndef YYFPRINTF -# include /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (YYID (0)) - -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yy_symbol_print (stderr, \ - Type, Value); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (YYID (0)) - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. 
| -`--------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_value_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (!yyvaluep) - return; -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# else - YYUSE (yyoutput); -# endif - switch (yytype) - { - default: - break; - } -} - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (yytype < YYNTOKENS) - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); - else - YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); - - yy_symbol_value_print (yyoutput, yytype, yyvaluep); - YYFPRINTF (yyoutput, ")"); -} - -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). 
| -`------------------------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_stack_print (yytype_int16 *bottom, yytype_int16 *top) -#else -static void -yy_stack_print (bottom, top) - yytype_int16 *bottom; - yytype_int16 *top; -#endif -{ - YYFPRINTF (stderr, "Stack now"); - for (; bottom <= top; ++bottom) - YYFPRINTF (stderr, " %d", *bottom); - YYFPRINTF (stderr, "\n"); -} - -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (YYID (0)) - - -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. | -`------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_reduce_print (YYSTYPE *yyvsp, int yyrule) -#else -static void -yy_reduce_print (yyvsp, yyrule) - YYSTYPE *yyvsp; - int yyrule; -#endif -{ - int yynrhs = yyr2[yyrule]; - int yyi; - unsigned long int yylno = yyrline[yyrule]; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", - yyrule - 1, yylno); - /* The symbols being reduced. */ - for (yyi = 0; yyi < yynrhs; yyi++) - { - fprintf (stderr, " $%d = ", yyi + 1); - yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], - &(yyvsp[(yyi + 1) - (yynrhs)]) - ); - fprintf (stderr, "\n"); - } -} - -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (yyvsp, Rule); \ -} while (YYID (0)) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. 
*/ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif - - - -#if YYERROR_VERBOSE - -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen strlen -# else -/* Return the length of YYSTR. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static YYSIZE_T -yystrlen (const char *yystr) -#else -static YYSIZE_T -yystrlen (yystr) - const char *yystr; -#endif -{ - YYSIZE_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; -} -# endif -# endif - -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static char * -yystpcpy (char *yydest, const char *yysrc) -#else -static char * -yystpcpy (yydest, yysrc) - char *yydest; - const char *yysrc; -#endif -{ - char *yyd = yydest; - const char *yys = yysrc; - - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. 
*/ -static YYSIZE_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYSIZE_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - /* Fall through. */ - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (! yyres) - return yystrlen (yystr); - - return yystpcpy (yyres, yystr) - yyres; -} -# endif - -/* Copy into YYRESULT an error message about the unexpected token - YYCHAR while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yychar) -{ - int yyn = yypact[yystate]; - - if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; - else - { - int yytype = YYTRANSLATE (yychar); - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); - YYSIZE_T yysize = yysize0; - YYSIZE_T yysize1; - int yysize_overflow = 0; - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - int yyx; - -# if 0 - /* This is so xgettext sees the translatable formats that are - constructed on the fly. 
*/ - YY_("syntax error, unexpected %s"); - YY_("syntax error, unexpected %s, expecting %s"); - YY_("syntax error, unexpected %s, expecting %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); -# endif - char *yyfmt; - char const *yyf; - static char const yyunexpected[] = "syntax error, unexpected %s"; - static char const yyexpecting[] = ", expecting %s"; - static char const yyor[] = " or %s"; - char yyformat[sizeof yyunexpected - + sizeof yyexpecting - 1 - + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) - * (sizeof yyor - 1))]; - char const *yyprefix = yyexpecting; - - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yycount = 1; - - yyarg[0] = yytname[yytype]; - yyfmt = yystpcpy (yyformat, yyunexpected); - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - yyformat[sizeof yyunexpected - 1] = '\0'; - break; - } - yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - yyfmt = yystpcpy (yyfmt, yyprefix); - yyprefix = yyor; - } - - yyf = YY_(yyformat); - yysize1 = yysize + yystrlen (yyf); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - - if (yysize_overflow) - return YYSIZE_MAXIMUM; - - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. 
*/ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyf) != '\0') - { - if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyf += 2; - } - else - { - yyp++; - yyf++; - } - } - } - return yysize; - } -} -#endif /* YYERROR_VERBOSE */ - - -/*-----------------------------------------------. -| Release the memory associated to this symbol. | -`-----------------------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) -#else -static void -yydestruct (yymsg, yytype, yyvaluep) - const char *yymsg; - int yytype; - YYSTYPE *yyvaluep; -#endif -{ - YYUSE (yyvaluep); - - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); -} - - -/* Prevent warnings from -Wmissing-prototypes. */ - -#ifdef YYPARSE_PARAM -#if defined __STDC__ || defined __cplusplus -int yyparse (void *YYPARSE_PARAM); -#else -int yyparse (); -#endif -#else /* ! YYPARSE_PARAM */ -#if defined __STDC__ || defined __cplusplus -int yyparse (void); -#else -int yyparse (); -#endif -#endif /* ! YYPARSE_PARAM */ - - - -/* The look-ahead symbol. */ -int yychar; - -/* The semantic value of the look-ahead symbol. */ -YYSTYPE yylval; - -/* Number of syntax errors so far. */ -int yynerrs; - - - -/*----------. -| yyparse. | -`----------*/ - -#ifdef YYPARSE_PARAM -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void *YYPARSE_PARAM) -#else -int -yyparse (YYPARSE_PARAM) - void *YYPARSE_PARAM; -#endif -#else /* ! YYPARSE_PARAM */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void) -#else -int -yyparse () - -#endif -#endif -{ - - int yystate; - int yyn; - int yyresult; - /* Number of tokens to shift before error messages enabled. 
*/ - int yyerrstatus; - /* Look-ahead token as an internal (translated) token number. */ - int yytoken = 0; -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYSIZE_T yymsg_alloc = sizeof yymsgbuf; -#endif - - /* Three stacks and their tools: - `yyss': related to states, - `yyvs': related to semantic values, - `yyls': related to locations. - - Refer to the stacks thru separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. */ - yytype_int16 yyssa[YYINITDEPTH]; - yytype_int16 *yyss = yyssa; - yytype_int16 *yyssp; - - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs = yyvsa; - YYSTYPE *yyvsp; - - - -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) - - YYSIZE_T yystacksize = YYINITDEPTH; - - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; - - - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; - - YYDPRINTF ((stderr, "Starting parse\n")); - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - - yyssp = yyss; - yyvsp = yyvs; - - goto yysetstate; - -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. 
*/ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - YYSTYPE *yyvs1 = yyvs; - yytype_int16 *yyss1 = yyss; - - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - - &yystacksize); - - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyexhaustedlab; -# else - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - yytype_int16 *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss); - YYSTACK_RELOCATE (yyvs); - -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - - /* Do appropriate processing given the current state. Read a - look-ahead token if we need one and don't already have one. */ - - /* First try to decide what to do without reference to look-ahead token. */ - yyn = yypact[yystate]; - if (yyn == YYPACT_NINF) - goto yydefault; - - /* Not known => get a look-ahead token if don't already have one. 
*/ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = YYLEX; - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. */ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yyn == 0 || yyn == YYTABLE_NINF) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - - if (yyn == YYFINAL) - YYACCEPT; - - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; - - /* Shift the look-ahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - - /* Discard the shifted token unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; - - yystate = yyn; - *++yyvsp = yylval; - - goto yynewstate; - - -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; - - -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; - - /* If YYLEN is nonzero, implement the default value of the action: - `$$ = $1'. - - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. 
*/ - yyval = yyvsp[1-yylen]; - - - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 2: -#line 84 "./parser.y" - { accept = 0; - spec = NULL; ;} - break; - - case 3: -#line 87 "./parser.y" - { spec = spec? mkAlt(spec, (yyvsp[(2) - (2)].regexp)) : (yyvsp[(2) - (2)].regexp); ;} - break; - - case 5: -#line 92 "./parser.y" - { if((yyvsp[(1) - (4)].symbol)->re) - in->fatal("sym already defined"); - (yyvsp[(1) - (4)].symbol)->re = (yyvsp[(3) - (4)].regexp); ;} - break; - - case 6: -#line 96 "./parser.y" - { in->fatal("trailing contexts are not allowed in named definitions"); ;} - break; - - case 7: -#line 98 "./parser.y" - { in->config(*(yyvsp[(1) - (4)].str), *(yyvsp[(3) - (4)].str)); delete (yyvsp[(1) - (4)].str); delete (yyvsp[(3) - (4)].str); ;} - break; - - case 8: -#line 100 "./parser.y" - { in->config(*(yyvsp[(1) - (4)].str), (yyvsp[(3) - (4)].number)); delete (yyvsp[(1) - (4)].str); ;} - break; - - case 9: -#line 104 "./parser.y" - { (yyval.regexp) = new RuleOp((yyvsp[(1) - (3)].regexp), (yyvsp[(2) - (3)].regexp), (yyvsp[(3) - (3)].token), accept++); ;} - break; - - case 10: -#line 108 "./parser.y" - { (yyval.regexp) = new NullOp; ;} - break; - - case 11: -#line 110 "./parser.y" - { (yyval.regexp) = (yyvsp[(2) - (2)].regexp); ;} - break; - - case 12: -#line 114 "./parser.y" - { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} - break; - - case 13: -#line 116 "./parser.y" - { (yyval.regexp) = mkAlt((yyvsp[(1) - (3)].regexp), (yyvsp[(3) - (3)].regexp)); ;} - break; - - case 14: -#line 120 "./parser.y" - { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} - break; - - case 15: -#line 122 "./parser.y" - { (yyval.regexp) = mkDiff((yyvsp[(1) - (3)].regexp), (yyvsp[(3) - (3)].regexp)); - if(!(yyval.regexp)) - in->fatal("can only difference char sets"); - ;} - break; - - case 16: -#line 129 "./parser.y" - { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} - break; - - case 17: -#line 131 "./parser.y" - { (yyval.regexp) = new CatOp((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - 
(2)].regexp)); ;} - break; - - case 18: -#line 135 "./parser.y" - { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} - break; - - case 19: -#line 137 "./parser.y" - { - switch((yyvsp[(2) - (2)].op)){ - case '*': - (yyval.regexp) = mkAlt(new CloseOp((yyvsp[(1) - (2)].regexp)), new NullOp()); - break; - case '+': - (yyval.regexp) = new CloseOp((yyvsp[(1) - (2)].regexp)); - break; - case '?': - (yyval.regexp) = mkAlt((yyvsp[(1) - (2)].regexp), new NullOp()); - break; - } - ;} - break; - - case 20: -#line 151 "./parser.y" - { - (yyval.regexp) = new CloseVOp((yyvsp[(1) - (2)].regexp), (yyvsp[(2) - (2)].extop).minsize, (yyvsp[(2) - (2)].extop).maxsize); - ;} - break; - - case 21: -#line 157 "./parser.y" - { (yyval.op) = (yyvsp[(1) - (1)].op); ;} - break; - - case 22: -#line 159 "./parser.y" - { (yyval.op) = ((yyvsp[(1) - (2)].op) == (yyvsp[(2) - (2)].op)) ? (yyvsp[(1) - (2)].op) : '*'; ;} - break; - - case 23: -#line 163 "./parser.y" - { if(!(yyvsp[(1) - (1)].symbol)->re) - in->fatal("can't find symbol"); - (yyval.regexp) = (yyvsp[(1) - (1)].symbol)->re; ;} - break; - - case 24: -#line 167 "./parser.y" - { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} - break; - - case 25: -#line 169 "./parser.y" - { (yyval.regexp) = (yyvsp[(1) - (1)].regexp); ;} - break; - - case 26: -#line 171 "./parser.y" - { (yyval.regexp) = (yyvsp[(2) - (3)].regexp); ;} - break; - - -/* Line 1267 of yacc.c. */ -#line 1553 "parser.cc" - default: break; - } - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); - - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - - *++yyvsp = yyval; - - - /* Now `shift' the result of the reduction. Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. 
*/ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; - - goto yynewstate; - - -/*------------------------------------. -| yyerrlab -- here on detecting error | -`------------------------------------*/ -yyerrlab: - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (YY_("syntax error")); -#else - { - YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yychar); - yyerror (yymsg); - } - else - { - yyerror (YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } - } -#endif - } - - - - if (yyerrstatus == 3) - { - /* If just tried and failed to reuse look-ahead token after an - error, discard it. */ - - if (yychar <= YYEOF) - { - /* Return failure if at end of input. */ - if (yychar == YYEOF) - YYABORT; - } - else - { - yydestruct ("Error: discarding", - yytoken, &yylval); - yychar = YYEMPTY; - } - } - - /* Else will try to reuse look-ahead token after shifting the error - token. */ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. | -`---------------------------------------------------*/ -yyerrorlab: - - /* Pacify compilers like GCC when the user code never invokes - YYERROR and the label yyerrorlab therefore never appears in user - code. 
*/ - if (/*CONSTCOND*/ 0) - goto yyerrorlab; - - /* Do not reclaim the symbols of the rule which action triggered - this YYERROR. */ - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (yyn != YYPACT_NINF) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } - } - - /* Pop the current state because it cannot handle the error token. */ - if (yyssp == yyss) - YYABORT; - - - yydestruct ("Error: popping", - yystos[yystate], yyvsp); - YYPOPSTACK (1); - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } - - if (yyn == YYFINAL) - YYACCEPT; - - *++yyvsp = yylval; - - - /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); - - yystate = yyn; - goto yynewstate; - - -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. | -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; - -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; - -#ifndef yyoverflow -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. | -`-------------------------------------------------*/ -yyexhaustedlab: - yyerror (YY_("memory exhausted")); - yyresult = 2; - /* Fall through. 
*/ -#endif - -yyreturn: - if (yychar != YYEOF && yychar != YYEMPTY) - yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval); - /* Do not reclaim the symbols of the rule which action triggered - this YYABORT or YYACCEPT. */ - YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); - while (yyssp != yyss) - { - yydestruct ("Cleanup: popping", - yystos[*yyssp], yyvsp); - YYPOPSTACK (1); - } -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif - /* Make sure YYID is used. */ - return YYID (yyresult); -} - - -#line 174 "./parser.y" - - -extern "C" { -void yyerror(const char* s) -{ - in->fatal(s); -} - -int yylex(){ - return in ? in->scan() : 0; -} -} // end extern "C" - -namespace re2c -{ - -void parse(Scanner& i, std::ostream& o) -{ - in = &i; - - o << "/* Generated by re2c " PACKAGE_VERSION; - if (!bNoGenerationDate) - { - o << " on "; - time_t now = time(&now); - o.write(ctime(&now), 24); - } - o << " */\n"; - o << sourceFileInfo; - - while(i.echo()) - { - yyparse(); - if(spec) - { - genCode(o, topIndent, spec); - } - o << sourceFileInfo; - } - - RegExp::vFreeList.clear(); - Range::vFreeList.clear(); - Symbol::ClearTable(); - in = NULL; -} - -} // end namespace re2c - diff --git a/tools/re2c/parser.h b/tools/re2c/parser.h deleted file mode 100644 index 642d6db4e..000000000 --- a/tools/re2c/parser.h +++ /dev/null @@ -1,56 +0,0 @@ -/* $Id: parser.h 565 2006-06-05 22:07:13Z helly $ */ -#ifndef _parser_h -#define _parser_h - -#include "scanner.h" -#include "re.h" -#include -#include - -namespace re2c -{ - -class Symbol -{ -public: - - RegExp* re; - - static Symbol *find(const SubStr&); - static void ClearTable(); - - typedef std::map SymbolTable; - -protected: - - Symbol(const SubStr& str) - : re(NULL) - , name(str) - { - } - -private: - - static SymbolTable symbol_table; - - Str name; - -#if PEDANTIC - Symbol(const Symbol& oth) - : re(oth.re) - , name(oth.name) - { - 
} - Symbol& operator = (const Symbol& oth) - { - new(this) Symbol(oth); - return *this; - } -#endif -}; - -void parse(Scanner&, std::ostream&); - -} // end namespace re2c - -#endif diff --git a/tools/re2c/parser.y b/tools/re2c/parser.y deleted file mode 100644 index 0164c45fe..000000000 --- a/tools/re2c/parser.y +++ /dev/null @@ -1,220 +0,0 @@ -%{ - -/* $Id: parser.y 674 2007-04-16 21:39:11Z helly $ */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include - -#include "globals.h" -#include "parser.h" -#include "basics.h" - -#define YYMALLOC malloc -#define YYFREE free - -using namespace re2c; - -extern "C" -{ -int yylex(); -void yyerror(const char*); -} - -static re2c::uint accept; -static RegExp *spec; -static Scanner *in = NULL; - -/* Bison version 1.875 emits a definition that is not working - * with several g++ version. Hence we disable it here. - */ -#if defined(__GNUC__) -#define __attribute__(x) -#endif - -/* strdup() isn't standard C, so if we don't have it, we'll create our - * own version - */ -#if !defined(HAVE_STRDUP) -static char* strdup(const char* s) -{ - char* rv = (char*)malloc(strlen(s) + 1); - if (rv == NULL) - return NULL; - strcpy(rv, s); - return rv; -} -#endif - -%} - -%start spec - -%union { - re2c::Symbol *symbol; - re2c::RegExp *regexp; - re2c::Token *token; - char op; - int number; - re2c::ExtOp extop; - re2c::Str *str; -}; - -%token CLOSESIZE CLOSE ID CODE RANGE STRING -%token CONFIG VALUE NUMBER - -%type CLOSE -%type close -%type CLOSESIZE -%type ID -%type CODE -%type RANGE STRING -%type rule look expr diff term factor primary -%type CONFIG VALUE -%type NUMBER - -%% - -spec : - { accept = 0; - spec = NULL; } - | spec rule - { spec = spec? 
mkAlt(spec, $2) : $2; } - | spec decl - ; - -decl : ID '=' expr ';' - { if($1->re) - in->fatal("sym already defined"); - $1->re = $3; } - | ID '=' expr '/' - { in->fatal("trailing contexts are not allowed in named definitions"); } - | CONFIG '=' VALUE ';' - { in->config(*$1, *$3); delete $1; delete $3; } - | CONFIG '=' NUMBER ';' - { in->config(*$1, $3); delete $1; } - ; - -rule : expr look CODE - { $$ = new RuleOp($1, $2, $3, accept++); } - ; - -look : - { $$ = new NullOp; } - | '/' expr - { $$ = $2; } - ; - -expr : diff - { $$ = $1; } - | expr '|' diff - { $$ = mkAlt($1, $3); } - ; - -diff : term - { $$ = $1; } - | diff '\\' term - { $$ = mkDiff($1, $3); - if(!$$) - in->fatal("can only difference char sets"); - } - ; - -term : factor - { $$ = $1; } - | term factor - { $$ = new CatOp($1, $2); } - ; - -factor : primary - { $$ = $1; } - | primary close - { - switch($2){ - case '*': - $$ = mkAlt(new CloseOp($1), new NullOp()); - break; - case '+': - $$ = new CloseOp($1); - break; - case '?': - $$ = mkAlt($1, new NullOp()); - break; - } - } - | primary CLOSESIZE - { - $$ = new CloseVOp($1, $2.minsize, $2.maxsize); - } - ; - -close : CLOSE - { $$ = $1; } - | close CLOSE - { $$ = ($1 == $2) ? $1 : '*'; } - ; - -primary : ID - { if(!$1->re) - in->fatal("can't find symbol"); - $$ = $1->re; } - | RANGE - { $$ = $1; } - | STRING - { $$ = $1; } - | '(' expr ')' - { $$ = $2; } - ; - -%% - -extern "C" { -void yyerror(const char* s) -{ - in->fatal(s); -} - -int yylex(){ - return in ? 
in->scan() : 0; -} -} // end extern "C" - -namespace re2c -{ - -void parse(Scanner& i, std::ostream& o) -{ - in = &i; - - o << "/* Generated by re2c " PACKAGE_VERSION; - if (!bNoGenerationDate) - { - o << " on "; - time_t now = time(&now); - o.write(ctime(&now), 24); - } - o << " */\n"; - o << sourceFileInfo; - - while(i.echo()) - { - yyparse(); - if(spec) - { - genCode(o, topIndent, spec); - } - o << sourceFileInfo; - } - - RegExp::vFreeList.clear(); - Range::vFreeList.clear(); - Symbol::ClearTable(); - in = NULL; -} - -} // end namespace re2c diff --git a/tools/re2c/re.h b/tools/re2c/re.h deleted file mode 100644 index f17bdff7c..000000000 --- a/tools/re2c/re.h +++ /dev/null @@ -1,496 +0,0 @@ -/* $Id: re.h 775 2007-07-10 19:33:17Z helly $ */ -#ifndef _re_h -#define _re_h - -#include -#include -#include "token.h" -#include "ins.h" -#include "globals.h" - -namespace re2c -{ - -template -class free_list: protected std::set<_Ty> -{ -public: - typedef typename std::set<_Ty>::iterator iterator; - typedef typename std::set<_Ty>::size_type size_type; - typedef typename std::set<_Ty>::key_type key_type; - - free_list(): in_clear(false) - { - } - - using std::set<_Ty>::insert; - - size_type erase(const key_type& key) - { - if (!in_clear) - { - return std::set<_Ty>::erase(key); - } - return 0; - } - - void clear() - { - in_clear = true; - - for(iterator it = this->begin(); it != this->end(); ++it) - { - delete *it; - } - std::set<_Ty>::clear(); - - in_clear = false; - } - - ~free_list() - { - clear(); - } - -protected: - bool in_clear; -}; - -typedef struct extop -{ - char op; - int minsize; - int maxsize; -} - -ExtOp; - -struct CharPtn -{ - uint card; - CharPtn *fix; - CharPtn *nxt; -}; - -typedef CharPtn *CharPtr; - -struct CharSet -{ - CharSet(); - ~CharSet(); - - CharPtn *fix; - CharPtn *freeHead, **freeTail; - CharPtr *rep; - CharPtn *ptn; -}; - -class Range -{ - -public: - Range *next; - uint lb, ub; // [lb,ub) - - static free_list vFreeList; - -public: - Range(uint 
l, uint u) : next(NULL), lb(l), ub(u) - { - vFreeList.insert(this); - } - - Range(Range &r) : next(NULL), lb(r.lb), ub(r.ub) - { - vFreeList.insert(this); - } - - ~Range() - { - vFreeList.erase(this); - } - - friend std::ostream& operator<<(std::ostream&, const Range&); - friend std::ostream& operator<<(std::ostream&, const Range*); -}; - -inline std::ostream& operator<<(std::ostream &o, const Range *r) -{ - return r ? o << *r : o; -} - -class RegExp -{ - -public: - uint size; - - static free_list vFreeList; - -public: - RegExp() : size(0) - { - vFreeList.insert(this); - } - - virtual ~RegExp() - { - vFreeList.erase(this); - } - - virtual const char *typeOf() = 0; - RegExp *isA(const char *t) - { - return typeOf() == t ? this : NULL; - } - - virtual void split(CharSet&) = 0; - virtual void calcSize(Char*) = 0; - virtual uint fixedLength(); - virtual void compile(Char*, Ins*) = 0; - virtual void display(std::ostream&) const = 0; - friend std::ostream& operator<<(std::ostream&, const RegExp&); - friend std::ostream& operator<<(std::ostream&, const RegExp*); -}; - -inline std::ostream& operator<<(std::ostream &o, const RegExp &re) -{ - re.display(o); - return o; -} - -inline std::ostream& operator<<(std::ostream &o, const RegExp *re) -{ - return o << *re; -} - -class NullOp: public RegExp -{ - -public: - static const char *type; - -public: - const char *typeOf() - { - return type; - } - - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream &o) const - { - o << "_"; - } -}; - -class MatchOp: public RegExp -{ - -public: - static const char *type; - Range *match; - -public: - MatchOp(Range *m) : match(m) - { - } - - const char *typeOf() - { - return type; - } - - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream&) const; - -#ifdef PEDANTIC -private: - MatchOp(const MatchOp& oth) - : RegExp(oth) - , match(oth.match) - { - } - 
- MatchOp& operator = (const MatchOp& oth) - { - new(this) MatchOp(oth); - return *this; - } -#endif -}; - -class RuleOp: public RegExp -{ -public: - static const char *type; - -private: - RegExp *exp; - -public: - RegExp *ctx; - Ins *ins; - uint accept; - Token *code; - uint line; - -public: - RuleOp(RegExp*, RegExp*, Token*, uint); - - ~RuleOp() - { - delete code; - } - - const char *typeOf() - { - return type; - } - - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(std::ostream &o) const - { - o << exp << "/" << ctx << ";"; - } - -#ifdef PEDANTIC -private: - RuleOp(const RuleOp& oth) - : RegExp(oth) - , exp(oth.exp) - , ctx(oth.ctx) - , ins(oth.ins) - , accept(oth.accept) - , code(oth.code) - , line(oth.line) - { - } - RuleOp& operator = (const RuleOp& oth) - { - new(this) RuleOp(oth); - return *this; - } -#endif -}; - -class RuleLine: public line_number -{ -public: - - RuleLine(const RuleOp& _op) - : op(_op) - { - } - - uint get_line() const - { - return op.code->line; - } - - const RuleOp& op; -}; - -RegExp *mkAlt(RegExp*, RegExp*); - -class AltOp: public RegExp -{ - -private: - RegExp *exp1, *exp2; - -public: - static const char *type; - -public: - AltOp(RegExp *e1, RegExp *e2) - : exp1(e1) - , exp2(e2) - { - } - - const char *typeOf() - { - return type; - } - - void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream &o) const - { - o << exp1 << "|" << exp2; - } - - friend RegExp *mkAlt(RegExp*, RegExp*); - -#ifdef PEDANTIC -private: - AltOp(const AltOp& oth) - : RegExp(oth) - , exp1(oth.exp1) - , exp2(oth.exp2) - { - } - AltOp& operator = (const AltOp& oth) - { - new(this) AltOp(oth); - return *this; - } -#endif -}; - -class CatOp: public RegExp -{ - -private: - RegExp *exp1, *exp2; - -public: - static const char *type; - -public: - CatOp(RegExp *e1, RegExp *e2) - : exp1(e1) - , exp2(e2) - { - } - - const char *typeOf() - { - return type; - } - - 
void split(CharSet&); - void calcSize(Char*); - uint fixedLength(); - void compile(Char*, Ins*); - void display(std::ostream &o) const - { - o << exp1 << exp2; - } - -#ifdef PEDANTIC -private: - CatOp(const CatOp& oth) - : RegExp(oth) - , exp1(oth.exp1) - , exp2(oth.exp2) - { - } - CatOp& operator = (const CatOp& oth) - { - new(this) CatOp(oth); - return *this; - } -#endif -}; - -class CloseOp: public RegExp -{ - -private: - RegExp *exp; - -public: - static const char *type; - -public: - CloseOp(RegExp *e) - : exp(e) - { - } - - const char *typeOf() - { - return type; - } - - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(std::ostream &o) const - { - o << exp << "+"; - } - -#ifdef PEDANTIC -private: - CloseOp(const CloseOp& oth) - : RegExp(oth) - , exp(oth.exp) - { - } - CloseOp& operator = (const CloseOp& oth) - { - new(this) CloseOp(oth); - return *this; - } -#endif -}; - -class CloseVOp: public RegExp -{ - -private: - RegExp *exp; - int min; - int max; - -public: - static const char *type; - -public: - CloseVOp(RegExp *e, int lb, int ub) - : exp(e) - , min(lb) - , max(ub) - { - } - - const char *typeOf() - { - return type; - } - - void split(CharSet&); - void calcSize(Char*); - void compile(Char*, Ins*); - void display(std::ostream &o) const - { - o << exp << "+"; - } -#ifdef PEDANTIC -private: - CloseVOp(const CloseVOp& oth) - : RegExp(oth) - , exp(oth.exp) - , min(oth.min) - , max(oth.max) - { - } - CloseVOp& operator = (const CloseVOp& oth) - { - new(this) CloseVOp(oth); - return *this; - } -#endif -}; - -extern void genCode(std::ostream&, RegExp*); -extern void genCode(std::ostream&, uint, RegExp*); -extern void genGetState(std::ostream&, uint&, uint); -extern RegExp *mkDiff(RegExp*, RegExp*); -extern RegExp *mkAlt(RegExp*, RegExp*); - -} // end namespace re2c - -#endif diff --git a/tools/re2c/re2c.1 b/tools/re2c/re2c.1 deleted file mode 100644 index fe12c0179..000000000 --- a/tools/re2c/re2c.1 +++ /dev/null @@ 
-1,597 +0,0 @@ -./" -./" $Id: re2c.1.in 663 2007-04-01 11:22:15Z helly $ -./" -.TH RE2C 1 "22 April 2005" "Version 0.12.3" -.ds re \fBre2c\fP -.ds le \fBlex\fP -.ds rx regular expression -.ds lx \fIl\fP-expression -.SH NAME -re2c \- convert regular expressions to C/C++ - -.SH SYNOPSIS -\*(re [\fB-bdefghisuvVw1\fP] [\fB-o output\fP] file\fP - -.SH DESCRIPTION -\*(re is a preprocessor that generates C-based recognizers from regular -expressions. -The input to \*(re consists of C/C++ source interleaved with -comments of the form \fC/*!re2c\fP ... \fC*/\fP which contain -scanner specifications. -In the output these comments are replaced with code that, when -executed, will find the next input token and then execute -some user-supplied token-specific code. - -For example, given the following code - -.in +3 -.nf -char *scan(char *p) -{ -/*!re2c - re2c:define:YYCTYPE = "unsigned char"; - re2c:define:YYCURSOR = p; - re2c:yyfill:enable = 0; - re2c:yych:conversion = 1; - re2c:indent:top = 1; - [0-9]+ {return p;} - [\000-\377] {return (char*)0;} -*/ -} -.fi -.in -3 - -\*(re -is will generate - -.in +3 -.nf -/* Generated by re2c on Sat Apr 16 11:40:58 1994 */ -char *scan(char *p) -{ - { - unsigned char yych; - - yych = (unsigned char)*p; - if(yych <= '/') goto yy4; - if(yych >= ':') goto yy4; - ++p; - yych = (unsigned char)*p; - goto yy7; -yy3: - {return p;} -yy4: - ++p; - yych = (unsigned char)*p; - {return char*)0;} -yy6: - ++p; - yych = (unsigned char)*p; -yy7: - if(yych <= '/') goto yy3; - if(yych <= '9') goto yy6; - goto yy3; - } - -} -.fi -.in -3 - -You can place one \fC/*!max:re2c */\fP comment that will output a "#define -\fCYYMAXFILL\fP " line that holds the maximum number of characters -required to parse the input. That is the maximum value \fCYYFILL\fP(n) -will receive. If -1 is in effect then YYMAXFILL can only be triggered once -after the last \fC/*!re2c */\fP. 
- -You can also use \fC/*!ignore:re2c */\fP blocks that allows to document the -scanner code and will not be part of the output. - -.SH OPTIONS -\*(re provides the following options: -.TP -\fB-?\fP -\fB-h\fP -Invoke a short help. -.TP -\fB-b\fP -Implies \fB-s\fP. Use bit vectors as well in the attempt to coax better -code out of the compiler. Most useful for specifications with more than a -few keywords (e.g. for most programming languages). -.TP -\fB-d\fP -Creates a parser that dumps information about the current position and in -which state the parser is while parsing the input. This is useful to debug -parser issues and states. If you use this switch you need to define a macro -\fIYYDEBUG\fP that is called like a function with two parameters: -\fIvoid YYDEBUG(int state, char current)\fP. The first parameter receives the -state or -1 and the second parameter receives the input at the current cursor. -.TP -\fB-e\fP -Cross-compile from an ASCII platform to an EBCDIC one. -.TP -\fB-f\fP -Generate a scanner with support for storable state. -For details see below at \fBSCANNER WITH STORABLE STATES\fP. -.TP -\fB-g\fP -Generate a scanner that utilizes GCC's computed goto feature. That is \*(re -generates jump tables whenever a decision is of a certain complexity (e.g. a -lot of if conditions are otherwise necessary). This is only useable with GCC -and produces output that cannot be compiled with any other compiler. Note that -this implies -b and that the complexity threshold can be configured using the -inplace configuration "cgoto:threshold". -.TP -\fB-i\fP -Do not output #line information. This is usefull when you want use a CMS tool -with the \*(re output which you might want if you do not require your users to -have \*(re themselves when building from your source. -\fB-o output\fP -Specify the output file. -.TP -\fB-s\fP -Generate nested \fCif\fPs for some \fCswitch\fPes. Many compilers need this -assist to generate better code. 
-.TP -\fB-u\fP -Generate a parser that supports Unicode chars (UTF-32). This means the -generated code can deal with any valid Unicode character up to 0x10FFFF. When -UTF-8 or UTF-16 needs to be supported you need to convert the incoming stream -to UTF-32 upon input yourself. -.TP -\fB-v\fP -Show version information. -.TP -\fB-V\fP -Show the version as a number XXYYZZ. -.TP -\fB-w\fP -Create a parser that supports wide chars (UCS-2). This implies \fB-s\fP and -cannot be used together with \fB-e\fP switch. -.TP -\fB-1\fP -Force single pass generation, this cannot be combined with -f and disables -YYMAXFILL generation prior to last \*(re block. -.TP -\fb--no-generation-date\fP -Suppress date output in the generated output so that it only shows the re2c -version. -.SH "INTERFACE CODE" -Unlike other scanner generators, \*(re does not generate complete scanners: -the user must supply some interface code. -In particular, the user must define the following macros or use the -corresponding inplace configurations: -.TP -\fCYYCTYPE\fP -Type used to hold an input symbol. -Usually \fCchar\fP or \fCunsigned char\fP. -.TP -\fCYYCURSOR\fP -\*(lx of type \fC*YYCTYPE\fP that points to the current input symbol. -The generated code advances \fCYYCURSOR\fP as symbols are matched. -On entry, \fCYYCURSOR\fP is assumed to point to the first character of the -current token. On exit, \fCYYCURSOR\fP will point to the first character of -the following token. -.TP -\fCYYLIMIT\fP -Expression of type \fC*YYCTYPE\fP that marks the end of the buffer -(\fCYYLIMIT[-1]\fP is the last character in the buffer). -The generated code repeatedly compares \fCYYCURSOR\fP to \fCYYLIMIT\fP -to determine when the buffer needs (re)filling. -.TP -\fCYYMARKER\fP -\*(lx of type \fC*YYCTYPE\fP. -The generated code saves backtracking information in \fCYYMARKER\fP. Some easy -scanners might not use this. -.TP -\fCYYCTXMARKER\fP -\*(lx of type \fC*YYCTYPE\fP. 
-The generated code saves trailing context backtracking information in \fCYYCTXMARKER\fP. -The user only needs to define this macro if a scanner specification uses trailing -context in one or more of its regular expressions. -.TP -\fCYYFILL\fP(\fIn\fP\fC\fP) -The generated code "calls" \fCYYFILL\fP(n) when the buffer needs -(re)filling: at least \fIn\fP additional characters should -be provided. \fCYYFILL\fP(n) should adjust \fCYYCURSOR\fP, \fCYYLIMIT\fP, -\fCYYMARKER\fP and \fCYYCTXMARKER\fP as needed. Note that for typical -programming languages \fIn\fP will be the length of the longest keyword plus one. -The user can place a comment of the form \fC/*!max:re2c */\fP once to insert -a \fCYYMAXFILL\fP(n) definition that is set to the maximum length value. If -1 -switch is used then \fCYYMAXFILL\fP can be triggered only once after the -last \fC/*!re2c */\fP -block. -.TP -\fCYYGETSTATE\fP() -The user only needs to define this macro if the \fB-f\fP flag was specified. -In that case, the generated code "calls" \fCYYGETSTATE\fP() at the very beginning -of the scanner in order to obtain the saved state. \fCYYGETSTATE\fP() must return a signed -integer. The value must be either -1, indicating that the scanner is entered for the -first time, or a value previously saved by \fCYYSETSTATE\fP(s). In the second case, the -scanner will resume operations right after where the last \fCYYFILL\fP(n) was called. -.TP -\fCYYSETSTATE(\fP\fIs\fP\fC)\fP -The user only needs to define this macro if the \fB-f\fP flag was specified. -In that case, the generated code "calls" \fCYYSETSTATE\fP just before calling -\fCYYFILL\fP(n). The parameter to \fCYYSETSTATE\fP is a signed integer that uniquely -identifies the specific instance of \fCYYFILL\fP(n) that is about to be called. -Should the user wish to save the state of the scanner and have \fCYYFILL\fP(n) return -to the caller, all he has to do is store that unique identifer in a variable. 
-Later, when the scannered is called again, it will call \fCYYGETSTATE()\fP and -resume execution right where it left off. The generated code will contain -both \fCYYSETSTATE\fP(s) and \fCYYGETSTATE\fP even if \fCYYFILL\fP(n) is being -disabled. -.TP -\fCYYDEBUG(\fP\fIstate\fP,\fIcurrent\fC)\fP -This is only needed if the \fB-d\fP flag was specified. It allows to easily debug -the generated parser by calling a user defined function for every state. The function -should have the following signature: \fIvoid YYDEBUG(int state, char current)\fP. -The first parameter receives the state or -1 and the second parameter receives the -input at the current cursor. -.TP -\fCYYMAXFILL -This will be automatically defined by \fC/*!max:re2c */\fP blocks as explained above. - -.SH "SCANNER WITH STORABLE STATES" -When the \fB-f\fP flag is specified, \*(re generates a scanner that -can store its current state, return to the caller, and later resume -operations exactly where it left off. - -The default operation of \*(re is a "pull" model, where the scanner asks -for extra input whenever it needs it. However, this mode of operation -assumes that the scanner is the "owner" the parsing loop, and that may -not always be convenient. - -Typically, if there is a preprocessor ahead of the scanner in the stream, -or for that matter any other procedural source of data, the scanner cannot -"ask" for more data unless both scanner and source live in a separate threads. - -The \fB-f\fP flag is useful for just this situation : it lets users design -scanners that work in a "push" model, i.e. where data is fed to the scanner -chunk by chunk. When the scanner runs out of data to consume, it just stores -its state, and return to the caller. When more input data is fed to the scanner, -it resumes operations exactly where it left off. - -When using the -f option \*(re does not accept stdin because it has to do the -full generation process twice which means it has to read the input twice. 
That -means \*(re would fail in case it cannot open the input twice or reading the -input for the first time influences the second read attempt. - -Changes needed compared to the "pull" model. - -1. User has to supply macros YYSETSTATE() and YYGETSTATE(state) - -2. The \fB-f\fP option inhibits declaration of \fIyych\fP and -\fIyyaccept\fP. So the user has to declare these. Also the user has -to save and restore these. In the example \fIexamples/push.re\fP these -are declared as fields of the (C++) class of which the scanner is a -method, so they do not need to be saved/restored explicitly. For C -they could e.g. be made macros that select fields from a structure -passed in as parameter. Alternatively, they could be declared as local -variables, saved with YYFILL(n) when it decides to return and restored -at entry to the function. Also, it could be more efficient to save the -state from YYFILL(n) because YYSETSTATE(state) is called -unconditionally. YYFILL(n) however does not get \fIstate\fP as -parameter, so we would have to store state in a local variable by -YYSETSTATE(state). - -3. Modify YYFILL(n) to return (from the function calling it) if more -input is needed. - -4. Modify caller to recognise "more input is needed" and respond -appropriately. - -5. The generated code will contain a switch block that is used to restores -the last state by jumping behind the corrspoding YYFILL(n) call. This code is -automatically generated in the epilog of the first "\fC/*!re2c */\fP" block. -It is possible to trigger generation of the YYGETSTATE() block earlier by -placing a "\fC/*!getstate:re2c */\fP" comment. This is especially useful when -the scanner code should be wrapped inside a loop. - -Please see examples/push.re for push-model scanner. The generated code can be -tweaked using inplace configurations "\fBstate:abort\fP" and "\fBstate:nextlabel\fP". 
- -.SH "SCANNER SPECIFICATIONS" -Each scanner specification consists of a set of \fIrules\fP, \fInamed -definitions\fP and \fIconfigurations\fP. -.LP -\fIRules\fP consist of a regular expression along with a block of C/C++ code that -is to be executed when the associated \fIregular expression\fP is matched. -.P -.RS -\fIregular expression\fP \fC{\fP \fIC/C++ code\fP \fC}\fP -.RE -.LP -Named definitions are of the form: -.P -.RS -\fIname\fP \fC=\fP \fIregular expression\fP\fC;\fP -.RE -.LP -Configurations look like named definitions whose names start -with "\fBre2c:\fP": -.P -.RS -\fCre2c:\fP\fIname\fP \fC=\fP \fIvalue\fP\fC;\fP -.RE -.RS -\fCre2c:\fP\fIname\fP \fC=\fP \fB"\fP\fIvalue\fP\fB"\fP\fC;\fP -.RE - -.SH "SUMMARY OF RE2C REGULAR EXPRESSIONS" -.TP -\fC"foo"\fP -the literal string \fCfoo\fP. -ANSI-C escape sequences can be used. -.TP -\fC'foo'\fP -the literal string \fCfoo\fP (characters [a-zA-Z] treated case-insensitive). -ANSI-C escape sequences can be used. -.TP -\fC[xyz]\fP -a "character class"; in this case, -the \*(rx matches either an '\fCx\fP', a '\fCy\fP', or a '\fCz\fP'. -.TP -\fC[abj-oZ]\fP -a "character class" with a range in it; -matches an '\fCa\fP', a '\fCb\fP', any letter from '\fCj\fP' through '\fCo\fP', -or a '\fCZ\fP'. -.TP -\fC[^\fIclass\fP\fC]\fP -an inverted "character class". -.TP -\fIr\fP\fC\e\fP\fIs\fP -match any \fIr\fP which isn't an \fIs\fP. \fIr\fP and \fIs\fP must be regular expressions -which can be expressed as character classes. 
-.TP -\fIr\fP\fC*\fP -zero or more \fIr\fP's, where \fIr\fP is any regular expression -.TP -\fC\fIr\fP\fC+\fP -one or more \fIr\fP's -.TP -\fC\fIr\fP\fC?\fP -zero or one \fIr\fP's (that is, "an optional \fIr\fP") -.TP -name -the expansion of the "named definition" (see above) -.TP -\fC(\fP\fIr\fP\fC)\fP -an \fIr\fP; parentheses are used to override precedence -(see below) -.TP -\fIrs\fP -an \fIr\fP followed by an \fIs\fP ("concatenation") -.TP -\fIr\fP\fC|\fP\fIs\fP -either an \fIr\fP or an \fIs\fP -.TP -\fIr\fP\fC/\fP\fIs\fP -an \fIr\fP but only if it is followed by an \fIs\fP. The \fIs\fP is not part of -the matched text. This type of \*(rx is called "trailing context". A trailing -context can only be the end of a rule and not part of a named definition. -.TP -\fIr\fP\fC{\fP\fIn\fP\fC}\fP -matches \fIr\fP exactly \fIn\fP times. -.TP -\fIr\fP\fC{\fP\fIn\fP\fC,}\fP -matches \fIr\fP at least \fIn\fP times. -.TP -\fIr\fP\fC{\fP\fIn\fP\fC,\fP\fIm\fP\fC}\fP -matches \fIr\fP at least \fIn\fP but not more than \fIm\fP times. -.TP -\fC.\fP -match any character except newline (\\n). -.TP -\fIdef\fP -matches named definition as specified by \fIdef\fP. -.LP -Character classes and string literals may contain octoal or hexadecimal -character definitions and the following set of escape sequences (\fB\\n\fP, - \fB\\t\fP, \fB\\v\fP, \fB\\b\fP, \fB\\r\fP, \fB\\f\fP, \fB\\a\fP, \fB\\\\\fP). -An octal character is defined by a backslash followed by its three octal digits -and a hexadecimal character is defined by backslash, a lower cased '\fBx\fP' -and its two hexadecimal digits or a backslash, an upper cased \fBX\fP and its -four hexadecimal digits. -.LP -\*(re further more supports the c/c++ unicode notation. That is a backslash followed -by either a lowercased \fBu\fP and its four hexadecimal digits or an uppercased -\fBU\fP and its eight hexadecimal digits. However only in \fB-u\fP mode the -generated code can deal with any valid Unicode character up to 0x10FFFF. 
-.LP -Since characters greater \fB\\X00FF\fP are not allowed in non unicode mode, the -only portable "\fBany\fP" rules are \fB(.|"\\n")\fP and \fB[^]\fP. -.LP -The regular expressions listed above are grouped according to -precedence, from highest precedence at the top to lowest at the bottom. -Those grouped together have equal precedence. - -.SH "INPLACE CONFIGURATION" -.LP -It is possible to configure code generation inside \*(re blocks. The following -lists the available configurations: -.TP -\fIre2c:indent:top\fP \fB=\fP 0 \fB;\fP -Specifies the minimum number of indendation to use. Requires a numeric value -greater than or equal zero. -.TP -\fIre2c:indent:string\fP \fB=\fP "\\t" \fB;\fP -Specifies the string to use for indendation. Requires a string that should -contain only whitespace unless you need this for external tools. The easiest -way to specify spaces is to enclude them in single or double quotes. If you do -not want any indendation at all you can simply set this to \fB""\fP. -.TP -\fIre2c:yybm:hex\fP \fB=\fP 0 \fB;\fP -If set to zero then a decimal table is being used else a hexadecimal table -will be generated. -.TP -\fIre2c:yyfill:enable\fP \fB=\fP 1 \fB;\fP -Set this to zero to suppress generation of YYFILL(n). When using this be sure -to verify that the generated scanner does not read behind input. Allowing -this behavior might introduce sever security issues to you programs. -.TP -\fIre2c:yyfill:parameter\fP \fB=\fP 1 \fB;\fP -Allows to suppress parameter passing to \fBYYFILL\fP calls. If set to zero -then no parameter is passed to \fBYYFILL\fP. If set to a non zero value then -\fBYYFILL\fP usage will be followed by the number of requested characters in -braces. -.TP -\fIre2c:startlabel\fP \fB=\fP 0 \fB;\fP -If set to a non zero integer then the start label of the next scanner blocks -will be generated even if not used by the scanner itself. Otherwise the normal -\fByy0\fP like start label is only being generated if needed. 
If set to a text -value then a label with that text will be generated regardless of whether the -normal start label is being used or not. This setting is being reset to \fB0\fP -after a start label has been generated. -.TP -\fIre2c:labelprefix\fP \fB=\fP yy \fB;\fP -Allows to change the prefix of numbered labels. The default is \fByy\fP and -can be set any string that is a valid label. -.TP -\fIre2c:state:abort\fP \fB=\fP 0 \fB;\fP -When not zero and switch -f is active then the \fCYYGETSTATE\fP block will -contain a default case that aborts and a -1 case is used for initialization. -.TP -\fIre2c:state:nextlabel\fP \fB=\fP 0 \fB;\fP -Used when -f is active to control whether the \fCYYGETSTATE\fP block is -followed by a \fCyyNext:\fP label line. Instead of using \fCyyNext\fP you can -usually also use configuration \fIstartlabel\fP to force a specific start label -or default to \fCyy0\fP as start label. Instead of using a dedicated label it -is often better to separate the YYGETSTATE code from the actual scanner code by -placing a "\fC/*!getstate:re2c */\fP" comment. -.TP -\fIre2c:cgoto:threshold\fP \fB=\fP 9 \fB;\fP -When -g is active this value specifies the complexity threshold that triggers -generation of jump tables rather than using nested if's and decision bitfields. -The threshold is compared against a calculated estimation of if-s needed where -every used bitmap divides the threshold by 2. -.TP -\fIre2c:yych:conversion\fP \fB=\fP 0 \fB;\fP -When the input uses signed characters and \fB-s\fP or \fB-b\fP switches are -in effect re2c allows to automatically convert to the unsigned character type -that is then necessary for its internal single character. When this setting -is zero or an empty string the conversion is disabled. Using a non zero number -the conversion is taken from \fBYYCTYPE\fP. If that is given by an inplace -configuration that value is being used. Otherwise it will be \fB(YYCTYPE)\fP -and changes to that configuration are no longer possible. 
When this setting is -a string the braces must be specified. Now assuming your input is a \fBchar*\fP -buffer and you are using above mentioned switches you can set \fBYYCTYPE\fP to -\fBunsigned char\fP and this setting to either \fB1\fP or \fB"(unsigned char)"\fP. -.TP -\fIre2c:define:YYCTXMARKER\fP \fB=\fP YYCTXMARKER \fB;\fP -Allows to overwrite the define YYCTXMARKER and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYCTYPE\fP \fB=\fP YYCTYPE \fB;\fP -Allows to overwrite the define YYCTYPE and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYCURSOR\fP \fB=\fP YYCURSOR \fB;\fP -Allows to overwrite the define YYCURSOR and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYDEBUG\fP \fB=\fP YYDEBUG \fB;\fP -Allows to overwrite the define YYDEBUG and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYFILL\fP \fB=\fP YYFILL \fB;\fP -Allows to overwrite the define YYFILL and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYGETSTATE\fP \fB=\fP YYGETSTATE \fB;\fP -Allows to overwrite the define YYGETSTATE and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYLIMIT\fP \fB=\fP YYLIMIT \fB;\fP -Allows to overwrite the define YYLIMIT and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYMARKER\fP \fB=\fP YYMARKER \fB;\fP -Allows to overwrite the define YYMARKER and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:define:YYSETSTATE\fP \fB=\fP YYSETSTATE \fB;\fP -Allows to overwrite the define YYSETSTATE and thus avoiding it by setting the -value to the actual code needed. -.TP -\fIre2c:label:yyFillLabel\fP \fB=\fP yyFillLabel \fB;\fP -Allows to overwrite the name of the label yyFillLabel. 
-.TP -\fIre2c:label:yyNext\fP \fB=\fP yyNext \fB;\fP -Allows to overwrite the name of the label yyNext. -.TP -\fIre2c:variable:yyaccept\fP \fB=\fP yyaccept \fB;\fP -Allows to overwrite the name of the variable yyaccept. -.TP -\fIre2c:variable:yybm\fP \fB=\fP yybm \fB;\fP -Allows to overwrite the name of the variable yybm. -.TP -\fIre2c:variable:yych\fP \fB=\fP yych \fB;\fP -Allows to overwrite the name of the variable yych. -.TP -\fIre2c:variable:yytarget\fP \fB=\fP yytarget \fB;\fP -Allows to overwrite the name of the variable yytarget. - -.SH "UNDERSTANDING RE2C" -.LP -The subdirectory lessons of the \*(re distribution contains a few step by step -lessons to get you started with \*(re. All examples in the lessons subdirectory -can be compiled and actually work. - -.SH FEATURES -.LP -\*(re does not provide a default action: -the generated code assumes that the input -will consist of a sequence of tokens. -Typically this can be dealt with by adding a rule such as the one for -unexpected characters in the example above. -.LP -The user must arrange for a sentinel token to appear at the end of input -(and provide a rule for matching it): -\*(re does not provide an \fC<>\fP expression. -If the source is from a null-byte terminated string, a -rule matching a null character will suffice. If the source is from a -file then you could pad the input with a newline (or some other character that -cannot appear within another token); upon recognizing such a character check -to see if it is the sentinel and act accordingly. And you can also use YYFILL(n) -to end the scanner in case not enough characters are available which is nothing -else then e detection of end of data/file. -.LP -\*(re does not provide start conditions: use a separate scanner -specification for each start condition (as illustrated in the above example). - -.SH BUGS -.LP -Difference only works for character sets. -.LP -The \*(re internal algorithms need documentation. - -.SH "SEE ALSO" -.LP -flex(1), lex(1). 
-.P -More information on \*(re can be found here: -.PD 0 -.P -.B http://re2c.org/ -.PD 1 - -.SH AUTHORS -.PD 0 -.P -Peter Bumbulis -.P -Brian Young -.P -Dan Nuffer -.P -Marcus Boerger -.P -Hartmut Kaiser -.P -Emmanuel Mogenet added storable state -.P -.PD 1 - -.SH VERSION INFORMATION -This manpage describes \*(re, version 0.12.3. - -.fi diff --git a/tools/re2c/re2c.vcproj b/tools/re2c/re2c.vcproj deleted file mode 100644 index c10906185..000000000 --- a/tools/re2c/re2c.vcproj +++ /dev/null @@ -1,549 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tools/re2c/scanner.cc b/tools/re2c/scanner.cc deleted file mode 100644 index 1a6cbbe7c..000000000 --- a/tools/re2c/scanner.cc +++ /dev/null @@ -1,1302 +0,0 @@ -/* Generated by re2c 0.12.3 on Fri Jan 25 21:09:48 2008 */ -/* $Id: scanner.re 663 2007-04-01 11:22:15Z helly $ */ -#include -#include -#include -#include -#include "scanner.h" -#include "parser.h" -#include "y.tab.h" -#include "globals.h" -#include "dfa.h" - -extern YYSTYPE yylval; - -#ifndef MAX -#define MAX(a,b) (((a)>(b))?(a):(b)) -#endif - -#define BSIZE 8192 - -#define YYCTYPE unsigned char -#define YYCURSOR cursor -#define YYLIMIT lim -#define YYMARKER ptr -#define YYFILL(n) {cursor = fill(cursor);} - -#define RETURN(i) {cur = cursor; return i;} - -namespace re2c -{ - -Scanner::Scanner(const char *fn, std::istream& i, std::ostream& o) - : in(i) - , out(o) - , bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL) - , top(NULL), eof(NULL), tchar(0), tline(0), cline(1), iscfg(0), filename(fn) -{ - ; -} - -char *Scanner::fill(char *cursor) -{ - if(!eof) - { - uint cnt = tok - bot; - if(cnt) - { - memcpy(bot, tok, lim - tok); - 
tok = bot; - ptr -= cnt; - cursor -= cnt; - pos -= cnt; - lim -= cnt; - } - if((top - lim) < BSIZE) - { - char *buf = new char[(lim - bot) + BSIZE]; - memcpy(buf, tok, lim - tok); - tok = buf; - ptr = &buf[ptr - bot]; - cursor = &buf[cursor - bot]; - pos = &buf[pos - bot]; - lim = &buf[lim - bot]; - top = &lim[BSIZE]; - delete [] bot; - bot = buf; - } - in.read(lim, BSIZE); - if ((cnt = in.gcount()) != BSIZE ) - { - eof = &lim[cnt]; *eof++ = '\0'; - } - lim += cnt; - } - return cursor; -} - - - -int Scanner::echo() -{ - char *cursor = cur; - bool ignore_eoc = false; - int ignore_cnt = 0; - - if (eof && cursor == eof) // Catch EOF - { - return 0; - } - - tok = cursor; -echo: -{ - - { - YYCTYPE yych; - unsigned int yyaccept = 0; - - if((YYLIMIT - YYCURSOR) < 16) YYFILL(16); - yych = *YYCURSOR; - if(yych <= ')') { - if(yych <= 0x00) goto yy7; - if(yych == 0x0A) goto yy5; - goto yy9; - } else { - if(yych <= '*') goto yy4; - if(yych != '/') goto yy9; - } - yyaccept = 0; - yych = *(YYMARKER = ++YYCURSOR); - if(yych == '*') goto yy16; -yy3: - { - goto echo; - } -yy4: - yych = *++YYCURSOR; - if(yych == '/') goto yy10; - goto yy3; -yy5: - ++YYCURSOR; - { - if (ignore_eoc) { - ignore_cnt++; - } else { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - } - tok = pos = cursor; cline++; - goto echo; - } -yy7: - ++YYCURSOR; - { - if (!ignore_eoc) { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok) - 1); // -1 so we don't write out the \0 - } - if(cursor == eof) { - RETURN(0); - } - } -yy9: - yych = *++YYCURSOR; - goto yy3; -yy10: - yyaccept = 1; - yych = *(YYMARKER = ++YYCURSOR); - if(yych == 0x0A) goto yy14; - if(yych == 0x0D) goto yy12; -yy11: - { - if (ignore_eoc) { - if (ignore_cnt) { - out << "\n" << sourceFileInfo; - } - ignore_eoc = false; - ignore_cnt = 0; - } else { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - } - tok = pos = cursor; - goto echo; - } -yy12: - yych = *++YYCURSOR; - 
if(yych == 0x0A) goto yy14; -yy13: - YYCURSOR = YYMARKER; - if(yyaccept <= 0) { - goto yy3; - } else { - goto yy11; - } -yy14: - ++YYCURSOR; - { - cline++; - if (ignore_eoc) { - if (ignore_cnt) { - out << sourceFileInfo; - } - ignore_eoc = false; - ignore_cnt = 0; - } else { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - } - tok = pos = cursor; - goto echo; - } -yy16: - yych = *++YYCURSOR; - if(yych != '!') goto yy13; - yych = *++YYCURSOR; - switch(yych) { - case 'g': goto yy19; - case 'i': goto yy18; - case 'm': goto yy20; - case 'r': goto yy21; - default: goto yy13; - } -yy18: - yych = *++YYCURSOR; - if(yych == 'g') goto yy47; - goto yy13; -yy19: - yych = *++YYCURSOR; - if(yych == 'e') goto yy34; - goto yy13; -yy20: - yych = *++YYCURSOR; - if(yych == 'a') goto yy26; - goto yy13; -yy21: - yych = *++YYCURSOR; - if(yych != 'e') goto yy13; - yych = *++YYCURSOR; - if(yych != '2') goto yy13; - yych = *++YYCURSOR; - if(yych != 'c') goto yy13; - ++YYCURSOR; - { - if (bUsedYYMaxFill && bSinglePass) { - fatal("found scanner block after YYMAXFILL declaration"); - } - out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok)); - tok = cursor; - RETURN(1); - } -yy26: - yych = *++YYCURSOR; - if(yych != 'x') goto yy13; - yych = *++YYCURSOR; - if(yych != ':') goto yy13; - yych = *++YYCURSOR; - if(yych != 'r') goto yy13; - yych = *++YYCURSOR; - if(yych != 'e') goto yy13; - yych = *++YYCURSOR; - if(yych != '2') goto yy13; - yych = *++YYCURSOR; - if(yych != 'c') goto yy13; - ++YYCURSOR; - { - if (bUsedYYMaxFill) { - fatal("cannot generate YYMAXFILL twice"); - } - out << "#define YYMAXFILL " << maxFill << std::endl; - tok = pos = cursor; - ignore_eoc = true; - bUsedYYMaxFill = true; - goto echo; - } -yy34: - yych = *++YYCURSOR; - if(yych != 't') goto yy13; - yych = *++YYCURSOR; - if(yych != 's') goto yy13; - yych = *++YYCURSOR; - if(yych != 't') goto yy13; - yych = *++YYCURSOR; - if(yych != 'a') goto yy13; - yych = *++YYCURSOR; - 
if(yych != 't') goto yy13; - yych = *++YYCURSOR; - if(yych != 'e') goto yy13; - yych = *++YYCURSOR; - if(yych != ':') goto yy13; - yych = *++YYCURSOR; - if(yych != 'r') goto yy13; - yych = *++YYCURSOR; - if(yych != 'e') goto yy13; - yych = *++YYCURSOR; - if(yych != '2') goto yy13; - yych = *++YYCURSOR; - if(yych != 'c') goto yy13; - ++YYCURSOR; - { - tok = pos = cursor; - genGetState(out, topIndent, 0); - ignore_eoc = true; - goto echo; - } -yy47: - yych = *++YYCURSOR; - if(yych != 'n') goto yy13; - yych = *++YYCURSOR; - if(yych != 'o') goto yy13; - yych = *++YYCURSOR; - if(yych != 'r') goto yy13; - yych = *++YYCURSOR; - if(yych != 'e') goto yy13; - yych = *++YYCURSOR; - if(yych != ':') goto yy13; - yych = *++YYCURSOR; - if(yych != 'r') goto yy13; - yych = *++YYCURSOR; - if(yych != 'e') goto yy13; - yych = *++YYCURSOR; - if(yych != '2') goto yy13; - yych = *++YYCURSOR; - if(yych != 'c') goto yy13; - ++YYCURSOR; - { - tok = pos = cursor; - ignore_eoc = true; - goto echo; - } - } -} - -} - - -int Scanner::scan() -{ - char *cursor = cur; - uint depth; - -scan: - tchar = cursor - pos; - tline = cline; - tok = cursor; - if (iscfg == 1) - { - goto config; - } - else if (iscfg == 2) - { - goto value; - } -{ - static const unsigned char yybm[] = { - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 116, 0, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 116, 112, 48, 112, 112, 112, 112, 80, - 112, 112, 112, 112, 112, 112, 112, 112, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 112, 112, 112, 112, 112, 112, - 112, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 112, 0, 96, 112, 120, - 112, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 
112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - 112, 112, 112, 112, 112, 112, 112, 112, - }; - - { - YYCTYPE yych; - unsigned int yyaccept = 0; - if((YYLIMIT - YYCURSOR) < 5) YYFILL(5); - yych = *YYCURSOR; - if(yych <= ':') { - if(yych <= '"') { - if(yych <= 0x0C) { - if(yych <= 0x08) goto yy85; - if(yych <= 0x09) goto yy79; - if(yych <= 0x0A) goto yy81; - goto yy85; - } else { - if(yych <= 0x1F) { - if(yych <= 0x0D) goto yy83; - goto yy85; - } else { - if(yych <= ' ') goto yy79; - if(yych <= '!') goto yy85; - goto yy66; - } - } - } else { - if(yych <= '*') { - if(yych <= '&') goto yy85; - if(yych <= '\'') goto yy68; - if(yych <= ')') goto yy72; - goto yy64; - } else { - if(yych <= '-') { - if(yych <= '+') goto yy73; - goto yy85; - } else { - if(yych <= '.') goto yy77; - if(yych <= '/') goto yy62; - goto yy85; - } - } - } - } else { - if(yych <= '\\') { - if(yych <= '>') { - if(yych == '<') goto yy85; - if(yych <= '=') goto yy72; - goto yy85; - } else { - if(yych <= '@') { - if(yych <= '?') goto yy73; - goto yy85; - } else { - if(yych <= 'Z') goto yy76; - if(yych <= '[') goto yy70; - goto yy72; - } - } - } else { - if(yych <= 'q') { - if(yych == '_') goto yy76; - if(yych <= '`') goto yy85; - goto yy76; - } else { - if(yych <= 'z') { - if(yych <= 'r') goto yy74; - goto yy76; - } else { - if(yych <= '{') goto yy60; - if(yych <= '|') goto yy72; - goto yy85; - } - } - } - } -yy60: - yyaccept = 0; - yych = *(YYMARKER = ++YYCURSOR); - if(yych <= '/') { - if(yych == ',') 
goto yy126; - } else { - if(yych <= '0') goto yy123; - if(yych <= '9') goto yy124; - } -yy61: - { depth = 1; - goto code; - } -yy62: - ++YYCURSOR; - if((yych = *YYCURSOR) == '*') goto yy121; -yy63: - { RETURN(*tok); } -yy64: - ++YYCURSOR; - if((yych = *YYCURSOR) == '/') goto yy119; -yy65: - { yylval.op = *tok; - RETURN(CLOSE); } -yy66: - yyaccept = 1; - yych = *(YYMARKER = ++YYCURSOR); - if(yych != 0x0A) goto yy115; -yy67: - { fatal("unterminated string constant (missing \")"); } -yy68: - yyaccept = 2; - yych = *(YYMARKER = ++YYCURSOR); - if(yych != 0x0A) goto yy110; -yy69: - { fatal("unterminated string constant (missing ')"); } -yy70: - yyaccept = 3; - yych = *(YYMARKER = ++YYCURSOR); - if(yych == 0x0A) goto yy71; - if(yych == '^') goto yy101; - goto yy100; -yy71: - { fatal("unterminated range (missing ])"); } -yy72: - yych = *++YYCURSOR; - goto yy63; -yy73: - yych = *++YYCURSOR; - goto yy65; -yy74: - ++YYCURSOR; - if((yych = *YYCURSOR) == 'e') goto yy91; - goto yy90; -yy75: - { cur = cursor; - yylval.symbol = Symbol::find(token()); - return ID; } -yy76: - yych = *++YYCURSOR; - goto yy90; -yy77: - ++YYCURSOR; - { cur = cursor; - yylval.regexp = mkDot(); - return RANGE; - } -yy79: - ++YYCURSOR; - yych = *YYCURSOR; - goto yy88; -yy80: - { goto scan; } -yy81: - ++YYCURSOR; -yy82: - { if(cursor == eof) RETURN(0); - pos = cursor; cline++; - goto scan; - } -yy83: - ++YYCURSOR; - if((yych = *YYCURSOR) == 0x0A) goto yy86; -yy84: - { std::ostringstream msg; - msg << "unexpected character: "; - prtChOrHex(msg, *tok); - fatal(msg.str().c_str()); - goto scan; - } -yy85: - yych = *++YYCURSOR; - goto yy84; -yy86: - yych = *++YYCURSOR; - goto yy82; -yy87: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy88: - if(yybm[0+yych] & 4) { - goto yy87; - } - goto yy80; -yy89: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy90: - if(yybm[0+yych] & 8) { - goto yy89; - } - goto yy75; -yy91: - yych = *++YYCURSOR; - if(yych != '2') goto 
yy90; - yych = *++YYCURSOR; - if(yych != 'c') goto yy90; - yyaccept = 4; - yych = *(YYMARKER = ++YYCURSOR); - if(yych != ':') goto yy90; -yy94: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych <= '^') { - if(yych <= '@') goto yy95; - if(yych <= 'Z') goto yy96; - } else { - if(yych == '`') goto yy95; - if(yych <= 'z') goto yy96; - } -yy95: - YYCURSOR = YYMARKER; - if(yyaccept <= 3) { - if(yyaccept <= 1) { - if(yyaccept <= 0) { - goto yy61; - } else { - goto yy67; - } - } else { - if(yyaccept <= 2) { - goto yy69; - } else { - goto yy71; - } - } - } else { - if(yyaccept <= 5) { - if(yyaccept <= 4) { - goto yy75; - } else { - goto yy98; - } - } else { - goto yy127; - } - } -yy96: - yyaccept = 5; - YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych <= 'Z') { - if(yych <= '9') { - if(yych >= '0') goto yy96; - } else { - if(yych <= ':') goto yy94; - if(yych >= 'A') goto yy96; - } - } else { - if(yych <= '_') { - if(yych >= '_') goto yy96; - } else { - if(yych <= '`') goto yy98; - if(yych <= 'z') goto yy96; - } - } -yy98: - { cur = cursor; - tok+= 5; /* skip "re2c:" */ - iscfg = 1; - yylval.str = new Str(token()); - return CONFIG; - } -yy99: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy100: - if(yybm[0+yych] & 16) { - goto yy99; - } - if(yych <= '[') goto yy95; - if(yych <= '\\') goto yy103; - goto yy104; -yy101: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych <= '[') { - if(yych == 0x0A) goto yy95; - goto yy101; - } else { - if(yych <= '\\') goto yy106; - if(yych <= ']') goto yy107; - goto yy101; - } -yy103: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy95; - goto yy99; -yy104: - ++YYCURSOR; - { cur = cursor; - yylval.regexp = ranToRE(token()); - return RANGE; } -yy106: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy95; - goto yy101; -yy107: 
- ++YYCURSOR; - { cur = cursor; - yylval.regexp = invToRE(token()); - return RANGE; } -yy109: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy110: - if(yybm[0+yych] & 32) { - goto yy109; - } - if(yych <= '&') goto yy95; - if(yych <= '\'') goto yy112; - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy95; - goto yy109; -yy112: - ++YYCURSOR; - { cur = cursor; - yylval.regexp = strToCaseInsensitiveRE(token()); - return STRING; } -yy114: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy115: - if(yybm[0+yych] & 64) { - goto yy114; - } - if(yych <= '!') goto yy95; - if(yych <= '"') goto yy117; - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy95; - goto yy114; -yy117: - ++YYCURSOR; - { cur = cursor; - yylval.regexp = strToRE(token()); - return STRING; } -yy119: - ++YYCURSOR; - { tok = cursor; - RETURN(0); } -yy121: - ++YYCURSOR; - { depth = 1; - goto comment; } -yy123: - yych = *++YYCURSOR; - if(yych == ',') goto yy137; - goto yy125; -yy124: - ++YYCURSOR; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; -yy125: - if(yybm[0+yych] & 128) { - goto yy124; - } - if(yych == ',') goto yy130; - if(yych == '}') goto yy128; - goto yy95; -yy126: - ++YYCURSOR; -yy127: - { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); } -yy128: - ++YYCURSOR; - { yylval.extop.minsize = atoi((char *)tok+1); - yylval.extop.maxsize = atoi((char *)tok+1); - RETURN(CLOSESIZE); } -yy130: - yyaccept = 6; - yych = *(YYMARKER = ++YYCURSOR); - if(yych <= '/') goto yy127; - if(yych <= '9') goto yy133; - if(yych != '}') goto yy127; - ++YYCURSOR; - { yylval.extop.minsize = atoi((char *)tok+1); - yylval.extop.maxsize = -1; - RETURN(CLOSESIZE); } -yy133: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych <= '/') goto yy95; - if(yych <= '9') goto yy133; - if(yych != '}') goto yy95; - ++YYCURSOR; 
- { yylval.extop.minsize = atoi((char *)tok+1); - yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)tok, ',')+1)); - RETURN(CLOSESIZE); } -yy137: - yyaccept = 6; - yych = *(YYMARKER = ++YYCURSOR); - if(yych <= '/') goto yy127; - if(yych <= '9') goto yy133; - if(yych != '}') goto yy127; - ++YYCURSOR; - { yylval.op = '*'; - RETURN(CLOSE); } - } -} - - -code: -{ - static const unsigned char yybm[] = { - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 0, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 64, 192, 192, 192, 192, 128, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 0, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, - }; - - { - YYCTYPE yych; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; - if(yych <= '&') { - if(yych <= 0x0A) { - if(yych <= 0x00) goto yy148; - if(yych <= 0x09) goto yy150; - goto yy146; - } else { - if(yych == '"') goto 
yy152; - goto yy150; - } - } else { - if(yych <= '{') { - if(yych <= '\'') goto yy153; - if(yych <= 'z') goto yy150; - goto yy144; - } else { - if(yych != '}') goto yy150; - } - } - ++YYCURSOR; - { if(--depth == 0){ - cur = cursor; - yylval.token = new Token(token(), tline); - return CODE; - } - goto code; } -yy144: - ++YYCURSOR; - { ++depth; - goto code; } -yy146: - ++YYCURSOR; - { if(cursor == eof) fatal("missing '}'"); - pos = cursor; cline++; - goto code; - } -yy148: - ++YYCURSOR; - { if(cursor == eof) { - if (depth) fatal("missing '}'"); - RETURN(0); - } - goto code; - } -yy150: - ++YYCURSOR; -yy151: - { goto code; } -yy152: - yych = *(YYMARKER = ++YYCURSOR); - if(yych == 0x0A) goto yy151; - goto yy159; -yy153: - yych = *(YYMARKER = ++YYCURSOR); - if(yych == 0x0A) goto yy151; - goto yy155; -yy154: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy155: - if(yybm[0+yych] & 64) { - goto yy154; - } - if(yych <= '&') goto yy156; - if(yych <= '\'') goto yy150; - goto yy157; -yy156: - YYCURSOR = YYMARKER; - goto yy151; -yy157: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy156; - goto yy154; -yy158: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy159: - if(yybm[0+yych] & 128) { - goto yy158; - } - if(yych <= '!') goto yy156; - if(yych <= '"') goto yy150; - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy156; - goto yy158; - } -} - - -comment: -{ - - { - YYCTYPE yych; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; - if(yych <= ')') { - if(yych == 0x0A) goto yy166; - goto yy168; - } else { - if(yych <= '*') goto yy163; - if(yych == '/') goto yy165; - goto yy168; - } -yy163: - ++YYCURSOR; - if((yych = *YYCURSOR) == '/') goto yy171; -yy164: - { if(cursor == eof) RETURN(0); - goto comment; } -yy165: - yych = *++YYCURSOR; - if(yych == '*') goto yy169; - goto yy164; -yy166: - ++YYCURSOR; - { if(cursor == eof) 
RETURN(0); - tok = pos = cursor; cline++; - goto comment; - } -yy168: - yych = *++YYCURSOR; - goto yy164; -yy169: - ++YYCURSOR; - { ++depth; - fatal("ambiguous /* found"); - goto comment; } -yy171: - ++YYCURSOR; - { if(--depth == 0) - goto scan; - else - goto comment; } - } -} - - -config: -{ - static const unsigned char yybm[] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 128, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 128, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; - - { - YYCTYPE yych; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; - if(yych <= 0x1F) { - if(yych != 0x09) goto yy179; - } else { - if(yych <= ' ') goto yy175; - if(yych == '=') goto yy177; - goto yy179; - } -yy175: - ++YYCURSOR; - yych = *YYCURSOR; - goto yy184; -yy176: - { goto config; } -yy177: - ++YYCURSOR; - yych = *YYCURSOR; - goto yy182; -yy178: - { iscfg = 2; - cur = cursor; - RETURN('='); - } -yy179: - ++YYCURSOR; - { fatal("missing '='"); } -yy181: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy182: - if(yybm[0+yych] & 128) { - goto yy181; - } - goto yy178; -yy183: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy184: - if(yych == 0x09) goto yy183; - if(yych == ' ') goto yy183; - goto yy176; - } -} - - -value: -{ - static const 
unsigned char yybm[] = { - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 192, 0, 248, 248, 192, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 192, 248, 104, 248, 248, 248, 248, 152, - 248, 248, 248, 248, 248, 248, 248, 248, - 252, 252, 252, 252, 252, 252, 252, 252, - 252, 252, 248, 192, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 8, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - 248, 248, 248, 248, 248, 248, 248, 248, - }; - - { - YYCTYPE yych; - if((YYLIMIT - YYCURSOR) < 2) YYFILL(2); - yych = *YYCURSOR; - if(yych <= '&') { - if(yych <= 0x0D) { - if(yych <= 0x08) goto yy193; - if(yych <= 0x0A) goto yy187; - if(yych <= 0x0C) goto yy193; - } else { - if(yych <= ' ') { - if(yych <= 0x1F) goto yy193; - } else { - if(yych == '"') goto yy195; - goto yy193; - } - } - } else { - if(yych <= '/') { - if(yych <= '\'') goto yy197; - if(yych == '-') goto yy190; - goto yy193; - } else { - if(yych <= '9') { - if(yych <= '0') goto yy188; - goto yy191; - } else { - if(yych != ';') goto yy193; - } - } - } -yy187: - { cur = cursor; - yylval.str = new 
Str(token()); - iscfg = 0; - return VALUE; - } -yy188: - ++YYCURSOR; - if(yybm[0+(yych = *YYCURSOR)] & 8) { - goto yy193; - } -yy189: - { cur = cursor; - yylval.number = atoi(token().to_string().c_str()); - iscfg = 0; - return NUMBER; - } -yy190: - yych = *++YYCURSOR; - if(yych <= '0') goto yy194; - if(yych >= ':') goto yy194; -yy191: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yybm[0+yych] & 4) { - goto yy191; - } - if(yych <= 0x0D) { - if(yych <= 0x08) goto yy193; - if(yych <= 0x0A) goto yy189; - if(yych >= 0x0D) goto yy189; - } else { - if(yych <= ' ') { - if(yych >= ' ') goto yy189; - } else { - if(yych == ';') goto yy189; - } - } -yy193: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; -yy194: - if(yybm[0+yych] & 8) { - goto yy193; - } - goto yy187; -yy195: - YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yybm[0+yych] & 16) { - goto yy195; - } - if(yych <= '!') { - if(yych == 0x0A) goto yy187; - goto yy205; - } else { - if(yych <= '"') goto yy193; - if(yych <= '[') goto yy205; - goto yy207; - } -yy197: - YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yybm[0+yych] & 32) { - goto yy197; - } - if(yych <= '&') { - if(yych == 0x0A) goto yy187; - } else { - if(yych <= '\'') goto yy193; - if(yych >= '\\') goto yy202; - } -yy199: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yybm[0+yych] & 64) { - goto yy199; - } - if(yych <= '&') goto yy201; - if(yych <= '\'') goto yy203; - goto yy204; -yy201: - YYCURSOR = YYMARKER; - goto yy187; -yy202: - YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych <= 0x0D) { - if(yych <= 0x09) { - if(yych <= 0x08) goto yy197; - goto yy199; - } else { - if(yych <= 0x0A) goto yy187; - if(yych <= 0x0C) goto yy197; - goto yy199; - } - } else { - if(yych <= ' ') { - if(yych <= 0x1F) goto yy197; - goto yy199; - } else { - if(yych == ';') goto yy199; - 
goto yy197; - } - } -yy203: - yych = *++YYCURSOR; - goto yy187; -yy204: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy201; - goto yy199; -yy205: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yybm[0+yych] & 128) { - goto yy205; - } - if(yych <= '!') goto yy201; - if(yych <= '"') goto yy203; - goto yy208; -yy207: - YYMARKER = ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych <= 0x0D) { - if(yych <= 0x09) { - if(yych <= 0x08) goto yy195; - goto yy205; - } else { - if(yych <= 0x0A) goto yy187; - if(yych <= 0x0C) goto yy195; - goto yy205; - } - } else { - if(yych <= ' ') { - if(yych <= 0x1F) goto yy195; - goto yy205; - } else { - if(yych == ';') goto yy205; - goto yy195; - } - } -yy208: - ++YYCURSOR; - if(YYLIMIT == YYCURSOR) YYFILL(1); - yych = *YYCURSOR; - if(yych == 0x0A) goto yy201; - goto yy205; - } -} - -} - -void Scanner::fatal(uint ofs, const char *msg) const -{ - out.flush(); -#ifdef _MSC_VER - std::cerr << filename << "(" << tline << "): error : " - << "column " << (tchar + ofs + 1) << ": " - << msg << std::endl; -#else - std::cerr << "re2c: error: " - << "line " << tline << ", column " << (tchar + ofs + 1) << ": " - << msg << std::endl; -#endif - exit(1); -} - -Scanner::~Scanner() -{ - if (bot) - { - delete [] bot; - } -} - -} // end namespace re2c - diff --git a/tools/re2c/scanner.h b/tools/re2c/scanner.h deleted file mode 100644 index ddff0c13b..000000000 --- a/tools/re2c/scanner.h +++ /dev/null @@ -1,76 +0,0 @@ -/* $Id: scanner.h,v 1.17 2006/02/25 12:57:50 helly Exp $ */ -#ifndef _scanner_h -#define _scanner_h - -#include -#include -#include "token.h" -#include "re.h" -#include "globals.h" - -namespace re2c -{ - -class Scanner: - public line_number -{ -private: - std::istream& in; - std::ostream& out; - char *bot, *tok, *ptr, *cur, *pos, *lim, *top, *eof; - uint tchar, tline, cline, iscfg; - const char *filename; - -private: - char *fill(char*); - 
Scanner(const Scanner&); //unimplemented - Scanner& operator=(const Scanner&); //unimplemented - -public: - Scanner(const char*, std::istream&, std::ostream&); - ~Scanner(); - - int echo(); - int scan(); - - void fatal(const char*) const; - void fatal(uint, const char*) const; - - void config(const Str&, int); - void config(const Str&, const Str&); - - SubStr token() const; - virtual uint get_line() const; - uint xlat(uint c) const; - - uint unescape(SubStr &s) const; - std::string& unescape(SubStr& str_in, std::string& str_out) const; - - Range * getRange(SubStr &s) const; - RegExp * matchChar(uint c) const; - RegExp * strToName(SubStr s) const; - RegExp * strToRE(SubStr s) const; - RegExp * strToCaseInsensitiveRE(SubStr s) const; - RegExp * ranToRE(SubStr s) const; - RegExp * invToRE(SubStr s) const; - RegExp * mkDot() const; -}; - -inline void Scanner::fatal(const char *msg) const -{ - fatal(0, msg); -} - -inline SubStr Scanner::token() const -{ - return SubStr(tok, cur - tok); -} - -inline uint Scanner::xlat(uint c) const -{ - return re2c::wFlag ? 
c : re2c::xlat[c & 0xFF]; -} - -} // end namespace re2c - -#endif diff --git a/tools/re2c/scanner.re b/tools/re2c/scanner.re deleted file mode 100644 index fde59be72..000000000 --- a/tools/re2c/scanner.re +++ /dev/null @@ -1,381 +0,0 @@ -/* $Id: scanner.re 663 2007-04-01 11:22:15Z helly $ */ -#include -#include -#include -#include -#include "scanner.h" -#include "parser.h" -#include "y.tab.h" -#include "globals.h" -#include "dfa.h" - -extern YYSTYPE yylval; - -#ifndef MAX -#define MAX(a,b) (((a)>(b))?(a):(b)) -#endif - -#define BSIZE 8192 - -#define YYCTYPE unsigned char -#define YYCURSOR cursor -#define YYLIMIT lim -#define YYMARKER ptr -#define YYFILL(n) {cursor = fill(cursor);} - -#define RETURN(i) {cur = cursor; return i;} - -namespace re2c -{ - -Scanner::Scanner(const char *fn, std::istream& i, std::ostream& o) - : in(i) - , out(o) - , bot(NULL), tok(NULL), ptr(NULL), cur(NULL), pos(NULL), lim(NULL) - , top(NULL), eof(NULL), tchar(0), tline(0), cline(1), iscfg(0), filename(fn) -{ - ; -} - -char *Scanner::fill(char *cursor) -{ - if(!eof) - { - uint cnt = tok - bot; - if(cnt) - { - memcpy(bot, tok, lim - tok); - tok = bot; - ptr -= cnt; - cursor -= cnt; - pos -= cnt; - lim -= cnt; - } - if((top - lim) < BSIZE) - { - char *buf = new char[(lim - bot) + BSIZE]; - memcpy(buf, tok, lim - tok); - tok = buf; - ptr = &buf[ptr - bot]; - cursor = &buf[cursor - bot]; - pos = &buf[pos - bot]; - lim = &buf[lim - bot]; - top = &lim[BSIZE]; - delete [] bot; - bot = buf; - } - in.read(lim, BSIZE); - if ((cnt = in.gcount()) != BSIZE ) - { - eof = &lim[cnt]; *eof++ = '\0'; - } - lim += cnt; - } - return cursor; -} - -/*!re2c -zero = "\000"; -any = [\000-\377]; -dot = any \ [\n]; -esc = dot \ [\\]; -istring = "[" "^" ((esc \ [\]]) | "\\" dot)* "]" ; -cstring = "[" ((esc \ [\]]) | "\\" dot)* "]" ; -dstring = "\"" ((esc \ ["] ) | "\\" dot)* "\""; -sstring = "'" ((esc \ ['] ) | "\\" dot)* "'" ; -letter = [a-zA-Z]; -digit = [0-9]; -number = "0" | ("-"? 
[1-9] digit*); -name = (letter|"_") (letter|digit|"_")*; -cname = ":" name; -space = [ \t]; -eol = ("\r\n" | "\n"); -config = "re2c" cname+; -value = [^\r\n; \t]* | dstring | sstring; -*/ - -int Scanner::echo() -{ - char *cursor = cur; - bool ignore_eoc = false; - int ignore_cnt = 0; - - if (eof && cursor == eof) // Catch EOF - { - return 0; - } - - tok = cursor; -echo: -/*!re2c - "/*!re2c" { - if (bUsedYYMaxFill && bSinglePass) { - fatal("found scanner block after YYMAXFILL declaration"); - } - out.write((const char*)(tok), (const char*)(&cursor[-7]) - (const char*)(tok)); - tok = cursor; - RETURN(1); - } - "/*!max:re2c" { - if (bUsedYYMaxFill) { - fatal("cannot generate YYMAXFILL twice"); - } - out << "#define YYMAXFILL " << maxFill << std::endl; - tok = pos = cursor; - ignore_eoc = true; - bUsedYYMaxFill = true; - goto echo; - } - "/*!getstate:re2c" { - tok = pos = cursor; - genGetState(out, topIndent, 0); - ignore_eoc = true; - goto echo; - } - "/*!ignore:re2c" { - tok = pos = cursor; - ignore_eoc = true; - goto echo; - } - "*" "/" "\r"? 
"\n" { - cline++; - if (ignore_eoc) { - if (ignore_cnt) { - out << sourceFileInfo; - } - ignore_eoc = false; - ignore_cnt = 0; - } else { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - } - tok = pos = cursor; - goto echo; - } - "*" "/" { - if (ignore_eoc) { - if (ignore_cnt) { - out << "\n" << sourceFileInfo; - } - ignore_eoc = false; - ignore_cnt = 0; - } else { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - } - tok = pos = cursor; - goto echo; - } - "\n" { - if (ignore_eoc) { - ignore_cnt++; - } else { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok)); - } - tok = pos = cursor; cline++; - goto echo; - } - zero { - if (!ignore_eoc) { - out.write((const char*)(tok), (const char*)(cursor) - (const char*)(tok) - 1); // -1 so we don't write out the \0 - } - if(cursor == eof) { - RETURN(0); - } - } - any { - goto echo; - } -*/ -} - - -int Scanner::scan() -{ - char *cursor = cur; - uint depth; - -scan: - tchar = cursor - pos; - tline = cline; - tok = cursor; - if (iscfg == 1) - { - goto config; - } - else if (iscfg == 2) - { - goto value; - } -/*!re2c - "{" { depth = 1; - goto code; - } - "/*" { depth = 1; - goto comment; } - - "*/" { tok = cursor; - RETURN(0); } - - dstring { cur = cursor; - yylval.regexp = strToRE(token()); - return STRING; } - - sstring { cur = cursor; - yylval.regexp = strToCaseInsensitiveRE(token()); - return STRING; } - - "\"" { fatal("unterminated string constant (missing \")"); } - "'" { fatal("unterminated string constant (missing ')"); } - - istring { cur = cursor; - yylval.regexp = invToRE(token()); - return RANGE; } - - cstring { cur = cursor; - yylval.regexp = ranToRE(token()); - return RANGE; } - - "[" { fatal("unterminated range (missing ])"); } - - [()|=;/\\] { RETURN(*tok); } - - [*+?] 
{ yylval.op = *tok; - RETURN(CLOSE); } - - "{0,}" { yylval.op = '*'; - RETURN(CLOSE); } - - "{" [0-9]+ "}" { yylval.extop.minsize = atoi((char *)tok+1); - yylval.extop.maxsize = atoi((char *)tok+1); - RETURN(CLOSESIZE); } - - "{" [0-9]+ "," [0-9]+ "}" { yylval.extop.minsize = atoi((char *)tok+1); - yylval.extop.maxsize = MAX(yylval.extop.minsize,atoi(strchr((char *)tok, ',')+1)); - RETURN(CLOSESIZE); } - - "{" [0-9]+ ",}" { yylval.extop.minsize = atoi((char *)tok+1); - yylval.extop.maxsize = -1; - RETURN(CLOSESIZE); } - - "{" [0-9]* "," { fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); } - - config { cur = cursor; - tok+= 5; /* skip "re2c:" */ - iscfg = 1; - yylval.str = new Str(token()); - return CONFIG; - } - - name { cur = cursor; - yylval.symbol = Symbol::find(token()); - return ID; } - - "." { cur = cursor; - yylval.regexp = mkDot(); - return RANGE; - } - - space+ { goto scan; } - - eol { if(cursor == eof) RETURN(0); - pos = cursor; cline++; - goto scan; - } - - any { std::ostringstream msg; - msg << "unexpected character: "; - prtChOrHex(msg, *tok); - fatal(msg.str().c_str()); - goto scan; - } -*/ - -code: -/*!re2c - "}" { if(--depth == 0){ - cur = cursor; - yylval.token = new Token(token(), tline); - return CODE; - } - goto code; } - "{" { ++depth; - goto code; } - "\n" { if(cursor == eof) fatal("missing '}'"); - pos = cursor; cline++; - goto code; - } - zero { if(cursor == eof) { - if (depth) fatal("missing '}'"); - RETURN(0); - } - goto code; - } - dstring | sstring | any { goto code; } -*/ - -comment: -/*!re2c - "*/" { if(--depth == 0) - goto scan; - else - goto comment; } - "/*" { ++depth; - fatal("ambiguous /* found"); - goto comment; } - "\n" { if(cursor == eof) RETURN(0); - tok = pos = cursor; cline++; - goto comment; - } - any { if(cursor == eof) RETURN(0); - goto comment; } -*/ - -config: -/*!re2c - space+ { goto config; } - "=" space* { iscfg = 2; - cur = cursor; - RETURN('='); - } - any { fatal("missing '='"); 
} -*/ - -value: -/*!re2c - number { cur = cursor; - yylval.number = atoi(token().to_string().c_str()); - iscfg = 0; - return NUMBER; - } - value { cur = cursor; - yylval.str = new Str(token()); - iscfg = 0; - return VALUE; - } -*/ -} - -void Scanner::fatal(uint ofs, const char *msg) const -{ - out.flush(); -#ifdef _MSC_VER - std::cerr << filename << "(" << tline << "): error : " - << "column " << (tchar + ofs + 1) << ": " - << msg << std::endl; -#else - std::cerr << "re2c: error: " - << "line " << tline << ", column " << (tchar + ofs + 1) << ": " - << msg << std::endl; -#endif - exit(1); -} - -Scanner::~Scanner() -{ - if (bot) - { - delete [] bot; - } -} - -} // end namespace re2c - diff --git a/tools/re2c/src/codegen/bitmap.cc b/tools/re2c/src/codegen/bitmap.cc new file mode 100644 index 000000000..ca23423c4 --- /dev/null +++ b/tools/re2c/src/codegen/bitmap.cc @@ -0,0 +1,168 @@ +#include // min +#include // memset + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" + +namespace re2c +{ + +BitMap *BitMap::first = NULL; + +BitMap::BitMap(const Go *g, const State *x) + : go(g) + , on(x) + , next(first) + , i(0) + , m(0) +{ + first = this; +} + +BitMap::~BitMap() +{ + delete next; +} + +const BitMap *BitMap::find(const Go *g, const State *x) +{ + for (const BitMap *b = first; b; b = b->next) + { + if (matches(b->go->span, b->go->nSpans, b->on, g->span, g->nSpans, x)) + { + return b; + } + } + + return new BitMap(g, x); +} + +const BitMap *BitMap::find(const State *x) +{ + for (const BitMap *b = first; b; b = b->next) + { + if (b->on == x) + { + return b; + } + } + + return NULL; +} + +static void doGen(const Go *g, const State *s, uint32_t *bm, uint32_t f, uint32_t m) +{ + Span *b = g->span, *e = &b[g->nSpans]; + uint32_t lb = 0; + + for (; b < e; ++b) + { + if (b->to == s) + { + for (; lb < b->ub && lb < 256; ++lb) + { + bm[lb-f] |= m; + } + } + + lb = b->ub; + } +} + 
+void BitMap::gen(OutputFile & o, uint32_t ind, uint32_t lb, uint32_t ub) +{ + if (first && bUsedYYBitmap) + { + o.wind(ind).ws("static const unsigned char ").wstring(opts->yybm).ws("[] = {"); + + uint32_t c = 1, n = ub - lb; + const BitMap *cb = first; + + while((cb = cb->next) != NULL) { + ++c; + } + BitMap *b = first; + + uint32_t *bm = new uint32_t[n]; + + for (uint32_t i = 0, t = 1; b; i += n, t += 8) + { + memset(bm, 0, n * sizeof(uint32_t)); + + for (uint32_t m = 0x80; b && m; m >>= 1) + { + b->i = i; + b->m = m; + doGen(b->go, b->on, bm, lb, m); + b = const_cast(b->next); + } + + if (c > 8) + { + o.ws("\n").wind(ind+1).ws("/* table ").wu32(t).ws(" .. ").wu32(std::min(c, t+7)).ws(": ").wu32(i).ws(" */"); + } + + for (uint32_t j = 0; j < n; ++j) + { + if (j % 8 == 0) + { + o.ws("\n").wind(ind+1); + } + + if (opts->yybmHexTable) + { + o.wu32_hex(bm[j]); + } + else + { + o.wu32_width(bm[j], 3); + } + o.ws(", "); + } + } + + o.ws("\n").wind(ind).ws("};\n"); + + delete[] bm; + } +} + +// All spans in b1 that lead to s1 are pairwise equal to that in b2 leading to s2 +bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2) +{ + const Span * e1 = &b1[n1]; + uint32_t lb1 = 0; + const Span * e2 = &b2[n2]; + uint32_t lb2 = 0; + + for (;;) + { + for (; b1 < e1 && b1->to != s1; ++b1) + { + lb1 = b1->ub; + } + for (; b2 < e2 && b2->to != s2; ++b2) + { + lb2 = b2->ub; + } + if (b1 == e1) + { + return b2 == e2; + } + if (b2 == e2) + { + return false; + } + if (lb1 != lb2 || b1->ub != b2->ub) + { + return false; + } + ++b1; + ++b2; + } +} + +} // end namespace re2c diff --git a/tools/re2c/src/codegen/bitmap.h b/tools/re2c/src/codegen/bitmap.h new file mode 100644 index 000000000..3c0cc1be6 --- /dev/null +++ b/tools/re2c/src/codegen/bitmap.h @@ -0,0 +1,45 @@ +#ifndef _RE2C_CODEGEN_BITMAP_ +#define _RE2C_CODEGEN_BITMAP_ + +#include "src/util/c99_stdint.h" + +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct 
Go; +struct Span; +class State; +class OutputFile; + +class BitMap +{ +public: + static BitMap *first; + + const Go *go; + const State *on; + const BitMap *next; + uint32_t i; + uint32_t m; + +public: + static const BitMap *find(const Go*, const State*); + static const BitMap *find(const State*); + static void gen(OutputFile &, uint32_t ind, uint32_t, uint32_t); + BitMap(const Go*, const State*); + ~BitMap(); + + FORBID_COPY (BitMap); +}; + +bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, uint32_t n2, const State * s2); + +#ifdef _MSC_VER +# pragma warning(disable: 4355) /* 'this' : used in base member initializer list */ +#endif + +} // end namespace re2c + +#endif // _RE2C_CODEGEN_BITMAP_ diff --git a/tools/re2c/src/codegen/emit.h b/tools/re2c/src/codegen/emit.h new file mode 100644 index 000000000..5d91b14e9 --- /dev/null +++ b/tools/re2c/src/codegen/emit.h @@ -0,0 +1,43 @@ +#ifndef _RE2C_CODEGEN_EMIT_ +#define _RE2C_CODEGEN_EMIT_ + +#include "src/codegen/output.h" +#include "src/ir/adfa/adfa.h" + +namespace re2c { + +typedef std::vector RegExpIndices; + +void emit_action + ( const Action & action + , OutputFile & o + , uint32_t ind + , bool & readCh + , const State * const s + , const std::string & condName + , const Skeleton * skeleton + , const std::set & used_labels + , bool save_yyaccept + ); + +// helpers +void genGoTo (OutputFile & o, uint32_t ind, const State * from, const State * to, bool & readCh); + +template std::string replaceParam (std::string str, const std::string & param, const _Ty & value) +{ + if (!param.empty ()) + { + std::ostringstream strValue; + strValue << value; + std::string::size_type pos; + while((pos = str.find(param)) != std::string::npos) + { + str.replace(pos, param.length(), strValue.str()); + } + } + return str; +} + +} // namespace re2c + +#endif // _RE2C_CODEGEN_EMIT_ diff --git a/tools/re2c/src/codegen/emit_action.cc b/tools/re2c/src/codegen/emit_action.cc new file mode 100644 index 
000000000..d3146daef --- /dev/null +++ b/tools/re2c/src/codegen/emit_action.cc @@ -0,0 +1,388 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/emit.h" +#include "src/codegen/input_api.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/adfa/action.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/code.h" +#include "src/parse/loc.h" + +namespace re2c +{ + +class label_t; + +static void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker); +static void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s); +static void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & init, const std::set & used_labels); +static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept); +static void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept, size_t l, size_t r); +static void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept); +static void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton); +static void genYYFill (OutputFile & o, size_t need); +static void genSetCondition (OutputFile & o, uint32_t ind, const std::string & newcond); +static void genSetState (OutputFile & o, uint32_t ind, uint32_t fillIndex); + +void emit_action + ( const Action & action + , OutputFile & o + , uint32_t ind + , bool & readCh + , const State * const s + , const std::string & condName + , const Skeleton * skeleton + , const std::set & used_labels + , bool save_yyaccept + ) +{ + switch (action.type) + { + case 
Action::MATCH: + emit_match (o, ind, readCh, s); + break; + case Action::INITIAL: + emit_initial (o, ind, readCh, s, * action.info.initial, used_labels); + break; + case Action::SAVE: + emit_save (o, ind, readCh, s, action.info.save, save_yyaccept); + break; + case Action::MOVE: + break; + case Action::ACCEPT: + emit_accept (o, ind, readCh, s, * action.info.accepts); + break; + case Action::RULE: + emit_rule (o, ind, s, action.info.rule, condName, skeleton); + break; + } + if (s->isPreCtxt && opts->target != opt_t::DOT) + { + o.wstring(opts->input_api.stmt_backupctx (ind)); + } +} + +void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s) +{ + if (opts->target == opt_t::DOT) + { + return; + } + + const bool read_ahead = s + && s->next + && s->next->action.type != Action::RULE; + if (s->fill != 0) + { + o.wstring(opts->input_api.stmt_skip (ind)); + } + else if (!read_ahead) + { + /* do not read next char if match */ + o.wstring(opts->input_api.stmt_skip (ind)); + readCh = true; + } + else + { + o.wstring(opts->input_api.stmt_skip_peek (ind)); + readCh = false; + } + + if (s->fill != 0) + { + need(o, ind, readCh, s->fill, false); + } +} + +void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & initial, const std::set & used_labels) +{ + if (opts->target == opt_t::DOT) + { + return; + } + + if (used_labels.count(s->label)) + { + if (s->fill != 0) + { + o.wstring(opts->input_api.stmt_skip (ind)); + } + else + { + o.wstring(opts->input_api.stmt_skip_peek (ind)); + } + } + + if (used_labels.count(initial.label)) + { + o.wstring(opts->labelPrefix).wlabel(initial.label).ws(":\n"); + } + + if (opts->dFlag) + { + o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(initial.label).ws(", *").wstring(opts->yycursor).ws(");\n"); + } + + if (s->fill != 0) + { + need(o, ind, readCh, s->fill, initial.setMarker); + } + else + { + if (initial.setMarker) + { + o.wstring(opts->input_api.stmt_backup (ind)); + } + 
readCh = false; + } +} + +void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept) +{ + if (opts->target == opt_t::DOT) + { + return; + } + + if (save_yyaccept) + { + o.wind(ind).wstring(opts->yyaccept).ws(" = ").wu32(save).ws(";\n"); + } + + if (s->fill != 0) + { + o.wstring(opts->input_api.stmt_skip_backup (ind)); + need(o, ind, readCh, s->fill, false); + } + else + { + o.wstring(opts->input_api.stmt_skip_backup_peek (ind)); + readCh = false; + } +} + +void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts, size_t l, size_t r) +{ + if (l < r) + { + const size_t m = (l + r) >> 1; + o.wind(ind).ws("if (").wstring(opts->yyaccept).ws(r == l+1 ? " == " : " <= ").wu64(m).ws(") {\n"); + emit_accept_binary (o, ++ind, readCh, s, accepts, l, m); + o.wind(--ind).ws("} else {\n"); + emit_accept_binary (o, ++ind, readCh, s, accepts, m + 1, r); + o.wind(--ind).ws("}\n"); + } + else + { + genGoTo(o, ind, s, accepts[l], readCh); + } +} + +void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accepts) +{ + const size_t accepts_size = accepts.size (); + if (accepts_size > 0) + { + if (opts->target != opt_t::DOT) + { + o.wstring(opts->input_api.stmt_restore (ind)); + } + + if (readCh) // shouldn't be necessary, but might become at some point + { + o.wstring(opts->input_api.stmt_peek (ind)); + readCh = false; + } + + if (accepts_size > 1) + { + if (opts->gFlag && accepts_size >= opts->cGotoThreshold) + { + o.wind(ind++).ws("{\n"); + o.wind(ind++).ws("static void *").wstring(opts->yytarget).ws("[").wu64(accepts_size).ws("] = {\n"); + for (uint32_t i = 0; i < accepts_size; ++i) + { + o.wind(ind).ws("&&").wstring(opts->labelPrefix).wlabel(accepts[i]->label).ws(",\n"); + } + o.wind(--ind).ws("};\n"); + o.wind(ind).ws("goto *").wstring(opts->yytarget).ws("[").wstring(opts->yyaccept).ws("];\n"); + 
o.wind(--ind).ws("}\n"); + } + else if (opts->sFlag || (accepts_size == 2 && opts->target != opt_t::DOT)) + { + emit_accept_binary (o, ind, readCh, s, accepts, 0, accepts_size - 1); + } + else if (opts->target == opt_t::DOT) + { + for (uint32_t i = 0; i < accepts_size; ++i) + { + o.wlabel(s->label).ws(" -> ").wlabel(accepts[i]->label); + o.ws(" [label=\"yyaccept=").wu32(i).ws("\"]\n"); + } + } + else + { + o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n"); + for (uint32_t i = 0; i < accepts_size - 1; ++i) + { + o.wind(ind).ws("case ").wu32(i).ws(": \t"); + genGoTo(o, 0, s, accepts[i], readCh); + } + o.wind(ind).ws("default:\t"); + genGoTo(o, 0, s, accepts[accepts_size - 1], readCh); + o.wind(ind).ws("}\n"); + } + } + else + { + // no need to write if statement here since there is only case 0. + genGoTo(o, ind, s, accepts[0], readCh); + } + } +} + +void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName, const Skeleton * skeleton) +{ + if (opts->target == opt_t::DOT) + { + o.wlabel(s->label); + if (rule->code) + { + o.ws(" [label=\"").wstring(rule->code->loc.filename).ws(":").wu32(rule->code->loc.line).ws("\"]"); + } + o.ws("\n"); + return; + } + + uint32_t back = rule->ctx->fixedLength(); + if (back != 0u && opts->target != opt_t::DOT) + { + o.wstring(opts->input_api.stmt_restorectx (ind)); + } + + if (opts->target == opt_t::SKELETON) + { + skeleton->emit_action (o, ind, rule->rank); + } + else + { + if (!rule->newcond.empty () && condName != rule->newcond) + { + genSetCondition(o, ind, rule->newcond); + } + + if (rule->code) + { + if (!yySetupRule.empty ()) + { + o.wind(ind).wstring(yySetupRule).ws("\n"); + } + o.wline_info(rule->code->loc.line, rule->code->loc.filename.c_str ()) + .wind(ind).wstring(rule->code->text).ws("\n") + .wdelay_line_info (); + } + else if (!rule->newcond.empty ()) + { + o.wind(ind).wstring(replaceParam(opts->condGoto, opts->condGotoParam, opts->condPrefix + 
rule->newcond)).ws("\n"); + } + } +} + +void need (OutputFile & o, uint32_t ind, bool & readCh, size_t n, bool bSetMarker) +{ + if (opts->target == opt_t::DOT) + { + return; + } + + uint32_t fillIndex = last_fill_index; + + if (opts->fFlag) + { + last_fill_index++; + genSetState (o, ind, fillIndex); + } + + if (opts->fill_use && n > 0) + { + o.wind(ind); + if (n == 1) + { + if (opts->fill_check) + { + o.ws("if (").wstring(opts->input_api.expr_lessthan_one ()).ws(") "); + } + genYYFill(o, n); + } + else + { + if (opts->fill_check) + { + o.ws("if (").wstring(opts->input_api.expr_lessthan (n)).ws(") "); + } + genYYFill(o, n); + } + } + + if (opts->fFlag) + { + o.wstring(opts->yyfilllabel).wu32(fillIndex).ws(":\n"); + } + + if (n > 0) + { + if (bSetMarker) + { + o.wstring(opts->input_api.stmt_backup_peek (ind)); + } + else + { + o.wstring(opts->input_api.stmt_peek (ind)); + } + readCh = false; + } +} + +void genYYFill (OutputFile & o, size_t need) +{ + o.wstring(replaceParam (opts->fill, opts->fill_arg, need)); + if (!opts->fill_naked) + { + if (opts->fill_arg_use) + { + o.ws("(").wu64(need).ws(")"); + } + o.ws(";"); + } + o.ws("\n"); +} + +void genSetCondition(OutputFile & o, uint32_t ind, const std::string& newcond) +{ + o.wind(ind).wstring(replaceParam (opts->cond_set, opts->cond_set_arg, opts->condEnumPrefix + newcond)); + if (!opts->cond_set_naked) + { + o.ws("(").wstring(opts->condEnumPrefix).wstring(newcond).ws(");"); + } + o.ws("\n"); +} + +void genSetState(OutputFile & o, uint32_t ind, uint32_t fillIndex) +{ + o.wind(ind).wstring(replaceParam (opts->state_set, opts->state_set_arg, fillIndex)); + if (!opts->state_set_naked) + { + o.ws("(").wu32(fillIndex).ws(");"); + } + o.ws("\n"); +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/emit_dfa.cc b/tools/re2c/src/codegen/emit_dfa.cc new file mode 100644 index 000000000..2ec642647 --- /dev/null +++ b/tools/re2c/src/codegen/emit_dfa.cc @@ -0,0 +1,348 @@ +#include "src/util/c99_stdint.h" +#include 
+#include +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/emit.h" +#include "src/codegen/go.h" +#include "src/codegen/input_api.h" +#include "src/codegen/label.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/adfa/action.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/util/counter.h" + +namespace re2c +{ + +static std::string genGetCondition (); +static void genCondGotoSub (OutputFile & o, uint32_t ind, const std::vector & condnames, uint32_t cMin, uint32_t cMax); +static void genCondTable (OutputFile & o, uint32_t ind, const std::vector & condnames); +static void genCondGoto (OutputFile & o, uint32_t ind, const std::vector & condnames); +static void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label); + +std::string genGetCondition() +{ + return opts->cond_get_naked + ? opts->cond_get + : opts->cond_get + "()"; +} + +void genGoTo(OutputFile & o, uint32_t ind, const State *from, const State *to, bool & readCh) +{ + if (opts->target == opt_t::DOT) + { + o.wlabel(from->label).ws(" -> ").wlabel(to->label).ws("\n"); + return; + } + + if (readCh && from->next != to) + { + o.wstring(opts->input_api.stmt_peek (ind)); + readCh = false; + } + + o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(to->label).ws(";\n"); +} + +void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label) +{ + if (opts->target != opt_t::DOT) + { + if (used_label) + { + o.wstring(opts->labelPrefix).wlabel(s->label).ws(":\n"); + } + if (opts->dFlag && (s->action.type != Action::INITIAL)) + { + o.wind(ind).wstring(opts->yydebug).ws("(").wlabel(s->label).ws(", ").wstring(opts->input_api.expr_peek ()).ws(");\n"); + } + } +} + +void DFA::count_used_labels (std::set & used, label_t start, label_t initial, bool force_start) const +{ + // In '-f' mode, default state is always state 0 + if (opts->fFlag) + { + used.insert 
(label_t::first ()); + } + if (force_start) + { + used.insert (start); + } + for (State * s = head; s; s = s->next) + { + s->go.used_labels (used); + } + for (uint32_t i = 0; i < accepts.size (); ++i) + { + used.insert (accepts[i]->label); + } + // must go last: it needs the set of used labels + if (used.count (head->label)) + { + used.insert (initial); + } +} + +void DFA::emit_body (OutputFile & o, uint32_t& ind, const std::set & used_labels, label_t initial) const +{ + // If DFA has transitions to initial state, then initial state + // has a piece of code that advances input position. Wee must + // skip it when entering DFA. + if (used_labels.count(head->label)) + { + o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(initial).ws(";\n"); + } + + const bool save_yyaccept = accepts.size () > 1; + for (State * s = head; s; s = s->next) + { + bool readCh = false; + emit_state (o, ind, s, used_labels.count (s->label)); + emit_action (s->action, o, ind, readCh, s, cond, skeleton, used_labels, save_yyaccept); + s->go.emit(o, ind, readCh); + } +} + +void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBrace) +{ + OutputFile & o = output.source; + + bool bProlog = (!opts->cFlag || !bWroteCondCheck); + + // start_label points to the beginning of current re2c block + // (prior to condition dispatch in '-c' mode) + // it can forced by configuration 're2c:startlabel = ;' + label_t start_label = o.label_counter.next (); + // initial_label points to the beginning of DFA + // in '-c' mode this is NOT equal to start_label + label_t initial_label = bProlog && opts->cFlag + ? 
o.label_counter.next () + : start_label; + for (State * s = head; s; s = s->next) + { + s->label = o.label_counter.next (); + } + std::set used_labels; + count_used_labels (used_labels, start_label, initial_label, o.get_force_start_label ()); + + head->action.set_initial (initial_label, head->action.type == Action::SAVE); + + skeleton->warn_undefined_control_flow (); + skeleton->warn_unreachable_rules (); + skeleton->warn_match_empty (); + + if (opts->target == opt_t::SKELETON) + { + if (output.skeletons.insert (name).second) + { + skeleton->emit_data (o.file_name); + skeleton->emit_start (o, max_fill, need_backup, need_backupctx, need_accept); + uint32_t i = 2; + emit_body (o, i, used_labels, initial_label); + skeleton->emit_end (o, need_backup, need_backupctx); + } + } + else + { + // Generate prolog + if (bProlog) + { + o.ws("\n").wdelay_line_info (); + if (opts->target == opt_t::DOT) + { + bPrologBrace = true; + o.ws("digraph re2c {\n"); + } + else if ((!opts->fFlag && o.get_used_yyaccept ()) + || (!opts->fFlag && opts->bEmitYYCh) + || (opts->bFlag && !opts->cFlag && BitMap::first) + || (opts->cFlag && !bWroteCondCheck && opts->gFlag) + || (opts->fFlag && !bWroteGetState && opts->gFlag) + ) + { + bPrologBrace = true; + o.wind(ind++).ws("{\n"); + } + else if (ind == 0) + { + ind = 1; + } + if (!opts->fFlag && opts->target != opt_t::DOT) + { + if (opts->bEmitYYCh) + { + o.wind(ind).wstring(opts->yyctype).ws(" ").wstring(opts->yych).ws(";\n"); + } + o.wdelay_yyaccept_init (ind); + } + else + { + o.ws("\n"); + } + } + if (opts->bFlag && !opts->cFlag && BitMap::first) + { + BitMap::gen(o, ind, lbChar, ubChar <= 256 ? 
ubChar : 256); + } + if (bProlog) + { + if (opts->cFlag && !bWroteCondCheck && opts->gFlag) + { + genCondTable(o, ind, output.types); + } + o.wdelay_state_goto (ind); + if (opts->cFlag && opts->target != opt_t::DOT) + { + if (used_labels.count(start_label)) + { + o.wstring(opts->labelPrefix).wlabel(start_label).ws(":\n"); + } + } + o.wuser_start_label (); + if (opts->cFlag && !bWroteCondCheck) + { + genCondGoto(o, ind, output.types); + } + } + if (opts->cFlag && !cond.empty()) + { + if (opts->condDivider.length()) + { + o.wstring(replaceParam(opts->condDivider, opts->condDividerParam, cond)).ws("\n"); + } + if (opts->target == opt_t::DOT) + { + o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n"); + } + else + { + o.wstring(opts->condPrefix).wstring(cond).ws(":\n"); + } + } + if (opts->cFlag && opts->bFlag && BitMap::first) + { + o.wind(ind++).ws("{\n"); + BitMap::gen(o, ind, lbChar, ubChar <= 256 ? ubChar : 256); + } + // Generate code + emit_body (o, ind, used_labels, initial_label); + if (opts->cFlag && opts->bFlag && BitMap::first) + { + o.wind(--ind).ws("}\n"); + } + // Generate epilog + if ((!opts->cFlag || isLastCond) && bPrologBrace) + { + o.wind(--ind).ws("}\n"); + } + } + + // Cleanup + if (BitMap::first) + { + delete BitMap::first; + BitMap::first = NULL; + } +} + +void genCondTable(OutputFile & o, uint32_t ind, const std::vector & condnames) +{ + const size_t conds = condnames.size (); + o.wind(ind++).ws("static void *").wstring(opts->yyctable).ws("[").wu64(conds).ws("] = {\n"); + for (size_t i = 0; i < conds; ++i) + { + o.wind(ind).ws("&&").wstring(opts->condPrefix).wstring(condnames[i]).ws(",\n"); + } + o.wind(--ind).ws("};\n"); +} + +void genCondGotoSub(OutputFile & o, uint32_t ind, const std::vector & condnames, uint32_t cMin, uint32_t cMax) +{ + if (cMin == cMax) + { + o.wind(ind).ws("goto ").wstring(opts->condPrefix).wstring(condnames[cMin]).ws(";\n"); + } + else + { + uint32_t cMid = cMin + ((cMax - cMin + 1) / 2); + + o.wind(ind).ws("if 
(").wstring(genGetCondition()).ws(" < ").wu32(cMid).ws(") {\n"); + genCondGotoSub(o, ind + 1, condnames, cMin, cMid - 1); + o.wind(ind).ws("} else {\n"); + genCondGotoSub(o, ind + 1, condnames, cMid, cMax); + o.wind(ind).ws("}\n"); + } +} + +/* + * note [condition order] + * + * In theory re2c makes no guarantee about the order of conditions in + * the generated lexer. Users should define condition type 'YYCONDTYPE' + * and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'. + * This way code is independent of internal re2c condition numbering. + * + * However, it is possible to manually hardcode condition numbers and make + * re2c generate condition dispatch without explicit use of condition names + * (nested 'if' statements with '-b' or computed 'goto' table with '-g'). + * This code is syntactically valid (compiles), but unsafe: + * - change of re2c options may break compilation + * - change of internal re2c condition numbering may break runtime + * + * re2c has to preserve the existing numbering scheme. 
+ * + * re2c warns about implicit assumptions about condition order, unless: + * - condition type is defined with 'types:re2c' or '-t, --type-header' + * - dispatch is independent of condition order: either it uses + * explicit condition names or there's only one condition and + * dispatch shrinks to unconditional jump + */ +void genCondGoto(OutputFile & o, uint32_t ind, const std::vector & condnames) +{ + const size_t conds = condnames.size (); + if (opts->target == opt_t::DOT) + { + o.warn_condition_order = false; // see note [condition order] + for (size_t i = 0; i < conds; ++i) + { + const std::string cond = condnames[i]; + o.ws("0 -> ").wstring(cond).ws(" [label=\"state=").wstring(cond).ws("\"]\n"); + } + } + else if (opts->gFlag) + { + o.wind(ind).ws("goto *").wstring(opts->yyctable).ws("[").wstring(genGetCondition()).ws("];\n"); + } + else if (opts->sFlag) + { + if (conds == 1) + { + o.warn_condition_order = false; // see note [condition order] + } + genCondGotoSub(o, ind, condnames, 0, static_cast (conds) - 1); + } + else + { + o.warn_condition_order = false; // see note [condition order] + o.wind(ind).ws("switch (").wstring(genGetCondition()).ws(") {\n"); + for (size_t i = 0; i < conds; ++i) + { + const std::string & cond = condnames[i]; + o.wind(ind).ws("case ").wstring(opts->condEnumPrefix).wstring(cond).ws(": goto ").wstring(opts->condPrefix).wstring(cond).ws(";\n"); + } + o.wind(ind).ws("}\n"); + } + o.wdelay_warn_condition_order (); + bWroteCondCheck = true; +} + +} // end namespace re2c diff --git a/tools/re2c/src/codegen/go.h b/tools/re2c/src/codegen/go.h new file mode 100644 index 000000000..57c29e8bc --- /dev/null +++ b/tools/re2c/src/codegen/go.h @@ -0,0 +1,216 @@ +#ifndef _RE2C_CODEGEN_GO_ +#define _RE2C_CODEGEN_GO_ + +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/util/c99_stdint.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +class BitMap; +class State; +struct If; + +struct Span +{ + uint32_t ub; 
+ State * to; + + FORBID_COPY (Span); +}; + +struct Case +{ + std::vector > ranges; + const State * to; + void emit (OutputFile & o, uint32_t ind); + + inline Case () + : ranges () + , to (NULL) + {} + + FORBID_COPY (Case); +}; + +struct Cases +{ + const State * def; + Case * cases; + uint32_t cases_size; + void add (uint32_t lb, uint32_t ub, State * to); + Cases (const Span * s, uint32_t n); + ~Cases (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (Cases); +}; + +struct Cond +{ + std::string compare; + uint32_t value; + Cond (const std::string & cmp, uint32_t val); +}; + +struct Binary +{ + Cond * cond; + If * thn; + If * els; + Binary (const Span * s, uint32_t n, const State * next); + ~Binary (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (Binary); +}; + +struct Linear +{ + std::vector > branches; + Linear (const Span * s, uint32_t n, const State * next); + ~Linear (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); +}; + +struct If +{ + enum type_t + { + BINARY, + LINEAR + } type; + union + { + Binary * binary; + Linear * linear; + } info; + If (type_t t, const Span * sp, uint32_t nsp, const State * next); + ~If (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); +}; + +struct SwitchIf +{ + enum + { + SWITCH, + IF + } type; + union + { + Cases * cases; + If * ifs; + } info; + SwitchIf (const Span * sp, uint32_t nsp, const State * next); + ~SwitchIf (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); +}; + +struct GoBitmap +{ + const BitMap * bitmap; + const State * bitmap_state; + SwitchIf * hgo; + SwitchIf * lgo; + GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next); + ~GoBitmap (); + 
void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (GoBitmap); +}; + +struct CpgotoTable +{ + static const uint32_t TABLE_SIZE; + const State ** table; + CpgotoTable (const Span * span, uint32_t nSpans); + ~CpgotoTable (); + void emit (OutputFile & o, uint32_t ind); + void used_labels (std::set & used); + +private: + label_t max_label () const; + + FORBID_COPY (CpgotoTable); +}; + +struct Cpgoto +{ + SwitchIf * hgo; + CpgotoTable * table; + Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next); + ~Cpgoto (); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + FORBID_COPY (Cpgoto); +}; + +struct Dot +{ + const State * from; + Cases * cases; + Dot (const Span * sp, uint32_t nsp, const State * from); + ~Dot (); + void emit (OutputFile & o); + + FORBID_COPY (Dot); +}; + +struct Go +{ + uint32_t nSpans; // number of spans + Span * span; + enum + { + EMPTY, + SWITCH_IF, + BITMAP, + CPGOTO, + DOT + } type; + union + { + SwitchIf * switchif; + GoBitmap * bitmap; + Cpgoto * cpgoto; + Dot * dot; + } info; + + Go (); + ~Go (); + void init (const State * from); + void emit (OutputFile & o, uint32_t ind, bool & readCh); + void used_labels (std::set & used); + + Go (const Go & g) + : nSpans (g.nSpans) + , span (g.span) + , type (g.type) + , info (g.info) + {} + Go & operator = (const Go & g) + { + nSpans = g.nSpans; + span = g.span; + type = g.type; + info = g.info; + return * this; + } +}; + +} // namespace re2c + +#endif // _RE2C_CODEGEN_GO_ diff --git a/tools/re2c/src/codegen/go_construct.cc b/tools/re2c/src/codegen/go_construct.cc new file mode 100644 index 000000000..e680475f0 --- /dev/null +++ b/tools/re2c/src/codegen/go_construct.cc @@ -0,0 +1,284 @@ +#include +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/conf/opt.h" 
+#include "src/globals.h" +#include "src/ir/adfa/adfa.h" +#include "src/util/allocate.h" + +namespace re2c +{ + +static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x); + +Cases::Cases (const Span * span, uint32_t span_size) + : def (span_size == 0 ? NULL : span[span_size - 1].to) + , cases (new Case[span_size]) + , cases_size (0) +{ + for (uint32_t i = 0, lb = 0; i < span_size; ++ i) + { + add (lb, span[i].ub, span[i].to); + lb = span[i].ub; + } +} + +void Cases::add (uint32_t lb, uint32_t ub, State * to) +{ + for (uint32_t i = 0; i < cases_size; ++i) + { + if (cases[i].to == to) + { + cases[i].ranges.push_back (std::make_pair (lb, ub)); + return; + } + } + cases[cases_size].ranges.push_back (std::make_pair (lb, ub)); + cases[cases_size].to = to; + ++cases_size; +} + +Cond::Cond (const std::string & cmp, uint32_t val) + : compare (cmp) + , value (val) +{} + +Binary::Binary (const Span * s, uint32_t n, const State * next) + : cond (NULL) + , thn (NULL) + , els (NULL) +{ + const uint32_t l = n / 2; + const uint32_t h = n - l; + cond = new Cond ("<=", s[l - 1].ub - 1); + thn = new If (l > 4 ? If::BINARY : If::LINEAR, &s[0], l, next); + els = new If (h > 4 ? 
If::BINARY : If::LINEAR, &s[l], h, next); +} + +Linear::Linear (const Span * s, uint32_t n, const State * next) + : branches () +{ + for (;;) + { + const State *bg = s[0].to; + while (n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1) + { + if (s[1].to == next && n == 3) + { + branches.push_back (std::make_pair (new Cond ("!=", s[0].ub), bg)); + return ; + } + else + { + branches.push_back (std::make_pair (new Cond ("==", s[0].ub), s[1].to)); + } + n -= 2; + s += 2; + } + if (n == 1) + { + if (next == NULL || s[0].to != next) + { + branches.push_back (std::make_pair (static_cast (NULL), s[0].to)); + } + return; + } + else if (n == 2 && bg == next) + { + branches.push_back (std::make_pair (new Cond (">=", s[0].ub), s[1].to)); + return; + } + else + { + branches.push_back (std::make_pair (new Cond ("<=", s[0].ub - 1), bg)); + n -= 1; + s += 1; + } + } +} + +If::If (type_t t, const Span * sp, uint32_t nsp, const State * next) + : type (t) + , info () +{ + switch (type) + { + case BINARY: + info.binary = new Binary (sp, nsp, next); + break; + case LINEAR: + info.linear = new Linear (sp, nsp, next); + break; + } +} + +SwitchIf::SwitchIf (const Span * sp, uint32_t nsp, const State * next) + : type (IF) + , info () +{ + if ((!opts->sFlag && nsp > 2) || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2)))) + { + type = SWITCH; + info.cases = new Cases (sp, nsp); + } + else if (nsp > 5) + { + info.ifs = new If (If::BINARY, sp, nsp, next); + } + else + { + info.ifs = new If (If::LINEAR, sp, nsp, next); + } +} + +GoBitmap::GoBitmap (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const BitMap * bm, const State * bm_state, const State * next) + : bitmap (bm) + , bitmap_state (bm_state) + , hgo (NULL) + , lgo (NULL) +{ + Span * bspan = allocate (nSpans); + uint32_t bSpans = unmap (bspan, span, nSpans, bm_state); + lgo = bSpans == 0 + ? 
NULL + : new SwitchIf (bspan, bSpans, next); + // if there are any low spans, then next state for high spans + // must be NULL to trigger explicit goto generation in linear 'if' + hgo = hSpans == 0 + ? NULL + : new SwitchIf (hspan, hSpans, lgo ? NULL : next); + operator delete (bspan); +} + +const uint32_t CpgotoTable::TABLE_SIZE = 0x100; + +CpgotoTable::CpgotoTable (const Span * span, uint32_t nSpans) + : table (new const State * [TABLE_SIZE]) +{ + uint32_t c = 0; + for (uint32_t i = 0; i < nSpans; ++i) + { + for(; c < span[i].ub && c < TABLE_SIZE; ++c) + { + table[c] = span[i].to; + } + } +} + +Cpgoto::Cpgoto (const Span * span, uint32_t nSpans, const Span * hspan, uint32_t hSpans, const State * next) + : hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, next)) + , table (new CpgotoTable (span, nSpans)) +{} + +Dot::Dot (const Span * sp, uint32_t nsp, const State * s) + : from (s) + , cases (new Cases (sp, nsp)) +{} + +Go::Go () + : nSpans (0) + , span (NULL) + , type (EMPTY) + , info () +{} + +void Go::init (const State * from) +{ + if (nSpans == 0) + { + return; + } + + // initialize high (wide) spans + uint32_t hSpans = 0; + const Span * hspan = NULL; + for (uint32_t i = 0; i < nSpans; ++i) + { + if (span[i].ub > 0x100) + { + hspan = &span[i]; + hSpans = nSpans - i; + break; + } + } + + // initialize bitmaps + uint32_t nBitmaps = 0; + const BitMap * bitmap = NULL; + const State * bitmap_state = NULL; + for (uint32_t i = 0; i < nSpans; ++i) + { + if (span[i].to->isBase) + { + const BitMap *b = BitMap::find (span[i].to); + if (b && matches(b->go->span, b->go->nSpans, b->on, span, nSpans, span[i].to)) + { + if (bitmap == NULL) + { + bitmap = b; + bitmap_state = span[i].to; + } + nBitmaps++; + } + } + } + + const uint32_t dSpans = nSpans - hSpans - nBitmaps; + if (opts->target == opt_t::DOT) + { + type = DOT; + info.dot = new Dot (span, nSpans, from); + } + else if (opts->gFlag && (dSpans >= opts->cGotoThreshold)) + { + type = CPGOTO; + info.cpgoto = new 
Cpgoto (span, nSpans, hspan, hSpans, from->next); + } + else if (opts->bFlag && (nBitmaps > 0)) + { + type = BITMAP; + info.bitmap = new GoBitmap (span, nSpans, hspan, hSpans, bitmap, bitmap_state, from->next); + bUsedYYBitmap = true; + } + else + { + type = SWITCH_IF; + info.switchif = new SwitchIf (span, nSpans, from->next); + } +} + +/* + * Find all spans, that map to the given state. For each of them, + * find upper adjacent span, that maps to another state (if such + * span exists, otherwize try lower one). + * If input contains single span that maps to the given state, + * then output contains 0 spans. + */ +uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x) +{ + uint32_t new_nspans = 0; + for (uint32_t i = 0; i < old_nspans; ++i) + { + if (old_span[i].to != x) + { + if (new_nspans > 0 && new_span[new_nspans - 1].to == old_span[i].to) + new_span[new_nspans - 1].ub = old_span[i].ub; + else + { + new_span[new_nspans].to = old_span[i].to; + new_span[new_nspans].ub = old_span[i].ub; + ++new_nspans; + } + } + } + if (new_nspans > 0) + new_span[new_nspans - 1].ub = old_span[old_nspans - 1].ub; + return new_nspans; +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/go_destruct.cc b/tools/re2c/src/codegen/go_destruct.cc new file mode 100644 index 000000000..0160d48f7 --- /dev/null +++ b/tools/re2c/src/codegen/go_destruct.cc @@ -0,0 +1,99 @@ +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/codegen/go.h" + +namespace re2c +{ + +Cases::~Cases () +{ + delete [] cases; +} + +Binary::~Binary () +{ + delete cond; + delete thn; + delete els; +} + +Linear::~Linear () +{ + for (uint32_t i = 0; i < branches.size (); ++i) + { + delete branches[i].first; + } +} + +If::~If () +{ + switch (type) + { + case BINARY: + delete info.binary; + break; + case LINEAR: + delete info.linear; + break; + } +} + +SwitchIf::~SwitchIf () +{ + switch (type) + { + case SWITCH: + delete info.cases; + break; + case IF: + 
delete info.ifs; + break; + } +} + +GoBitmap::~GoBitmap () +{ + delete hgo; + delete lgo; +} + +CpgotoTable::~CpgotoTable () +{ + delete [] table; +} + +Cpgoto::~Cpgoto () +{ + delete hgo; + delete table; +} + +Dot::~Dot () +{ + delete cases; +} + +Go::~Go () +{ + switch (type) + { + case EMPTY: + break; + case SWITCH_IF: + delete info.switchif; + break; + case BITMAP: + delete info.bitmap; + break; + case CPGOTO: + delete info.cpgoto; + break; + case DOT: + delete info.dot; + break; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/go_emit.cc b/tools/re2c/src/codegen/go_emit.cc new file mode 100644 index 000000000..e970eee56 --- /dev/null +++ b/tools/re2c/src/codegen/go_emit.cc @@ -0,0 +1,271 @@ +#include +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/codegen/input_api.h" +#include "src/codegen/label.h" +#include "src/codegen/output.h" +#include "src/codegen/print.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +static void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value); +static void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to); +static std::string output_yych (bool & readCh); +static std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo); + +std::string output_yych (bool & readCh) +{ + if (readCh) + { + readCh = false; + return "(" + opts->input_api.expr_peek_save () + ")"; + } + else + { + return opts->yych; + } +} + +void output_if (OutputFile & o, uint32_t ind, bool & readCh, const std::string & compare, uint32_t value) +{ + o.wind(ind).ws("if (").wstring(output_yych (readCh)).ws(" ").wstring(compare).ws(" ").wc_hex (value).ws(") "); +} + +void output_goto (OutputFile & o, uint32_t ind, bool & readCh, label_t to) +{ + if (readCh) + { + 
o.wstring(opts->input_api.stmt_peek (ind)); + readCh = false; + } + o.wind(ind).ws("goto ").wstring(opts->labelPrefix).wlabel(to).ws(";\n"); +} + +std::string output_hgo (OutputFile & o, uint32_t ind, bool & readCh, SwitchIf * hgo) +{ + std::string yych = output_yych (readCh); + if (hgo != NULL) + { + o.wind(ind).ws("if (").wstring(yych).ws(" & ~0xFF) {\n"); + hgo->emit (o, ind + 1, readCh); + o.wind(ind).ws("} else "); + yych = opts->yych; + } + else + { + o.wind(ind); + } + return yych; +} + +void Case::emit (OutputFile & o, uint32_t ind) +{ + for (uint32_t i = 0; i < ranges.size (); ++i) + { + for (uint32_t b = ranges[i].first; b < ranges[i].second; ++b) + { + o.wind(ind).ws("case ").wc_hex (b).ws(":"); + if (opts->dFlag && opts->encoding.type () == Enc::EBCDIC) + { + const uint32_t c = opts->encoding.decodeUnsafe (b); + if (is_print (c)) + o.ws(" /* ").wc(static_cast (c)).ws(" */"); + } + bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1; + if (!last_case) + { + o.ws("\n"); + } + } + } +} + +void Cases::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + o.wind(ind).ws("switch (").wstring(output_yych (readCh)).ws(") {\n"); + for (uint32_t i = 0; i < cases_size; ++i) + { + if (cases[i].to != def) + { + cases[i].emit (o, ind); + output_goto (o, 1, readCh, cases[i].to->label); + } + } + o.wind(ind).ws("default:"); + output_goto (o, 1, readCh, def->label); + o.wind(ind).ws("}\n"); +} + +void Binary::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + output_if (o, ind, readCh, cond->compare, cond->value); + o.ws("{\n"); + thn->emit (o, ind + 1, readCh); + o.wind(ind).ws("} else {\n"); + els->emit (o, ind + 1, readCh); + o.wind(ind).ws("}\n"); +} + +void Linear::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + for (uint32_t i = 0; i < branches.size (); ++i) + { + if (branches[i].first != NULL) + { + output_if (o, ind, readCh, branches[i].first->compare, branches[i].first->value); + output_goto (o, 0, readCh, 
branches[i].second->label); + } + else + { + output_goto (o, ind, readCh, branches[i].second->label); + } + } +} + +void If::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + switch (type) + { + case BINARY: + info.binary->emit (o, ind, readCh); + break; + case LINEAR: + info.linear->emit (o, ind, readCh); + break; + } +} + +void SwitchIf::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + switch (type) + { + case SWITCH: + info.cases->emit (o, ind, readCh); + break; + case IF: + info.ifs->emit (o, ind, readCh); + break; + } +} + +void GoBitmap::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + std::string yych = output_hgo (o, ind, readCh, hgo); + o.ws("if (").wstring(opts->yybm).ws("[").wu32(bitmap->i).ws("+").wstring(yych).ws("] & "); + if (opts->yybmHexTable) + { + o.wu32_hex(bitmap->m); + } + else + { + o.wu32(bitmap->m); + } + o.ws(") {\n"); + output_goto (o, ind + 1, readCh, bitmap_state->label); + o.wind(ind).ws("}\n"); + if (lgo != NULL) + { + lgo->emit (o, ind, readCh); + } +} + +label_t CpgotoTable::max_label () const +{ + label_t max = label_t::first (); + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + if (max < table[i]->label) + { + max = table[i]->label; + }; + } + return max; +} + +void CpgotoTable::emit (OutputFile & o, uint32_t ind) +{ + o.wind(ind).ws("static void *").wstring(opts->yytarget).ws("[256] = {\n"); + o.wind(++ind); + const uint32_t max_digits = max_label ().width (); + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + o.ws("&&").wstring(opts->labelPrefix).wlabel(table[i]->label); + if (i == TABLE_SIZE - 1) + { + o.ws("\n"); + } + else if (i % 8 == 7) + { + o.ws(",\n").wind(ind); + } + else + { + const uint32_t padding = max_digits - table[i]->label.width () + 1; + o.ws(",").wstring(std::string (padding, ' ')); + } + } + o.wind(--ind).ws("};\n"); +} + +void Cpgoto::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + std::string yych = output_hgo (o, ind, readCh, hgo); + o.ws("{\n"); + table->emit (o, ++ind); + 
o.wind(ind).ws("goto *").wstring(opts->yytarget).ws("[").wstring(yych).ws("];\n"); + o.wind(--ind).ws("}\n"); +} + +void Dot::emit (OutputFile & o) +{ + const uint32_t n = cases->cases_size; + if (n == 1) + { + o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[0].to->label).ws("\n"); + } + else + { + for (uint32_t i = 0; i < n; ++i) + { + o.wlabel(from->label).ws(" -> ").wlabel(cases->cases[i].to->label).ws(" [label=\""); + for (uint32_t j = 0; j < cases->cases[i].ranges.size (); ++j) + { + o.wrange(cases->cases[i].ranges[j].first, cases->cases[i].ranges[j].second); + } + o.ws("\"]\n"); + } + } +} + +void Go::emit (OutputFile & o, uint32_t ind, bool & readCh) +{ + switch (type) + { + case EMPTY: + break; + case SWITCH_IF: + info.switchif->emit (o, ind, readCh); + break; + case BITMAP: + info.bitmap->emit (o, ind, readCh); + break; + case CPGOTO: + info.cpgoto->emit (o, ind, readCh); + break; + case DOT: + info.dot->emit (o); + break; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/go_used_labels.cc b/tools/re2c/src/codegen/go_used_labels.cc new file mode 100644 index 000000000..09cf98ea8 --- /dev/null +++ b/tools/re2c/src/codegen/go_used_labels.cc @@ -0,0 +1,111 @@ +#include +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/codegen/go.h" +#include "src/codegen/label.h" +#include "src/ir/adfa/adfa.h" + +namespace re2c +{ + +void Cases::used_labels (std::set & used) +{ + for (uint32_t i = 0; i < cases_size; ++i) + { + used.insert (cases[i].to->label); + } +} + +void Binary::used_labels (std::set & used) +{ + thn->used_labels (used); + els->used_labels (used); +} + +void Linear::used_labels (std::set & used) +{ + for (uint32_t i = 0; i < branches.size (); ++i) + { + used.insert (branches[i].second->label); + } +} + +void If::used_labels (std::set & used) +{ + switch (type) + { + case BINARY: + info.binary->used_labels (used); + break; + case LINEAR: + info.linear->used_labels (used); + break; + } +} + +void 
SwitchIf::used_labels (std::set & used) +{ + switch (type) + { + case SWITCH: + info.cases->used_labels (used); + break; + case IF: + info.ifs->used_labels (used); + break; + } +} + +void GoBitmap::used_labels (std::set & used) +{ + if (hgo != NULL) + { + hgo->used_labels (used); + } + used.insert (bitmap_state->label); + if (lgo != NULL) + { + lgo->used_labels (used); + } +} + +void CpgotoTable::used_labels (std::set & used) +{ + for (uint32_t i = 0; i < TABLE_SIZE; ++i) + { + used.insert (table[i]->label); + } +} + +void Cpgoto::used_labels (std::set & used) +{ + if (hgo != NULL) + { + hgo->used_labels (used); + } + table->used_labels (used); +} + +void Go::used_labels (std::set & used) +{ + switch (type) + { + case EMPTY: + case DOT: + break; + case SWITCH_IF: + info.switchif->used_labels (used); + break; + case BITMAP: + info.bitmap->used_labels (used); + break; + case CPGOTO: + info.cpgoto->used_labels (used); + break; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/indent.h b/tools/re2c/src/codegen/indent.h new file mode 100644 index 000000000..d2d8f369a --- /dev/null +++ b/tools/re2c/src/codegen/indent.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_CODEGEN_INDENT_ +#define _RE2C_CODEGEN_INDENT_ + +#include + +#include "src/globals.h" + +namespace re2c +{ + +inline std::string indent (uint32_t ind) +{ + std::string str; + + while (opts->target != opt_t::DOT && ind-- > 0) + { + str += opts->indString; + } + return str; +} + +} // end namespace re2c + +#endif // _RE2C_CODEGEN_INDENT_ diff --git a/tools/re2c/src/codegen/input_api.cc b/tools/re2c/src/codegen/input_api.cc new file mode 100644 index 000000000..f2e32c387 --- /dev/null +++ b/tools/re2c/src/codegen/input_api.cc @@ -0,0 +1,175 @@ +#include + +#include "src/codegen/input_api.h" +#include "src/codegen/indent.h" +#include "src/conf/opt.h" +#include "src/globals.h" + +namespace re2c +{ + +InputAPI::InputAPI () + : type_ (DEFAULT) +{} + +InputAPI::type_t InputAPI::type () const +{ + return type_; +} + 
+void InputAPI::set (type_t t) +{ + type_ = t; +} + +std::string InputAPI::expr_peek () const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = "*" + opts->yycursor; + break; + case CUSTOM: + s = opts->yypeek + " ()"; + break; + } + return s; +} + +std::string InputAPI::expr_peek_save () const +{ + return opts->yych + " = " + opts.yychConversion () + expr_peek (); +} + +std::string InputAPI::stmt_peek (uint32_t ind) const +{ + return indent (ind) + expr_peek_save () + ";\n"; +} + +std::string InputAPI::stmt_skip (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = "++" + opts->yycursor; + break; + case CUSTOM: + s = opts->yyskip + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_backup (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = opts->yymarker + " = " + opts->yycursor; + break; + case CUSTOM: + s = opts->yybackup + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_backupctx (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = opts->yyctxmarker + " = " + opts->yycursor; + break; + case CUSTOM: + s = opts->yybackupctx + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_restore (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = opts->yycursor + " = " + opts->yymarker; + break; + case CUSTOM: + s = opts->yyrestore + " ()"; + break; + } + return indent (ind) + s + ";\n"; +} + +std::string InputAPI::stmt_restorectx (uint32_t ind) const +{ + std::string s; + switch (type_) + { + case DEFAULT: + s = indent (ind) + opts->yycursor + " = " + opts->yyctxmarker + ";\n"; + break; + case CUSTOM: + s = indent (ind) + opts->yyrestorectx + " ();\n"; + break; + } + return s; +} + +std::string InputAPI::stmt_skip_peek (uint32_t ind) const +{ + return type_ == DEFAULT + ? 
indent (ind) + opts->yych + " = " + opts.yychConversion () + "*++" + opts->yycursor + ";\n" + : stmt_skip (ind) + stmt_peek (ind); +} + +std::string InputAPI::stmt_skip_backup (uint32_t ind) const +{ + return type_ == DEFAULT + ? indent (ind) + opts->yymarker + " = ++" + opts->yycursor + ";\n" + : stmt_skip (ind) + stmt_backup (ind); +} + +std::string InputAPI::stmt_backup_peek (uint32_t ind) const +{ + return type_ == DEFAULT + ? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = " + opts->yycursor + ");\n" + : stmt_backup (ind) + stmt_peek (ind); +} + +std::string InputAPI::stmt_skip_backup_peek (uint32_t ind) const +{ + return type_ == DEFAULT + ? indent (ind) + opts->yych + " = " + opts.yychConversion () + "*(" + opts->yymarker + " = ++" + opts->yycursor + ");\n" + : stmt_skip (ind) + stmt_backup (ind) + stmt_peek (ind); +} + +std::string InputAPI::expr_lessthan_one () const +{ + return type_ == DEFAULT + ? opts->yylimit + " <= " + opts->yycursor + : expr_lessthan (1); +} + +std::string InputAPI::expr_lessthan (size_t n) const +{ + std::ostringstream s; + switch (type_) + { + case DEFAULT: + s << "(" << opts->yylimit << " - " << opts->yycursor << ") < " << n; + break; + case CUSTOM: + s << opts->yylessthan << " (" << n << ")"; + break; + } + return s.str (); +} + +} // end namespace re2c diff --git a/tools/re2c/src/codegen/input_api.h b/tools/re2c/src/codegen/input_api.h new file mode 100644 index 000000000..423475e6b --- /dev/null +++ b/tools/re2c/src/codegen/input_api.h @@ -0,0 +1,43 @@ +#ifndef _RE2C_CODEGEN_INPUT_API_ +#define _RE2C_CODEGEN_INPUT_API_ + +#include "src/util/c99_stdint.h" +#include + +namespace re2c +{ + +class InputAPI +{ +public: + enum type_t + { DEFAULT + , CUSTOM + }; + +private: + type_t type_; + +public: + InputAPI (); + type_t type () const; + void set (type_t t); + std::string expr_peek () const; + std::string expr_peek_save () const; + std::string stmt_peek (uint32_t ind) const; + std::string 
stmt_skip (uint32_t ind) const; + std::string stmt_backup (uint32_t ind) const; + std::string stmt_backupctx (uint32_t ind) const; + std::string stmt_restore (uint32_t ind) const; + std::string stmt_restorectx (uint32_t ind) const; + std::string stmt_skip_peek (uint32_t ind) const; + std::string stmt_skip_backup (uint32_t ind) const; + std::string stmt_backup_peek (uint32_t ind) const; + std::string stmt_skip_backup_peek (uint32_t ind) const; + std::string expr_lessthan_one () const; + std::string expr_lessthan (size_t n) const; +}; + +} // end namespace re2c + +#endif // _RE2C_CODEGEN_INPUT_API_ diff --git a/tools/re2c/src/codegen/label.cc b/tools/re2c/src/codegen/label.cc new file mode 100644 index 000000000..c2e384fbf --- /dev/null +++ b/tools/re2c/src/codegen/label.cc @@ -0,0 +1,42 @@ +#include + +#include "src/codegen/label.h" + +namespace re2c { + +const uint32_t label_t::FIRST = 0; + +label_t::label_t () + : value (FIRST) +{} + +void label_t::inc () +{ + ++value; +} + +label_t label_t::first () +{ + return label_t (); +} + +bool label_t::operator < (const label_t & l) const +{ + return value < l.value; +} + +/* + * Note: the loop counts how many times the value can be divided by 10 before + * reaching zero, so the result is the number of decimal digits minus one + * (0 for values 0..9, 1 for 10..99, and so on), not the digit count itself. + */ +uint32_t label_t::width () const +{ + uint32_t v = value; + uint32_t n = 0; + while (v /= 10) ++n; + return n; +} + +std::ostream & operator << (std::ostream & o, label_t l) +{ + o << l.value; + return o; +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/label.h b/tools/re2c/src/codegen/label.h new file mode 100644 index 000000000..cb2179d3c --- /dev/null +++ b/tools/re2c/src/codegen/label.h @@ -0,0 +1,39 @@ +#ifndef _RE2C_CODEGEN_LABEL_ +#define _RE2C_CODEGEN_LABEL_ + +#include // ostream + +#include "src/util/c99_stdint.h" + +namespace re2c { + +template class counter_t; + +// label public API: +// - get first label +// - compare labels +// - get label width +// - output label to std::ostream +// +// label private API (for label counter): +// - get initial label +// - get next label +class label_t +{ + static const uint32_t FIRST; + 
uint32_t value; + label_t (); + void inc (); + +public: + static label_t first (); + bool operator < (const label_t & l) const; + uint32_t width () const; + friend std::ostream & operator << (std::ostream & o, label_t l); + + friend class counter_t; +}; + +} // namespace re2c + +#endif // _RE2C_CODEGEN_LABEL_ diff --git a/tools/re2c/src/codegen/output.cc b/tools/re2c/src/codegen/output.cc new file mode 100644 index 000000000..5276ef77e --- /dev/null +++ b/tools/re2c/src/codegen/output.cc @@ -0,0 +1,465 @@ +#include +#include +#include + +#include "src/codegen/indent.h" +#include "src/codegen/output.h" +#include "src/codegen/print.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/rule_rank.h" + +namespace re2c +{ + +OutputFragment::OutputFragment (type_t t, uint32_t i) + : type (t) + , stream () + , indent (i) +{} + +uint32_t OutputFragment::count_lines () +{ + uint32_t lines = 0; + const std::string content = stream.str (); + const char * p = content.c_str (); + for (uint32_t i = 0; i < content.size (); ++i) + { + if (p[i] == '\n') + { + ++lines; + } + } + return lines; +} + +OutputBlock::OutputBlock () + : fragments () + , used_yyaccept (false) + , force_start_label (false) + , user_start_label () + , line (0) +{ + fragments.push_back (new OutputFragment (OutputFragment::CODE, 0)); +} + +OutputBlock::~OutputBlock () +{ + for (unsigned int i = 0; i < fragments.size (); ++i) + { + delete fragments[i]; + } +} + +OutputFile::OutputFile (const char * fn) + : file_name (fn) + , file (NULL) + , blocks () + , label_counter () + , warn_condition_order (!opts->tFlag) // see note [condition order] +{ + new_block (); +} + +bool OutputFile::open () +{ + if (file_name == NULL) + { + file_name = ""; + file = stdout; + } + else + { + file = fopen (file_name, "wb"); + } + return file != NULL; +} + +OutputFile::~OutputFile () +{ + if (file != NULL && file != stdout) + { + fclose (file); + } + for (unsigned int i = 0; i < 
blocks.size (); ++i) + { + delete blocks[i]; + } +} + +std::ostream & OutputFile::stream () +{ + return blocks.back ()->fragments.back ()->stream; +} + +OutputFile & OutputFile::wraw (const char * s, size_t n) +{ + stream ().write (s, static_cast (n)); + return *this; +} + +OutputFile & OutputFile::wu32_hex (uint32_t n) +{ + prtHex (stream (), n); + return *this; +} + +OutputFile & OutputFile::wc_hex (uint32_t n) +{ + prtChOrHex (stream (), n); + return *this; +} + +OutputFile & OutputFile::wrange (uint32_t l, uint32_t u) +{ + printSpan (stream (), l, u); + return *this; +} + +OutputFile & OutputFile::wu32_width (uint32_t n, int w) +{ + stream () << std::setw (w); + stream () << n; + return *this; +} + +OutputFile & OutputFile::wline_info (uint32_t l, const char * fn) +{ + output_line_info (stream (), l, fn); + return *this; +} + +OutputFile & OutputFile::wversion_time () +{ + output_version_time (stream ()); + return *this; +} + +OutputFile & OutputFile::wuser_start_label () +{ + const std::string label = blocks.back ()->user_start_label; + if (!label.empty ()) + { + wstring(label).ws(":\n"); + } + return *this; +} + +OutputFile & OutputFile::wc (char c) +{ + stream () << c; + return *this; +} + +OutputFile & OutputFile::wu32 (uint32_t n) +{ + stream () << n; + return *this; +} + +OutputFile & OutputFile::wu64 (uint64_t n) +{ + stream () << n; + return *this; +} + +OutputFile & OutputFile::wstring (const std::string & s) +{ + stream () << s; + return *this; +} + +OutputFile & OutputFile::ws (const char * s) +{ + stream () << s; + return *this; +} + +OutputFile & OutputFile::wlabel (label_t l) +{ + stream () << l; + return *this; +} + +OutputFile & OutputFile::wrank (rule_rank_t r) +{ + stream () << r; + return *this; +} + +OutputFile & OutputFile::wind (uint32_t ind) +{ + stream () << indent(ind); + return *this; +} + +void OutputFile::insert_code () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::CODE, 0)); +} + +OutputFile & 
OutputFile::wdelay_line_info () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::LINE_INFO, 0)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_state_goto (uint32_t ind) +{ + if (opts->fFlag && !bWroteGetState) + { + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::STATE_GOTO, ind)); + insert_code (); + bWroteGetState = true; + } + return *this; +} + +OutputFile & OutputFile::wdelay_types () +{ + warn_condition_order = false; // see note [condition order] + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::TYPES, 0)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_warn_condition_order () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::WARN_CONDITION_ORDER, 0)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind) +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind)); + insert_code (); + return *this; +} + +OutputFile & OutputFile::wdelay_yymaxfill () +{ + blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0)); + insert_code (); + return *this; +} + +void OutputFile::set_used_yyaccept () +{ + blocks.back ()->used_yyaccept = true; +} + +bool OutputFile::get_used_yyaccept () const +{ + return blocks.back ()->used_yyaccept; +} + +void OutputFile::set_force_start_label (bool force) +{ + blocks.back ()->force_start_label = force; +} + +void OutputFile::set_user_start_label (const std::string & label) +{ + blocks.back ()->user_start_label = label; +} + +bool OutputFile::get_force_start_label () const +{ + return blocks.back ()->force_start_label; +} + +void OutputFile::set_block_line (uint32_t l) +{ + blocks.back ()->line = l; +} + +uint32_t OutputFile::get_block_line () const +{ + return blocks.back ()->line; +} + +void OutputFile::new_block () +{ + blocks.push_back (new OutputBlock ()); + 
insert_code (); +} + +void OutputFile::emit + ( const std::vector & types + , size_t max_fill + ) +{ + if (file != NULL) + { + unsigned int line_count = 1; + for (unsigned int j = 0; j < blocks.size (); ++j) + { + OutputBlock & b = * blocks[j]; + for (unsigned int i = 0; i < b.fragments.size (); ++i) + { + OutputFragment & f = * b.fragments[i]; + switch (f.type) + { + case OutputFragment::CODE: + break; + case OutputFragment::LINE_INFO: + output_line_info (f.stream, line_count + 1, file_name); + break; + case OutputFragment::STATE_GOTO: + output_state_goto (f.stream, f.indent, 0); + break; + case OutputFragment::TYPES: + output_types (f.stream, f.indent, types); + break; + case OutputFragment::WARN_CONDITION_ORDER: + if (warn_condition_order) // see note [condition order] + { + warn.condition_order (b.line); + } + break; + case OutputFragment::YYACCEPT_INIT: + output_yyaccept_init (f.stream, f.indent, b.used_yyaccept); + break; + case OutputFragment::YYMAXFILL: + output_yymaxfill (f.stream, max_fill); + break; + } + std::string content = f.stream.str (); + fwrite (content.c_str (), 1, content.size (), file); + line_count += f.count_lines (); + } + } + } +} + +HeaderFile::HeaderFile (const char * fn) + : stream () + // header is always generated, but not always dumped to file + // NULL filename crashes 'operator <<' on some platforms + // TODO: generate header only if necessary + , file_name (fn ? 
fn : ".h") + , file (NULL) +{} + +bool HeaderFile::open () +{ + file = fopen (file_name, "wb"); + return file != NULL; +} + +void HeaderFile::emit (const std::vector & types) +{ + output_version_time (stream); + output_line_info (stream, 3, file_name); + stream << "\n"; + output_types (stream, 0, types); +} + +HeaderFile::~HeaderFile () +{ + if (file != NULL) + { + std::string content = stream.str (); + fwrite (content.c_str (), 1, content.size (), file); + fclose (file); + } +} + +Output::Output (const char * source_name, const char * header_name) + : source (source_name) + , header (header_name) + , types () + , skeletons () + , max_fill (1) +{} + +Output::~Output () +{ + if (!warn.error ()) + { + source.emit (types, max_fill); + header.emit (types); + } +} + +void output_state_goto (std::ostream & o, uint32_t ind, uint32_t start_label) +{ + o << indent(ind) << "switch (" << output_get_state() << ") {\n"; + if (opts->bUseStateAbort) + { + o << indent(ind) << "default: abort();\n"; + o << indent(ind) << "case -1: goto " << opts->labelPrefix << start_label << ";\n"; + } + else + { + o << indent(ind) << "default: goto " << opts->labelPrefix << start_label << ";\n"; + } + for (uint32_t i = 0; i < last_fill_index; ++i) + { + o << indent(ind) << "case " << i << ": goto " << opts->yyfilllabel << i << ";\n"; + } + o << indent(ind) << "}\n"; + if (opts->bUseStateNext) + { + o << opts->yynext << ":\n"; + } +} + +void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept) +{ + if (used_yyaccept) + { + o << indent (ind) << "unsigned int " << opts->yyaccept << " = 0;\n"; + } +} + +void output_yymaxfill (std::ostream & o, size_t max_fill) +{ + o << "#define YYMAXFILL " << max_fill << "\n"; +} + +void output_line_info (std::ostream & o, uint32_t line_number, const char * file_name) +{ + if (!opts->iFlag) + { + o << "#line " << line_number << " \"" << file_name << "\"\n"; + } +} + +void output_types (std::ostream & o, uint32_t ind, const std::vector & types) 
+{ + o << indent (ind++) << "enum " << opts->yycondtype << " {\n"; + for (unsigned int i = 0; i < types.size (); ++i) + { + o << indent (ind) << opts->condEnumPrefix << types[i] << ",\n"; + } + o << indent (--ind) << "};\n"; +} + +void output_version_time (std::ostream & o) +{ + o << "/* Generated by re2c"; + if (opts->version) + { + o << " " << PACKAGE_VERSION; + } + if (!opts->bNoGenerationDate) + { + o << " on "; + time_t now = time (NULL); + o.write (ctime (&now), 24); + } + o << " */" << "\n"; +} + +std::string output_get_state () +{ + return opts->state_get_naked + ? opts->state_get + : opts->state_get + "()"; +} + +} // namespace re2c diff --git a/tools/re2c/src/codegen/output.h b/tools/re2c/src/codegen/output.h new file mode 100644 index 000000000..774fea352 --- /dev/null +++ b/tools/re2c/src/codegen/output.h @@ -0,0 +1,158 @@ +#ifndef _RE2C_CODEGEN_OUTPUT_ +#define _RE2C_CODEGEN_OUTPUT_ + +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include + +#include "src/codegen/label.h" +#include "src/util/counter.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +class rule_rank_t; + +struct OutputFragment +{ + enum type_t + { CODE +// , CONFIG + , LINE_INFO + , STATE_GOTO + , TYPES + , WARN_CONDITION_ORDER + , YYACCEPT_INIT + , YYMAXFILL + }; + + type_t type; + std::ostringstream stream; + uint32_t indent; + + OutputFragment (type_t t, uint32_t i); + uint32_t count_lines (); +}; + +struct OutputBlock +{ + std::vector fragments; + bool used_yyaccept; + bool force_start_label; + std::string user_start_label; + uint32_t line; + + OutputBlock (); + ~OutputBlock (); +}; + +struct OutputFile +{ +public: + const char * file_name; + +private: + FILE * file; + std::vector blocks; + +public: + counter_t label_counter; + bool warn_condition_order; + +private: + std::ostream & stream (); + void insert_code (); + +public: + OutputFile (const char * fn); + ~OutputFile (); + + bool open (); + + void new_block (); + + // 
immediate output + OutputFile & wraw (const char * s, size_t n); + OutputFile & wc (char c); + OutputFile & wc_hex (uint32_t n); + OutputFile & wu32 (uint32_t n); + OutputFile & wu32_hex (uint32_t n); + OutputFile & wu32_width (uint32_t n, int w); + OutputFile & wu64 (uint64_t n); + OutputFile & wstring (const std::string & s); + OutputFile & ws (const char * s); + OutputFile & wlabel (label_t l); + OutputFile & wrank (rule_rank_t l); + OutputFile & wrange (uint32_t u, uint32_t l); + OutputFile & wline_info (uint32_t l, const char * fn); + OutputFile & wversion_time (); + OutputFile & wuser_start_label (); + OutputFile & wind (uint32_t ind); + + // delayed output + OutputFile & wdelay_line_info (); + OutputFile & wdelay_state_goto (uint32_t ind); + OutputFile & wdelay_types (); + OutputFile & wdelay_warn_condition_order (); + OutputFile & wdelay_yyaccept_init (uint32_t ind); + OutputFile & wdelay_yymaxfill (); + + void set_used_yyaccept (); + bool get_used_yyaccept () const; + void set_force_start_label (bool force); + void set_user_start_label (const std::string & label); + bool get_force_start_label () const; + void set_block_line (uint32_t l); + uint32_t get_block_line () const; + + void emit (const std::vector & types, size_t max_fill); + + FORBID_COPY (OutputFile); +}; + +struct HeaderFile +{ + HeaderFile (const char * fn); + ~HeaderFile (); + bool open (); + void emit (const std::vector & types); + +private: + std::ostringstream stream; + const char * file_name; + FILE * file; + + FORBID_COPY (HeaderFile); +}; + +struct Output +{ + OutputFile source; + HeaderFile header; + std::vector types; + std::set skeletons; + size_t max_fill; + + Output (const char * source_name, const char * header_name); + ~Output (); +}; + +void output_line_info (std::ostream &, uint32_t, const char *); +void output_state_goto (std::ostream &, uint32_t, uint32_t); +void output_types (std::ostream &, uint32_t, const std::vector &); +void output_version_time (std::ostream &); +void 
output_yyaccept_init (std::ostream &, uint32_t, bool); +void output_yymaxfill (std::ostream &, size_t); + +// helpers +std::string output_get_state (); + +} // namespace re2c + +#endif // _RE2C_CODEGEN_OUTPUT_ diff --git a/tools/re2c/src/codegen/print.cc b/tools/re2c/src/codegen/print.cc new file mode 100644 index 000000000..2303e847f --- /dev/null +++ b/tools/re2c/src/codegen/print.cc @@ -0,0 +1,156 @@ +#include + +#include "src/codegen/print.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +bool is_print (uint32_t c) +{ + return c >= 0x20 && c < 0x7F; +} + +bool is_space (uint32_t c) +{ + switch (c) + { + case '\t': + case '\f': + case '\v': + case '\n': + case '\r': + case ' ': + return true; + default: + return false; + } +} + +char hexCh(uint32_t c) +{ + static const char * sHex = "0123456789ABCDEF"; + return sHex[c & 0x0F]; +} + +void prtChOrHex(std::ostream& o, uint32_t c) +{ + if (opts->encoding.type () != Enc::EBCDIC + && (is_print (c) || is_space (c))) + { + o << '\''; + prtCh(o, c); + o << '\''; + } + else + { + prtHex(o, c); + } +} + +void prtHex(std::ostream& o, uint32_t c) +{ + o << "0x"; + const uint32_t cunit_size = opts->encoding.szCodeUnit (); + if (cunit_size >= 4) + { + o << hexCh (c >> 28u) + << hexCh (c >> 24u) + << hexCh (c >> 20u) + << hexCh (c >> 16u); + } + if (cunit_size >= 2) + { + o << hexCh (c >> 12u) + << hexCh (c >> 8u); + } + o << hexCh (c >> 4u) + << hexCh (c); +} + +void prtCh(std::ostream& o, uint32_t c) +{ + const bool dot = opts->target == opt_t::DOT; + + switch (c) + { + case '\'': + o << (dot ? "'" : "\\'"); + break; + + case '"': + o << (dot ? "\\\"" : "\""); + break; + + case '\n': + o << (dot ? "\\\\n" : "\\n"); + break; + + case '\t': + o << (dot ? "\\\\t" : "\\t"); + break; + + case '\v': + o << (dot ? "\\\\v" : "\\v"); + break; + + case '\b': + o << (dot ? "\\\\b" : "\\b"); + break; + + case '\r': + o << (dot ? 
"\\\\r" : "\\r"); + break; + + case '\f': + o << (dot ? "\\\\f" : "\\f"); + break; + + case '\a': + o << (dot ? "\\\\a" :"\\a"); + break; + + case '\\': + o << "\\\\"; // both .dot and C/C++ code expect "\\" + break; + + default: + o << static_cast (c); + break; + } +} + +void prtChOrHexForSpan(std::ostream& o, uint32_t c) +{ + if (opts->encoding.type () != Enc::EBCDIC + && is_print (c) + && (c != ']')) + { + prtCh(o, c); + } + else + { + prtHex(o, c); + } +} + +void printSpan(std::ostream& o, uint32_t lb, uint32_t ub) +{ + o << "["; + if ((ub - lb) == 1) + { + prtChOrHexForSpan(o, lb); + } + else + { + prtChOrHexForSpan(o, lb); + o << "-"; + prtChOrHexForSpan(o, ub - 1); + } + o << "]"; +} + +} // end namespace re2c + diff --git a/tools/re2c/src/codegen/print.h b/tools/re2c/src/codegen/print.h new file mode 100644 index 000000000..978d13e5c --- /dev/null +++ b/tools/re2c/src/codegen/print.h @@ -0,0 +1,20 @@ +#ifndef _RE2C_CODEGEN_PRINT_ +#define _RE2C_CODEGEN_PRINT_ + +#include "src/util/c99_stdint.h" +#include + +namespace re2c +{ + +bool is_print (uint32_t c); +bool is_space (uint32_t c); +char hexCh(uint32_t c); +void prtCh(std::ostream&, uint32_t); +void prtHex(std::ostream&, uint32_t); +void prtChOrHex(std::ostream&, uint32_t); +void printSpan(std::ostream&, uint32_t, uint32_t); + +} // end namespace re2c + +#endif // _RE2C_CODEGEN_PRINT_ diff --git a/tools/re2c/src/conf/msg.cc b/tools/re2c/src/conf/msg.cc new file mode 100644 index 000000000..ec13c0ac7 --- /dev/null +++ b/tools/re2c/src/conf/msg.cc @@ -0,0 +1,254 @@ +#include +#include +#include + +#include "config.h" +#include "src/conf/msg.h" + +namespace re2c { + +void error (const char * fmt, ...) 
+{ + fprintf (stderr, "re2c: error: "); + + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); + + fprintf (stderr, "\n"); +} + +void error_encoding () +{ + error ("only one of switches -e, -w, -x, -u and -8 must be set"); +} + +void error_arg (const char * option) +{ + error ("expected argument to option %s", option); +} + +void warning_start (uint32_t line, bool error) +{ + static const char * msg = error ? "error" : "warning"; + fprintf (stderr, "re2c: %s: line %u: ", msg, line); +} + +void warning_end (const char * type, bool error) +{ + if (type != NULL) + { + const char * prefix = error ? "error-" : ""; + fprintf (stderr, " [-W%s%s]", prefix, type); + } + fprintf (stderr, "\n"); +} + +void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) +{ + warning_start (line, error); + + va_list args; + va_start (args, fmt); + vfprintf (stderr, fmt, args); + va_end (args); + + warning_end (type, error); +} + +void usage () +{ + fprintf (stderr, + "usage: re2c [-bcdDefFghirsuvVwx18] [-o of] [-t th] file\n" + "\n" + "-? -h --help Display this info.\n" + "\n" + "-b --bit-vectors Implies -s. Use bit vectors as well in the attempt to\n" + " coax better code out of the compiler. Most useful for\n" + " specifications with more than a few keywords (e.g. for\n" + " most programming languages).\n" + "\n" + "-c --conditions Require start conditions.\n" + "\n" + "-d --debug-output Creates a parser that dumps information during\n" + " about the current position and in which state the\n" + " parser is.\n" + "\n" + "-D --emit-dot Emit a Graphviz dot view of the DFA graph\n" + "\n" + "-e --ecb Generate a parser that supports EBCDIC. The generated code\n" + " can deal with any character up to 0xFF. In this mode re2c\n" + " assumes that input character size is 1 byte. 
This switch is\n" + " incompatible with -w, -u, -x and -8\n" + "\n" + "-f --storable-state Generate a scanner that supports storable states.\n" + "\n" + "-F --flex-syntax Partial support for flex syntax.\n" + "\n" + "-g --computed-gotos Implies -b. Generate computed goto code (only useable\n" + " with gcc).\n" + "\n" + "-i --no-debug-info Do not generate '#line' info (useful for versioning).\n" + "\n" + "-o of --output=of Specify the output file (of) instead of stdout\n" + "\n" + "-r --reusable Allow reuse of scanner definitions.\n" + "\n" + "-s --nested-ifs Generate nested ifs for some switches. Many compilers\n" + " need this assist to generate better code.\n" + "\n" + "-t th --type-header=th Generate a type header file (th) with type definitions.\n" + "\n" + "-u --unicode Generate a parser that supports UTF-32. The generated code\n" + " can deal with any valid Unicode character up to 0x10FFFF.\n" + " In this mode re2c assumes that input character size is 4 bytes.\n" + " This switch is incompatible with -e, -w, -x and -8. It implies -s.\n" + "\n" + "-v --version Show version information.\n" + "\n" + "-V --vernum Show version as one number.\n" + "\n" + "-w --wide-chars Generate a parser that supports UCS-2. The generated code can\n" + " deal with any valid Unicode character up to 0xFFFF. In this mode\n" + " re2c assumes that input character size is 2 bytes. This switch is\n" + " incompatible with -e, -x, -u and -8. It implies -s.\n" // the entry ends its own line; the lone "\n" that follows is the blank separator + "\n" + "-x --utf-16 Generate a parser that supports UTF-16. The generated code can\n" + " deal with any valid Unicode character up to 0x10FFFF. In this mode\n" + " re2c assumes that input character size is 2 bytes. This switch is\n" + " incompatible with -e, -w, -u and -8. It implies -s.\n" // same as above: keep the blank line before the next entry + "\n" + "-8 --utf-8 Generate a parser that supports UTF-8. The generated code can\n" + " deal with any valid Unicode character up to 0x10FFFF. In this mode\n" + " re2c assumes that input character size is 1 byte.
This switch is\n" + " incompatible with -e, -w, -x and -u.\n" // the entry ends its own line; the lone "\n" that follows is the blank separator + "\n" + "--no-generation-date Suppress date output in the generated file.\n" + "\n" + "--no-version Suppress version output in the generated file.\n" + "\n" + "--case-insensitive All strings are case insensitive, so all \"-expressions\n" + " are treated in the same way '-expressions are.\n" + "\n" + "--case-inverted Invert the meaning of single and double quoted strings.\n" + " With this switch single quotes are case sensitive and\n" + " double quotes are case insensitive.\n" + "\n" + "--encoding-policy ep Specify what re2c should do when given bad code unit.\n" + " ep can be one of the following: fail, substitute, ignore.\n" + "\n" + "--input i Specify re2c input API.\n" + " i can be one of the following: default, custom.\n" + "\n" + "--skeleton Instead of embedding re2c-generated code into C/C++ source,\n" + " generate a self-contained program for the same DFA.\n" + " Most useful for correctness and performance testing.\n" + "\n" + "--empty-class policy What to do if user inputs empty character class. policy can be\n" + " one of the following: 'match-empty' (match empty input, default),\n" + " 'match-none' (fail to match on any input), 'error' (compilation\n" + " error). Note that there are various ways to construct empty class,\n" + " e.g: [], [^\\x00-\\xFF], [\\x00-\\xFF]\\[\\x00-\\xFF].\n" + "\n" + "--dfa-minimization <table | moore>
\n" + " Internal algorithm used by re2c to minimize DFA (defaults to\n" + " 'moore'). Both table filling and Moore's algorithms should\n" + " produce identical DFA (up to states relabelling). Table filling\n" + " algorithm is much simpler and slower; it serves as a reference\n" + " implementation.\n" + "\n" + "-1 --single-pass Deprecated and does nothing (single pass is by default now).\n" + "\n" + "-W Turn on all warnings.\n" + "\n" + "-Werror Turn warnings into errors. Note that this option alone doesn't\n" + " turn on any warnings, it only affects those warnings that have\n" + " been turned on so far or will be turned on later.\n" + "\n" + "-W<warning> Turn on individual warning.\n" // <warning> stands for one of the names listed under "Warnings:" below, without its -W prefix + "\n" + "-Wno-<warning> Turn off individual warning.\n" + "\n" + "-Werror-<warning> Turn on individual warning and treat it as error (this implies\n" + " '-W<warning>').\n" + "\n" + "-Wno-error-<warning> Don't treat this particular warning as error. This doesn't turn\n" + " off the warning itself.\n" + "\n" + "Warnings:\n" + "\n" + "-Wcondition-order Warn if the generated program makes implicit assumptions about\n" + " condition numbering. One should use either '-t, --type-header'\n" + " option or '/*!types:re2c*/' directive to generate mapping of\n" + " condition names to numbers and use autogenerated condition names.\n" + "\n" + "-Wempty-character-class Warn if regular expression contains empty character class. From\n" + " the rational point of view trying to match empty character class\n" + " makes no sense: it should always fail. However, for backwards\n" + " compatibility reasons re2c allows empty character class and treats\n" + " it as empty string. Use '--empty-class' option to change default\n" + " behaviour.\n" + "\n" + "-Wmatch-empty-string Warn if regular expression in a rule is nullable (matches empty\n" + " string).
If DFA runs in a loop and empty match is unintentional\n" + " (input position is not advanced manually), lexer may get stuck\n" + " in eternal loop.\n" + "\n" + "-Wswapped-range Warn if range lower bound is greater than upper bound. Default\n" + " re2c behaviour is to silently swap range bounds.\n" + "\n" + "-Wundefined-control-flow\n" + " Warn if some input strings cause undefined control flow in lexer\n" + " (the faulty patterns are reported). This is the most dangerous\n" + " and common mistake. It can be easily fixed by adding default rule\n" + " '*' (this rule has the lowest priority, matches any code unit\n" + " and consumes exactly one code unit).\n" + "\n" + "-Wuseless-escape Warn if a symbol is escaped when it shouldn't be. By default re2c\n" + " silently ignores escape, but this may as well indicate a typo\n" + " or an error in escape sequence.\n" + "\n" + ); +} + +void vernum () // prints PACKAGE_VERSION as six characters: dots dropped, one-digit components zero-padded, tail filled with '0' (e.g. "0.16" gives "001600") +{ + std::string vernum (PACKAGE_VERSION); + if (vernum[1] == '.') + { + vernum.insert(0, "0"); + } + vernum.erase(2, 1); + if (vernum[3] == '.') + { + vernum.insert(2, "0"); + } + vernum.erase(4, 1); + if (vernum.length() < 6 || vernum[5] < '0' || vernum[5] > '9') + { + vernum.insert(4, "0"); + } + vernum.resize(6, '0'); + + printf ("%s\n", vernum.c_str ()); +} + +void version () // prints "re2c " followed by PACKAGE_VERSION to stdout +{ + printf ("re2c %s\n", PACKAGE_VERSION); +} + +std::string incond (const std::string & cond) // returns "in condition 'NAME' " for a non-empty cond, otherwise an empty string +{ + std::string s; + if (!cond.empty ()) + { + s += "in condition '"; + s += cond; + s += "' "; + } + return s; +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/msg.h b/tools/re2c/src/conf/msg.h new file mode 100644 index 000000000..b70555239 --- /dev/null +++ b/tools/re2c/src/conf/msg.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_CONF_MSG_ +#define _RE2C_CONF_MSG_ + +#include <string> + +#include "src/util/attribute.h" +#include "src/util/c99_stdint.h" + +namespace re2c { + +void error (const char * fmt, ...)
RE2C_GXX_ATTRIBUTE ((format (printf, 1, 2))); +void error_encoding (); +void error_arg (const char * option); +void warning_start (uint32_t line, bool error); +void warning_end (const char * type, bool error); +void warning (const char * type, uint32_t line, bool error, const char * fmt, ...) RE2C_GXX_ATTRIBUTE ((format (printf, 4, 5))); +void usage (); +void vernum (); +void version (); +std::string incond (const std::string & cond); + +} // namespace re2c + +#endif // _RE2C_CONF_MSG_ diff --git a/tools/re2c/src/conf/opt.cc b/tools/re2c/src/conf/opt.cc new file mode 100644 index 000000000..fa65ceaa8 --- /dev/null +++ b/tools/re2c/src/conf/opt.cc @@ -0,0 +1,331 @@ +#include "src/conf/msg.h" +#include "src/conf/opt.h" + +namespace re2c +{ + +Opt opts; + +opt_t::opt_t () +#define OPT1(type, name, value) : name (value) +#define OPT(type, name, value) , name (value) + RE2C_OPTS +#undef OPT1 +#undef OPT +{} + +opt_t::opt_t (const opt_t & opt) +#define OPT1(type, name, value) : name (opt.name) +#define OPT(type, name, value) , name (opt.name) + RE2C_OPTS +#undef OPT1 +#undef OPT +{} + +opt_t & opt_t::operator = (const opt_t & opt) +{ +#define OPT1 OPT +#define OPT(type, name, value) name = opt.name; + RE2C_OPTS +#undef OPT1 +#undef OPT + return *this; +} + +void opt_t::fix () +{ + // some options either make no sense or must have fixed value + // with current target: reset them to default + switch (target) + { + case DOT: + // default code generation options + sFlag = Opt::baseopt.sFlag; + bFlag = Opt::baseopt.bFlag; + gFlag = Opt::baseopt.gFlag; + cGotoThreshold = Opt::baseopt.cGotoThreshold; + // default environment-insensitive formatting + yybmHexTable = Opt::baseopt.yybmHexTable; + // fallthrough + case SKELETON: + // default line information + iFlag = Opt::baseopt.iFlag; + // default environment-sensitive formatting + topIndent = Opt::baseopt.topIndent; + indString = Opt::baseopt.indString; + condDivider = Opt::baseopt.condDivider; + condDividerParam = 
Opt::baseopt.condDividerParam; + // default environment bindings + tFlag = Opt::baseopt.tFlag; + header_file = Opt::baseopt.header_file; + yycondtype = Opt::baseopt.yycondtype; + cond_get = Opt::baseopt.cond_get; + cond_get_naked = Opt::baseopt.cond_get_naked; + cond_set = Opt::baseopt.cond_set; + cond_set_arg = Opt::baseopt.cond_set_arg; + cond_set_naked = Opt::baseopt.cond_set_naked; + yyctable = Opt::baseopt.yyctable; + condPrefix = Opt::baseopt.condPrefix; + condEnumPrefix = Opt::baseopt.condEnumPrefix; + condGoto = Opt::baseopt.condGoto; + condGotoParam = Opt::baseopt.condGotoParam; + fFlag = Opt::baseopt.fFlag; + state_get = Opt::baseopt.state_get; + state_get_naked = Opt::baseopt.state_get_naked; + state_set = Opt::baseopt.state_set; + state_set_arg = Opt::baseopt.state_set_arg; + state_set_naked = Opt::baseopt.state_set_naked; + yyfilllabel = Opt::baseopt.yyfilllabel; + yynext = Opt::baseopt.yynext; + yyaccept = Opt::baseopt.yyaccept; + bUseStateAbort = Opt::baseopt.bUseStateAbort; + bUseStateNext = Opt::baseopt.bUseStateNext; + yybm = Opt::baseopt.yybm; + yytarget = Opt::baseopt.yytarget; + input_api = Opt::baseopt.input_api; + yycursor = Opt::baseopt.yycursor; + yymarker = Opt::baseopt.yymarker; + yyctxmarker = Opt::baseopt.yyctxmarker; + yylimit = Opt::baseopt.yylimit; + yypeek = Opt::baseopt.yypeek; + yyskip = Opt::baseopt.yyskip; + yybackup = Opt::baseopt.yybackup; + yybackupctx = Opt::baseopt.yybackupctx; + yyrestore = Opt::baseopt.yyrestore; + yyrestorectx = Opt::baseopt.yyrestorectx; + yylessthan = Opt::baseopt.yylessthan; + dFlag = Opt::baseopt.dFlag; + yydebug = Opt::baseopt.yydebug; + yyctype = Opt::baseopt.yyctype; + yych = Opt::baseopt.yych; + bEmitYYCh = Opt::baseopt.bEmitYYCh; + yychConversion = Opt::baseopt.yychConversion; + fill = Opt::baseopt.fill; + fill_use = Opt::baseopt.fill_use; + fill_check = Opt::baseopt.fill_check; + fill_arg = Opt::baseopt.fill_arg; + fill_arg_use = Opt::baseopt.fill_arg_use; + fill_naked = 
Opt::baseopt.fill_naked; + labelPrefix = Opt::baseopt.labelPrefix; + break; + default: + break; + } + + if (bCaseInsensitive) + { + bCaseInverted = Opt::baseopt.bCaseInverted; + } + + // respect hierarchy + if (!cFlag) + { + tFlag = Opt::baseopt.tFlag; + header_file = Opt::baseopt.header_file; + yycondtype = Opt::baseopt.yycondtype; + cond_get = Opt::baseopt.cond_get; + cond_get_naked = Opt::baseopt.cond_get_naked; + cond_set = Opt::baseopt.cond_set; + cond_set_arg = Opt::baseopt.cond_set_arg; + cond_set_naked = Opt::baseopt.cond_set_naked; + yyctable = Opt::baseopt.yyctable; + condPrefix = Opt::baseopt.condPrefix; + condEnumPrefix = Opt::baseopt.condEnumPrefix; + condDivider = Opt::baseopt.condDivider; + condDividerParam = Opt::baseopt.condDividerParam; + condGoto = Opt::baseopt.condGoto; + condGotoParam = Opt::baseopt.condGotoParam; + } + if (!fFlag) + { + state_get = Opt::baseopt.state_get; + state_get_naked = Opt::baseopt.state_get_naked; + state_set = Opt::baseopt.state_set; + state_set_arg = Opt::baseopt.state_set_arg; + state_set_naked = Opt::baseopt.state_set_naked; + yyfilllabel = Opt::baseopt.yyfilllabel; + yynext = Opt::baseopt.yynext; + yyaccept = Opt::baseopt.yyaccept; + bUseStateAbort = Opt::baseopt.bUseStateAbort; + bUseStateNext = Opt::baseopt.bUseStateNext; + } + if (!bFlag) + { + yybmHexTable = Opt::baseopt.yybmHexTable; + yybm = Opt::baseopt.yybm; + } + if (!gFlag) + { + cGotoThreshold = Opt::baseopt.cGotoThreshold; + yytarget = Opt::baseopt.yytarget; + } + if (input_api.type () != InputAPI::DEFAULT) + { + yycursor = Opt::baseopt.yycursor; + yymarker = Opt::baseopt.yymarker; + yyctxmarker = Opt::baseopt.yyctxmarker; + yylimit = Opt::baseopt.yylimit; + } + if (input_api.type () != InputAPI::CUSTOM) + { + yypeek = Opt::baseopt.yypeek; + yyskip = Opt::baseopt.yyskip; + yybackup = Opt::baseopt.yybackup; + yybackupctx = Opt::baseopt.yybackupctx; + yyrestore = Opt::baseopt.yyrestore; + yyrestorectx = Opt::baseopt.yyrestorectx; + yylessthan = 
Opt::baseopt.yylessthan; + } + if (!dFlag) + { + yydebug = Opt::baseopt.yydebug; + } + if (!fill_use) + { + fill = Opt::baseopt.fill; + fill_check = Opt::baseopt.fill_check; + fill_arg = Opt::baseopt.fill_arg; + fill_arg_use = Opt::baseopt.fill_arg_use; + fill_naked = Opt::baseopt.fill_naked; + } + + // force individual options + switch (target) + { + case DOT: + iFlag = true; + break; + case SKELETON: + iFlag = true; + input_api.set (InputAPI::CUSTOM); + indString = " "; + topIndent = 2; + break; + default: + break; + } + switch (encoding.type ()) + { + case Enc::UCS2: + case Enc::UTF16: + case Enc::UTF32: + sFlag = true; + break; + default: + break; + } + if (bFlag) + { + sFlag = true; + } + if (gFlag) + { + bFlag = true; + sFlag = true; + } + if (header_file != NULL) + { + tFlag = true; + } +} + +realopt_t::realopt_t (useropt_t & opt) + : real () + , user (opt) +{} + +const opt_t * realopt_t::operator -> () // read access to the fixed-up option set +{ + sync (); // refresh 'real' first if the user options were touched since the last sync + return &real; +} + +void realopt_t::sync () // when user.diverge is set: copy user options, run fix () on the copy, clear the flag +{ + if (user.diverge) + { + real = user.opt; + real.fix (); + user.diverge = false; + } +} + +useropt_t::useropt_t () + : opt () + , diverge (true) +{} + +opt_t * useropt_t::operator -> () // write access; marks the options as diverged so the next read resyncs +{ + diverge = true; + return &opt; +} + +const opt_t Opt::baseopt; + +bool Opt::source (const char * s) // records the source file name; a second call reports an error and returns false +{ + if (source_file) + { + error ("multiple source files: %s, %s", source_file, s); + return false; + } + else + { + source_file = s; + return true; + } +} + +bool Opt::output (const char * s) // records the output file name; a second call reports an error and returns false +{ + if (output_file) + { + error ("multiple output files: %s, %s", output_file, s); + return false; + } + else + { + output_file = s; + return true; + } +} + +void Opt::reset_encoding (const Enc & enc) +{ + useropt->encoding = enc; +} + +void Opt::reset_mapCodeName () +{ + // historically arranged set of names + // no actual reason why these particular options should be reset + useropt->cond_get = Opt::baseopt.cond_get; + useropt->cond_set = Opt::baseopt.cond_set; + useropt->fill = Opt::baseopt.fill; + useropt->state_get =
Opt::baseopt.state_get; + useropt->state_set = Opt::baseopt.state_set; + useropt->yybackup = Opt::baseopt.yybackup; + useropt->yybackupctx = Opt::baseopt.yybackupctx; + useropt->yycondtype = Opt::baseopt.yycondtype; + useropt->yyctxmarker = Opt::baseopt.yyctxmarker; + useropt->yyctype = Opt::baseopt.yyctype; + useropt->yycursor = Opt::baseopt.yycursor; + useropt->yydebug = Opt::baseopt.yydebug; + useropt->yylessthan = Opt::baseopt.yylessthan; + useropt->yylimit = Opt::baseopt.yylimit; + useropt->yymarker = Opt::baseopt.yymarker; + useropt->yypeek = Opt::baseopt.yypeek; + useropt->yyrestore = Opt::baseopt.yyrestore; + useropt->yyrestorectx = Opt::baseopt.yyrestorectx; + useropt->yyskip = Opt::baseopt.yyskip; + useropt->yyfilllabel = Opt::baseopt.yyfilllabel; + useropt->yynext = Opt::baseopt.yynext; + useropt->yyaccept = Opt::baseopt.yyaccept; + useropt->yybm = Opt::baseopt.yybm; + useropt->yych = Opt::baseopt.yych; + useropt->yyctable = Opt::baseopt.yyctable; + useropt->yytarget = Opt::baseopt.yytarget; +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/opt.h b/tools/re2c/src/conf/opt.h new file mode 100644 index 000000000..30ab21e55 --- /dev/null +++ b/tools/re2c/src/conf/opt.h @@ -0,0 +1,218 @@ +#ifndef _RE2C_CONF_OPT_ +#define _RE2C_CONF_OPT_ + +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/codegen/input_api.h" +#include "src/ir/dfa/dfa.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +#define RE2C_OPTS \ + /* target */ \ + OPT1 (opt_t::target_t, target, CODE) \ + /* fingerprint */ \ + OPT (bool, bNoGenerationDate, false) \ + OPT (bool, version, true) \ + /* regular expressions */ \ + OPT (Enc, encoding, Enc ()) \ + OPT (bool, bCaseInsensitive, false) \ + OPT (bool, bCaseInverted, false) \ + OPT (empty_class_policy_t, empty_class_policy, EMPTY_CLASS_MATCH_EMPTY) \ + /* conditions */ \ + OPT (bool, cFlag, false) \ + OPT (bool, tFlag, 
false) \ + OPT (const char *, header_file, NULL) \ + OPT (std::string, yycondtype, "YYCONDTYPE") \ + OPT (std::string, cond_get, "YYGETCONDITION") \ + OPT (bool, cond_get_naked, false) \ + OPT (std::string, cond_set, "YYSETCONDITION" ) \ + OPT (std::string, cond_set_arg, "@@" ) \ + OPT (bool, cond_set_naked, false ) \ + OPT (std::string, yyctable, "yyctable") \ + OPT (std::string, condPrefix, "yyc_") \ + OPT (std::string, condEnumPrefix, "yyc") \ + OPT (std::string, condDivider, "/* *********************************** */") \ + OPT (std::string, condDividerParam, "@@") \ + OPT (std::string, condGoto, "goto @@;") \ + OPT (std::string, condGotoParam, "@@") \ + /* states */ \ + OPT (bool, fFlag, false) \ + OPT (std::string, state_get, "YYGETSTATE") \ + OPT (bool, state_get_naked, false) \ + OPT (std::string, state_set, "YYSETSTATE") \ + OPT (std::string, state_set_arg, "@@") \ + OPT (bool, state_set_naked, false) \ + OPT (std::string, yyfilllabel, "yyFillLabel") \ + OPT (std::string, yynext, "yyNext") \ + OPT (std::string, yyaccept, "yyaccept") \ + OPT (bool, bUseStateAbort, false) \ + OPT (bool, bUseStateNext, false) \ + /* reuse */ \ + OPT (bool, rFlag, false) \ + /* partial flex syntax support */ \ + OPT (bool, FFlag, false) \ + /* code generation */ \ + OPT (bool, sFlag, false) \ + OPT (bool, bFlag, false) \ + OPT (std::string, yybm, "yybm") \ + OPT (bool, yybmHexTable, false) \ + OPT (bool, gFlag, false) \ + OPT (std::string, yytarget, "yytarget") \ + OPT (uint32_t, cGotoThreshold, 9) \ + /* formatting */ \ + OPT (uint32_t, topIndent, 0) \ + OPT (std::string, indString, "\t") \ + /* input API */ \ + OPT (InputAPI, input_api, InputAPI ()) \ + OPT (std::string, yycursor, "YYCURSOR") \ + OPT (std::string, yymarker, "YYMARKER") \ + OPT (std::string, yyctxmarker, "YYCTXMARKER") \ + OPT (std::string, yylimit, "YYLIMIT") \ + OPT (std::string, yypeek, "YYPEEK") \ + OPT (std::string, yyskip, "YYSKIP") \ + OPT (std::string, yybackup, "YYBACKUP") \ + OPT (std::string, 
yybackupctx, "YYBACKUPCTX") \ + OPT (std::string, yyrestore, "YYRESTORE") \ + OPT (std::string, yyrestorectx, "YYRESTORECTX") \ + OPT (std::string, yylessthan, "YYLESSTHAN") \ + /* #line directives */ \ + OPT (bool, iFlag, false) \ + /* debug */ \ + OPT (bool, dFlag, false) \ + OPT (std::string, yydebug, "YYDEBUG") \ + /* yych */ \ + OPT (std::string, yyctype, "YYCTYPE") \ + OPT (std::string, yych, "yych") \ + OPT (bool, bEmitYYCh, true) \ + OPT (bool, yychConversion, false) \ + /* YYFILL */ \ + OPT (std::string, fill, "YYFILL") \ + OPT (bool, fill_use, true) \ + OPT (bool, fill_check, true) \ + OPT (std::string, fill_arg, "@@") \ + OPT (bool, fill_arg_use, true) \ + OPT (bool, fill_naked, false) \ + /* labels */ \ + OPT (std::string, labelPrefix, "yy") \ + /* internals */ \ + OPT (dfa_minimization_t, dfa_minimization, DFA_MINIMIZATION_MOORE) + +struct opt_t // one data member per RE2C_OPTS entry +{ + enum target_t + { + CODE, + DOT, + SKELETON + }; + +#define OPT1 OPT +#define OPT(type, name, value) type name; // expands each RE2C_OPTS entry into a member declaration + RE2C_OPTS +#undef OPT1 +#undef OPT + + opt_t (); + opt_t (const opt_t & opt); + opt_t & operator = (const opt_t & opt); + void fix (); +}; + +class useropt_t; +class realopt_t // holds its own opt_t ('real') plus a reference to the user-side options +{ + opt_t real; + useropt_t & user; +public: + realopt_t (useropt_t & opt); + const opt_t * operator -> (); + void sync (); +}; + +class useropt_t // options as set by the user, plus a 'diverge' flag +{ + opt_t opt; + bool diverge; +public: + useropt_t (); + opt_t * operator -> (); + friend void realopt_t::sync (); // sync () needs the private 'opt' and 'diverge' members +}; + +struct Opt +{ + static const opt_t baseopt; + + const char * source_file; + const char * output_file; + +private: + useropt_t useropt; + realopt_t realopt; + +public: + Opt () + : source_file (NULL) + , output_file (NULL) + , useropt () + , realopt (useropt) + {} + + // read-only access, forces options synchronization + const opt_t * operator -> () + { + return realopt.operator -> (); + } + + bool source (const char * s); + bool output (const char * s); + + // Inplace configurations are applied immediately when parsed.
+ // This is very bad: first, re2c behaviour is changed in the middle + // of the block; second, config is resynced too often (every + // attempt to read config that has been updated results in + // automatic resync). It is much better to set all options at once. + bool set_encoding (Enc::type_t t) { return useropt->encoding.set (t); } + void unset_encoding (Enc::type_t t) { useropt->encoding.unset (t); } + void set_encoding_policy (Enc::policy_t p) { useropt->encoding.setPolicy (p); } + void set_input_api (InputAPI::type_t t) { useropt->input_api.set (t); } +#define OPT1 OPT +#define OPT(type, name, value) void set_##name (type arg) { useropt->name = arg; } + RE2C_OPTS +#undef OPT1 +#undef OPT + + // helpers + std::string yychConversion () + { + return realopt->yychConversion + ? "(" + realopt->yyctype + ")" + : ""; + } + + // bad temporary hacks, should be fixed by proper scoping of config (parts). + void reset_encoding (const Enc & enc); + void reset_mapCodeName (); + + FORBID_COPY (Opt); +}; + +enum parse_opts_t +{ + OK, + EXIT_OK, + EXIT_FAIL +}; + +parse_opts_t parse_opts (char ** argv, Opt & opts); + +} // namespace re2c + +#endif // _RE2C_CONF_OPT_ diff --git a/tools/re2c/src/conf/parse_opts.cc b/tools/re2c/src/conf/parse_opts.cc new file mode 100644 index 000000000..9eb07989e --- /dev/null +++ b/tools/re2c/src/conf/parse_opts.cc @@ -0,0 +1,2846 @@ +/* Generated by re2c 0.16 on Thu Jan 21 10:47:47 2016 */ +#line 1 "../src/conf/parse_opts.re" +#include "src/codegen/input_api.h" +#include "src/conf/msg.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +static inline bool next (char * & arg, char ** & argv) +{ + arg = *++argv; + return arg != NULL; +} + +parse_opts_t parse_opts (char ** argv, Opt & opts) +{ +#define YYCTYPE unsigned char + char * YYCURSOR; + char * YYMARKER; + Warn::option_t option; + +#line 31 
"../src/conf/parse_opts.re" + + +opt: + if (!next (YYCURSOR, argv)) + { + goto end; + } + +#line 37 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x00) goto yy2; + if (yych == '-') goto yy5; + goto yy4; +yy2: + ++YYCURSOR; +yy3: +#line 40 "../src/conf/parse_opts.re" + { + error ("bad option: %s", *argv); + return EXIT_FAIL; + } +#line 87 "src/conf/parse_opts.cc" +yy4: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + goto yy10; +yy5: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '-') { + if (yych <= 0x00) goto yy11; + 
if (yych >= '-') goto yy13; + } else { + if (yych == 'W') goto yy15; + } +#line 63 "../src/conf/parse_opts.re" + { goto opt_short; } +#line 102 "src/conf/parse_opts.cc" +yy7: + ++YYCURSOR; +#line 61 "../src/conf/parse_opts.re" + { if (!opts.source (*argv)) return EXIT_FAIL; goto opt; } +#line 107 "src/conf/parse_opts.cc" +yy9: + ++YYCURSOR; + yych = (YYCTYPE)*YYCURSOR; +yy10: + if (yybm[0+yych] & 128) { + goto yy9; + } + goto yy7; +yy11: + ++YYCURSOR; +#line 60 "../src/conf/parse_opts.re" + { if (!opts.source ("")) return EXIT_FAIL; goto opt; } +#line 120 "src/conf/parse_opts.cc" +yy13: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 0x00) goto yy17; +#line 64 "../src/conf/parse_opts.re" + { goto opt_long; } +#line 126 "src/conf/parse_opts.cc" +yy15: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'e') { + if (yych <= 0x00) goto yy19; + if (yych >= 'e') goto yy21; + } else { + if (yych == 'n') goto yy23; + } +yy16: +#line 68 "../src/conf/parse_opts.re" + { option = Warn::W; goto opt_warn; } +#line 139 "src/conf/parse_opts.cc" +yy17: + ++YYCURSOR; +#line 46 "../src/conf/parse_opts.re" + { + // all remaining arguments are non-options + // so they must be input files + // re2c expects exactly one input file + for (char * f; next (f, argv);) + { + if (!opts.source (f)) + { + return EXIT_FAIL; + } + } + goto end; + } +#line 156 "src/conf/parse_opts.cc" +yy19: + ++YYCURSOR; +#line 66 "../src/conf/parse_opts.re" + { warn.set_all (); goto opt; } +#line 161 "src/conf/parse_opts.cc" +yy21: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy24; +yy22: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy3; + } else { + goto yy16; + } + } else { + goto yy28; + } +yy23: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy25; + goto yy22; +yy24: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy26; + goto yy22; +yy25: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy27; + goto yy22; +yy26: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy29; + goto yy22; +yy27: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy30; +yy28: +#line 69 "../src/conf/parse_opts.re" + { option = Warn::WNO; goto opt_warn; } +#line 199 "src/conf/parse_opts.cc" +yy29: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy31; + goto yy22; +yy30: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy32; + goto yy22; +yy31: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy33; + if (yych == '-') goto yy35; + goto yy22; +yy32: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy37; + goto yy22; +yy33: + ++YYCURSOR; +#line 67 "../src/conf/parse_opts.re" + { warn.set_all_error (); goto opt; } +#line 221 "src/conf/parse_opts.cc" +yy35: + ++YYCURSOR; +#line 70 "../src/conf/parse_opts.re" + { option = Warn::WERROR; goto opt_warn; } +#line 226 "src/conf/parse_opts.cc" +yy37: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'o') goto yy22; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'r') goto yy22; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != '-') goto yy22; + ++YYCURSOR; +#line 71 "../src/conf/parse_opts.re" + { option = Warn::WNOERROR; goto opt_warn; } +#line 237 "src/conf/parse_opts.cc" +} +#line 72 "../src/conf/parse_opts.re" + + +opt_warn: + +#line 244 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + switch (yych) { + case 'c': goto yy46; + case 'e': goto yy47; + case 'm': goto yy48; + case 's': goto yy49; + case 'u': goto yy50; + default: goto yy44; + } +yy44: + ++YYCURSOR; +yy45: +#line 77 "../src/conf/parse_opts.re" + { + error ("bad warning: %s", *argv); + return EXIT_FAIL; + } +#line 264 "src/conf/parse_opts.cc" +yy46: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'o') goto yy51; + goto yy45; +yy47: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'm') goto yy53; + goto yy45; +yy48: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy54; + goto yy45; +yy49: + yych 
= (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'w') goto yy55; + goto yy45; +yy50: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'n') goto yy56; + if (yych == 's') goto yy57; + goto yy45; +yy51: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy58; +yy52: + YYCURSOR = YYMARKER; + goto yy45; +yy53: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy59; + goto yy52; +yy54: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy60; + goto yy52; +yy55: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy61; + goto yy52; +yy56: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy62; + if (yych == 'r') goto yy63; + goto yy52; +yy57: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy64; + goto yy52; +yy58: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy65; + goto yy52; +yy59: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy66; + goto yy52; +yy60: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy67; + goto yy52; +yy61: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy68; + goto yy52; +yy62: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy69; + goto yy52; +yy63: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy70; + goto yy52; +yy64: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy71; + goto yy52; +yy65: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy72; + goto yy52; +yy66: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy73; + goto yy52; +yy67: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy74; + goto yy52; +yy68: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy75; + goto yy52; +yy69: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy76; + goto yy52; +yy70: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy77; + goto yy52; +yy71: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy78; + goto yy52; +yy72: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy79; + goto yy52; +yy73: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy80; + goto yy52; +yy74: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy81; + goto yy52; +yy75: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy82; + goto yy52; +yy76: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy83; + goto yy52; +yy77: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy84; + goto yy52; +yy78: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy85; + goto yy52; +yy79: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy86; + goto yy52; +yy80: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy87; + goto yy52; +yy81: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy88; + goto yy52; +yy82: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy89; + goto yy52; +yy83: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy90; + goto yy52; +yy84: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy91; + goto yy52; +yy85: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy92; + goto yy52; +yy86: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy93; + goto yy52; +yy87: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy94; + goto yy52; +yy88: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy95; + goto yy52; +yy89: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy96; + goto yy52; +yy90: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy97; + goto yy52; +yy91: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy98; + goto yy52; +yy92: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy99; + goto yy52; +yy93: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy100; + goto yy52; +yy94: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy101; + goto yy52; +yy95: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy102; + goto yy52; +yy96: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy103; + goto yy52; +yy97: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto 
yy104; + goto yy52; +yy98: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy105; + goto yy52; +yy99: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy106; + goto yy52; +yy100: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy107; + goto yy52; +yy101: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy108; + goto yy52; +yy102: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy109; + goto yy52; +yy103: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy110; + goto yy52; +yy104: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy111; + goto yy52; +yy105: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy112; + goto yy52; +yy106: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy113; + goto yy52; +yy107: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy114; + goto yy52; +yy108: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy115; + goto yy52; +yy109: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy116; + goto yy52; +yy110: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy117; + goto yy52; +yy111: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy118; + goto yy52; +yy112: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy119; + goto yy52; +yy113: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy120; + goto yy52; +yy114: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy121; + goto yy52; +yy115: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy122; + goto yy52; +yy116: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy123; + goto yy52; +yy117: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy124; + goto yy52; +yy118: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy125; + goto yy52; +yy119: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy126; + goto yy52; +yy120: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy127; + goto yy52; +yy121: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy128; + 
goto yy52; +yy122: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy129; + goto yy52; +yy123: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy130; + goto yy52; +yy124: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy131; + goto yy52; +yy125: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy132; + goto yy52; +yy126: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy133; + goto yy52; +yy127: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy134; + goto yy52; +yy128: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy135; + goto yy52; +yy129: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy136; + goto yy52; +yy130: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy137; + goto yy52; +yy131: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy138; + goto yy52; +yy132: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy140; + goto yy52; +yy133: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy141; + goto yy52; +yy134: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy142; + goto yy52; +yy135: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy143; + goto yy52; +yy136: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy144; + goto yy52; +yy137: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy145; + goto yy52; +yy138: + ++YYCURSOR; +#line 84 "../src/conf/parse_opts.re" + { warn.set (Warn::SWAPPED_RANGE, option); goto opt; } +#line 637 "src/conf/parse_opts.cc" +yy140: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy146; + goto yy52; +yy141: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy147; + goto yy52; +yy142: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy148; + goto yy52; +yy143: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy150; + goto yy52; +yy144: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy152; + goto yy52; +yy145: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy153; + goto yy52; +yy146: 
+ yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy154; + goto yy52; +yy147: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy155; + goto yy52; +yy148: + ++YYCURSOR; +#line 87 "../src/conf/parse_opts.re" + { warn.set (Warn::USELESS_ESCAPE, option); goto opt; } +#line 674 "src/conf/parse_opts.cc" +yy150: + ++YYCURSOR; +#line 81 "../src/conf/parse_opts.re" + { warn.set (Warn::CONDITION_ORDER, option); goto opt; } +#line 679 "src/conf/parse_opts.cc" +yy152: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy156; + goto yy52; +yy153: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy157; + goto yy52; +yy154: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy158; + goto yy52; +yy155: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy159; + goto yy52; +yy156: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy160; + goto yy52; +yy157: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy161; + goto yy52; +yy158: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy162; + goto yy52; +yy159: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy163; + goto yy52; +yy160: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy165; + goto yy52; +yy161: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy166; + goto yy52; +yy162: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy168; + goto yy52; +yy163: + ++YYCURSOR; +#line 86 "../src/conf/parse_opts.re" + { warn.set (Warn::UNREACHABLE_RULES, option); goto opt; } +#line 728 "src/conf/parse_opts.cc" +yy165: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy169; + goto yy52; +yy166: + ++YYCURSOR; +#line 83 "../src/conf/parse_opts.re" + { warn.set (Warn::MATCH_EMPTY_STRING, option); goto opt; } +#line 737 "src/conf/parse_opts.cc" +yy168: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy170; + goto yy52; +yy169: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy171; + goto yy52; +yy170: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') 
goto yy172; + goto yy52; +yy171: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy173; + goto yy52; +yy172: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'w') goto yy175; + goto yy52; +yy173: + ++YYCURSOR; +#line 82 "../src/conf/parse_opts.re" + { warn.set (Warn::EMPTY_CHARACTER_CLASS, option); goto opt; } +#line 762 "src/conf/parse_opts.cc" +yy175: + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy52; + ++YYCURSOR; +#line 85 "../src/conf/parse_opts.re" + { warn.set (Warn::UNDEFINED_CONTROL_FLOW, option); goto opt; } +#line 769 "src/conf/parse_opts.cc" +} +#line 88 "../src/conf/parse_opts.re" + + +opt_short: + +#line 776 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 'b') { + if (yych <= 'C') { + if (yych <= '7') { + if (yych <= 0x00) goto yy180; + if (yych == '1') goto yy184; + goto yy182; + } else { + if (yych <= '8') goto yy186; + if (yych == '?') goto yy188; + goto yy182; + } + } else { + if (yych <= 'R') { + if (yych <= 'D') goto yy190; + if (yych == 'F') goto yy192; + goto yy182; + } else { + if (yych <= 'U') { + if (yych <= 'S') goto yy194; + goto yy182; + } else { + if (yych <= 'V') goto yy196; + if (yych <= 'a') goto yy182; + goto yy198; + } + } + } + } else { + if (yych <= 'o') { + if (yych <= 'f') { + if (yych <= 'c') goto yy200; + if (yych <= 'd') goto yy202; + if (yych <= 'e') goto yy204; + goto yy206; + } else { + if (yych <= 'h') { + if (yych <= 'g') goto yy208; + goto yy188; + } else { + if (yych <= 'i') goto yy210; + if (yych <= 'n') goto yy182; + goto yy212; + } + } + } else { + if (yych <= 't') { + if (yych <= 'q') goto yy182; + if (yych <= 'r') goto yy214; + if (yych <= 's') goto yy216; + goto yy218; + } else { + if (yych <= 'v') { + if (yych <= 'u') goto yy220; + goto yy222; + } else { + if (yych <= 'w') goto yy224; + if (yych <= 'x') goto yy226; + goto yy182; + } + } + } + } +yy180: + ++YYCURSOR; +#line 97 "../src/conf/parse_opts.re" + { goto opt; } +#line 846 "src/conf/parse_opts.cc" 
+yy182: + ++YYCURSOR; +#line 93 "../src/conf/parse_opts.re" + { + error ("bad short option: %s", *argv); + return EXIT_FAIL; + } +#line 854 "src/conf/parse_opts.cc" +yy184: + ++YYCURSOR; +#line 121 "../src/conf/parse_opts.re" + { goto opt_short; } +#line 859 "src/conf/parse_opts.cc" +yy186: + ++YYCURSOR; +#line 116 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 864 "src/conf/parse_opts.cc" +yy188: + ++YYCURSOR; +#line 98 "../src/conf/parse_opts.re" + { usage (); return EXIT_OK; } +#line 869 "src/conf/parse_opts.cc" +yy190: + ++YYCURSOR; +#line 104 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::DOT); goto opt_short; } +#line 874 "src/conf/parse_opts.cc" +yy192: + ++YYCURSOR; +#line 106 "../src/conf/parse_opts.re" + { opts.set_FFlag (true); goto opt_short; } +#line 879 "src/conf/parse_opts.cc" +yy194: + ++YYCURSOR; +#line 111 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::SKELETON); goto opt_short; } +#line 884 "src/conf/parse_opts.cc" +yy196: + ++YYCURSOR; +#line 100 "../src/conf/parse_opts.re" + { vernum (); return EXIT_OK; } +#line 889 "src/conf/parse_opts.cc" +yy198: + ++YYCURSOR; +#line 101 "../src/conf/parse_opts.re" + { opts.set_bFlag (true); goto opt_short; } +#line 894 "src/conf/parse_opts.cc" +yy200: + ++YYCURSOR; +#line 102 "../src/conf/parse_opts.re" + { opts.set_cFlag (true); goto opt_short; } +#line 899 "src/conf/parse_opts.cc" +yy202: + ++YYCURSOR; +#line 103 "../src/conf/parse_opts.re" + { opts.set_dFlag (true); goto opt_short; } +#line 904 "src/conf/parse_opts.cc" +yy204: + ++YYCURSOR; +#line 112 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 909 "src/conf/parse_opts.cc" +yy206: + ++YYCURSOR; +#line 105 "../src/conf/parse_opts.re" + { opts.set_fFlag (true); goto opt_short; } +#line 914 "src/conf/parse_opts.cc" +yy208: + ++YYCURSOR; +#line 107 
"../src/conf/parse_opts.re" + { opts.set_gFlag (true); goto opt_short; } +#line 919 "src/conf/parse_opts.cc" +yy210: + ++YYCURSOR; +#line 108 "../src/conf/parse_opts.re" + { opts.set_iFlag (true); goto opt_short; } +#line 924 "src/conf/parse_opts.cc" +yy212: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 0x00) goto yy228; +#line 118 "../src/conf/parse_opts.re" + { *argv = YYCURSOR; goto opt_output; } +#line 930 "src/conf/parse_opts.cc" +yy214: + ++YYCURSOR; +#line 109 "../src/conf/parse_opts.re" + { opts.set_rFlag (true); goto opt_short; } +#line 935 "src/conf/parse_opts.cc" +yy216: + ++YYCURSOR; +#line 110 "../src/conf/parse_opts.re" + { opts.set_sFlag (true); goto opt_short; } +#line 940 "src/conf/parse_opts.cc" +yy218: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 0x00) goto yy230; +#line 120 "../src/conf/parse_opts.re" + { *argv = YYCURSOR; goto opt_header; } +#line 946 "src/conf/parse_opts.cc" +yy220: + ++YYCURSOR; +#line 113 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 951 "src/conf/parse_opts.cc" +yy222: + ++YYCURSOR; +#line 99 "../src/conf/parse_opts.re" + { version (); return EXIT_OK; } +#line 956 "src/conf/parse_opts.cc" +yy224: + ++YYCURSOR; +#line 114 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 961 "src/conf/parse_opts.cc" +yy226: + ++YYCURSOR; +#line 115 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } +#line 966 "src/conf/parse_opts.cc" +yy228: + ++YYCURSOR; +#line 117 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } +#line 971 "src/conf/parse_opts.cc" +yy230: + ++YYCURSOR; +#line 119 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto 
opt_header; } +#line 976 "src/conf/parse_opts.cc" +} +#line 122 "../src/conf/parse_opts.re" + + +opt_long: + +#line 983 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + switch (yych) { + case 'b': goto yy236; + case 'c': goto yy237; + case 'd': goto yy238; + case 'e': goto yy239; + case 'f': goto yy240; + case 'h': goto yy241; + case 'i': goto yy242; + case 'n': goto yy243; + case 'o': goto yy244; + case 'r': goto yy245; + case 's': goto yy246; + case 't': goto yy247; + case 'u': goto yy248; + case 'v': goto yy249; + case 'w': goto yy250; + default: goto yy234; + } +yy234: + ++YYCURSOR; +yy235: +#line 127 "../src/conf/parse_opts.re" + { + error ("bad long option: %s", *argv); + return EXIT_FAIL; + } +#line 1013 "src/conf/parse_opts.cc" +yy236: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'i') goto yy251; + goto yy235; +yy237: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy253; + if (yych == 'o') goto yy254; + goto yy235; +yy238: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'd') goto yy235; + if (yych <= 'e') goto yy255; + if (yych <= 'f') goto yy256; + goto yy235; +yy239: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'l') { + if (yych == 'c') goto yy257; + goto yy235; + } else { + if (yych <= 'm') goto yy258; + if (yych <= 'n') goto yy259; + goto yy235; + } +yy240: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'l') goto yy260; + goto yy235; +yy241: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy261; + goto yy235; +yy242: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'n') goto yy262; + goto yy235; +yy243: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy263; + if (yych == 'o') goto yy264; + goto yy235; +yy244: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy265; + goto yy235; +yy245: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy266; + goto yy235; +yy246: + yych = 
(YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 'j') { + if (yych == 'i') goto yy267; + goto yy235; + } else { + if (yych <= 'k') goto yy268; + if (yych == 't') goto yy269; + goto yy235; + } +yy247: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'y') goto yy270; + goto yy235; +yy248: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'n') goto yy271; + if (yych == 't') goto yy272; + goto yy235; +yy249: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy273; + goto yy235; +yy250: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'i') goto yy274; + goto yy235; +yy251: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy275; +yy252: + YYCURSOR = YYMARKER; + goto yy235; +yy253: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy276; + goto yy252; +yy254: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy277; + goto yy252; +yy255: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy278; + goto yy252; +yy256: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy279; + goto yy252; +yy257: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy280; + goto yy252; +yy258: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy281; + if (yych == 'p') goto yy282; + goto yy252; +yy259: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy283; + goto yy252; +yy260: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy284; + goto yy252; +yy261: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy285; + goto yy252; +yy262: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy286; + goto yy252; +yy263: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy287; + goto yy252; +yy264: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy288; + goto yy252; +yy265: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy289; + goto yy252; +yy266: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy290; + goto yy252; +yy267: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto 
yy291; + goto yy252; +yy268: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy292; + goto yy252; +yy269: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy293; + if (yych == 'o') goto yy294; + goto yy252; +yy270: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy295; + goto yy252; +yy271: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy296; + goto yy252; +yy272: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy297; + goto yy252; +yy273: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy298; + goto yy252; +yy274: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy299; + goto yy252; +yy275: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy300; + goto yy252; +yy276: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy301; + goto yy252; +yy277: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy302; + goto yy252; +yy278: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy303; + goto yy252; +yy279: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy304; + goto yy252; +yy280: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy305; + goto yy252; +yy281: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy307; + goto yy252; +yy282: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy308; + goto yy252; +yy283: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy309; + goto yy252; +yy284: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'x') goto yy310; + goto yy252; +yy285: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy311; + goto yy252; +yy286: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy312; + goto yy252; +yy287: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy313; + goto yy252; +yy288: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 'f') { + if (yych == 'd') goto yy314; + goto yy252; + } else { + if (yych <= 'g') goto yy315; + if (yych == 'v') goto yy316; + goto yy252; + } +yy289: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy317; 
+ goto yy252; +yy290: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy318; + goto yy252; +yy291: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy319; + goto yy252; +yy292: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy320; + goto yy252; +yy293: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy321; + goto yy252; +yy294: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy322; + goto yy252; +yy295: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy323; + goto yy252; +yy296: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy324; + goto yy252; +yy297: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy325; + goto yy252; +yy298: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy326; + if (yych == 's') goto yy327; + goto yy252; +yy299: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy328; + goto yy252; +yy300: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'v') goto yy329; + goto yy252; +yy301: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy330; + goto yy252; +yy302: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy331; + goto yy252; +yy303: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy332; + goto yy252; +yy304: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy333; + goto yy252; +yy305: + ++YYCURSOR; +#line 149 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1318 "src/conf/parse_opts.cc" +yy307: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy334; + goto yy252; +yy308: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy335; + goto yy252; +yy309: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy336; + goto yy252; +yy310: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy337; + goto yy252; +yy311: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy338; + goto yy252; +yy312: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy340; + goto 
yy252; +yy313: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy341; + goto yy252; +yy314: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy342; + goto yy252; +yy315: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy343; + goto yy252; +yy316: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy344; + goto yy252; +yy317: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy345; + goto yy252; +yy318: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy346; + goto yy252; +yy319: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy347; + goto yy252; +yy320: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy348; + goto yy252; +yy321: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy349; + goto yy252; +yy322: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy350; + goto yy252; +yy323: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy351; + goto yy252; +yy324: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy352; + goto yy252; +yy325: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '1') goto yy353; + if (yych == '8') goto yy354; + goto yy252; +yy326: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy355; + goto yy252; +yy327: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy356; + goto yy252; +yy328: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy357; + goto yy252; +yy329: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy358; + goto yy252; +yy330: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy359; + goto yy252; +yy331: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy360; + goto yy252; +yy332: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy361; + goto yy252; +yy333: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy362; + goto yy252; +yy334: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy363; + goto yy252; +yy335: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy364; + goto yy252; +yy336: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy365; + goto yy252; +yy337: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy366; + goto yy252; +yy338: + ++YYCURSOR; +#line 131 "../src/conf/parse_opts.re" + { usage (); return EXIT_OK; } +#line 1448 "src/conf/parse_opts.cc" +yy340: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy367; + goto yy252; +yy341: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy369; + goto yy252; +yy342: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy370; + goto yy252; +yy343: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy371; + goto yy252; +yy344: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy372; + goto yy252; +yy345: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy373; + goto yy252; +yy346: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy374; + goto yy252; +yy347: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy375; + goto yy252; +yy348: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy376; + goto yy252; +yy349: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy377; + goto yy252; +yy350: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy378; + goto yy252; +yy351: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy379; + goto yy252; +yy352: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy380; + goto yy252; +yy353: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '6') goto yy381; + goto yy252; +yy354: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy382; + goto yy252; +yy355: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy384; + goto yy252; +yy356: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy385; + goto yy252; +yy357: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy386; + goto yy252; +yy358: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy387; + goto yy252; +yy359: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy388; + goto yy252; +yy360: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy389; + goto yy252; +yy361: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy390; + goto yy252; +yy362: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy391; + goto yy252; +yy363: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy392; + goto yy252; +yy364: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy393; + goto yy252; +yy365: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy394; + goto yy252; +yy366: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy395; + goto yy252; +yy367: + ++YYCURSOR; +#line 157 "../src/conf/parse_opts.re" + { goto opt_input; } +#line 1561 "src/conf/parse_opts.cc" +yy369: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy396; + goto yy252; +yy370: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy397; + goto yy252; +yy371: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy398; + goto yy252; +yy372: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy399; + goto yy252; +yy373: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy400; + goto yy252; +yy374: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy402; + goto yy252; +yy375: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy403; + goto yy252; +yy376: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy404; + goto yy252; +yy377: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy405; + goto yy252; +yy378: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy406; + goto yy252; +yy379: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy407; + goto yy252; +yy380: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy408; + goto yy252; +yy381: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy409; + goto yy252; +yy382: + ++YYCURSOR; +#line 153 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1618 "src/conf/parse_opts.cc" +yy384: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy411; + goto yy252; +yy385: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy413; + goto yy252; +yy386: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy414; + goto yy252; +yy387: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy415; + goto yy252; +yy388: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy416; + if (yych == 'v') goto yy417; + goto yy252; +yy389: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy418; + goto yy252; +yy390: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy419; + goto yy252; +yy391: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy420; + goto yy252; +yy392: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy421; + goto yy252; +yy393: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy422; + goto yy252; +yy394: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy423; + goto yy252; +yy395: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy424; + goto yy252; +yy396: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy425; + goto yy252; +yy397: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy426; + goto yy252; +yy398: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy427; + goto yy252; +yy399: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy428; + goto yy252; +yy400: + ++YYCURSOR; +#line 154 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } +#line 1688 "src/conf/parse_opts.cc" +yy402: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy429; + goto yy252; +yy403: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy430; + goto yy252; +yy404: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy431; + goto yy252; +yy405: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy432; + goto yy252; +yy406: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy433; + goto yy252; +yy407: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy434; + goto yy252; +yy408: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy435; + goto yy252; +yy409: + ++YYCURSOR; +#line 152 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1721 "src/conf/parse_opts.cc" +yy411: + ++YYCURSOR; +#line 133 "../src/conf/parse_opts.re" + { vernum (); return EXIT_OK; } +#line 1726 "src/conf/parse_opts.cc" +yy413: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy437; + goto yy252; +yy414: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy439; + goto yy252; +yy415: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy440; + goto yy252; +yy416: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy441; + goto yy252; +yy417: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy442; + goto yy252; +yy418: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy443; + goto yy252; +yy419: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy444; + goto yy252; +yy420: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy445; + goto yy252; +yy421: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy446; + goto yy252; +yy422: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy448; + goto yy252; +yy423: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy449; + goto yy252; +yy424: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy450; + goto yy252; +yy425: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy451; + goto yy252; +yy426: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy452; + goto yy252; +yy427: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy453; + goto yy252; +yy428: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy454; + goto yy252; +yy429: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy455; + goto yy252; +yy430: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy457; + goto yy252; +yy431: + 
yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy458; + goto yy252; +yy432: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy460; + goto yy252; +yy433: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy461; + goto yy252; +yy434: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy462; + goto yy252; +yy435: + ++YYCURSOR; +#line 150 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 1819 "src/conf/parse_opts.cc" +yy437: + ++YYCURSOR; +#line 132 "../src/conf/parse_opts.re" + { version (); return EXIT_OK; } +#line 1824 "src/conf/parse_opts.cc" +yy439: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy463; + goto yy252; +yy440: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy464; + goto yy252; +yy441: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy465; + goto yy252; +yy442: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy466; + goto yy252; +yy443: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy467; + goto yy252; +yy444: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy468; + goto yy252; +yy445: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy469; + goto yy252; +yy446: + ++YYCURSOR; +#line 137 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::DOT); goto opt; } +#line 1857 "src/conf/parse_opts.cc" +yy448: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy470; + goto yy252; +yy449: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy471; + goto yy252; +yy450: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy472; + goto yy252; +yy451: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy473; + goto yy252; +yy452: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy474; + goto yy252; +yy453: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy475; + goto yy252; +yy454: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy476; + goto yy252; +yy455: + ++YYCURSOR; +#line 
142 "../src/conf/parse_opts.re" + { opts.set_rFlag (true); goto opt; } +#line 1890 "src/conf/parse_opts.cc" +yy457: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy477; + goto yy252; +yy458: + ++YYCURSOR; +#line 148 "../src/conf/parse_opts.re" + { opts.set_target (opt_t::SKELETON); goto opt; } +#line 1899 "src/conf/parse_opts.cc" +yy460: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy478; + goto yy252; +yy461: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy479; + goto yy252; +yy462: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy480; + goto yy252; +yy463: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy481; + goto yy252; +yy464: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy482; + goto yy252; +yy465: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy483; + goto yy252; +yy466: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy484; + goto yy252; +yy467: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy485; + goto yy252; +yy468: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy486; + goto yy252; +yy469: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'z') goto yy487; + goto yy252; +yy470: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy488; + goto yy252; +yy471: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy489; + goto yy252; +yy472: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'x') goto yy490; + goto yy252; +yy473: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy491; + goto yy252; +yy474: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy493; + goto yy252; +yy475: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy494; + goto yy252; +yy476: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy495; + goto yy252; +yy477: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy497; + goto yy252; +yy478: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy498; + goto yy252; +yy479: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') 
goto yy499; + goto yy252; +yy480: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy500; + goto yy252; +yy481: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy501; + goto yy252; +yy482: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy503; + goto yy252; +yy483: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy505; + goto yy252; +yy484: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy506; + goto yy252; +yy485: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy507; + goto yy252; +yy486: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy508; + goto yy252; +yy487: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy509; + goto yy252; +yy488: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy510; + goto yy252; +yy489: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy512; + goto yy252; +yy490: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy513; + goto yy252; +yy491: + ++YYCURSOR; +#line 143 "../src/conf/parse_opts.re" + { opts.set_sFlag (true); goto opt; } +#line 2028 "src/conf/parse_opts.cc" +yy493: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy515; + goto yy252; +yy494: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy516; + goto yy252; +yy495: + ++YYCURSOR; +#line 145 "../src/conf/parse_opts.re" + { opts.set_version (false); goto opt; } +#line 2041 "src/conf/parse_opts.cc" +yy497: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy517; + goto yy252; +yy498: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy519; + goto yy252; +yy499: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy520; + goto yy252; +yy500: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy521; + goto yy252; +yy501: + ++YYCURSOR; +#line 151 "../src/conf/parse_opts.re" + { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt; } +#line 2062 "src/conf/parse_opts.cc" +yy503: + ++YYCURSOR; +#line 134 "../src/conf/parse_opts.re" + { 
opts.set_bFlag (true); goto opt; } +#line 2067 "src/conf/parse_opts.cc" +yy505: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy523; + goto yy252; +yy506: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy524; + goto yy252; +yy507: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy525; + goto yy252; +yy508: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy526; + goto yy252; +yy509: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy528; + goto yy252; +yy510: + ++YYCURSOR; +#line 158 "../src/conf/parse_opts.re" + { goto opt_empty_class; } +#line 2092 "src/conf/parse_opts.cc" +yy512: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy529; + goto yy252; +yy513: + ++YYCURSOR; +#line 139 "../src/conf/parse_opts.re" + { opts.set_FFlag (true); goto opt; } +#line 2101 "src/conf/parse_opts.cc" +yy515: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy530; + goto yy252; +yy516: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy531; + goto yy252; +yy517: + ++YYCURSOR; +#line 160 "../src/conf/parse_opts.re" + { goto opt; } +#line 2114 "src/conf/parse_opts.cc" +yy519: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy532; + goto yy252; +yy520: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy533; + goto yy252; +yy521: + ++YYCURSOR; +#line 155 "../src/conf/parse_opts.re" + { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; } +#line 2127 "src/conf/parse_opts.cc" +yy523: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy534; + goto yy252; +yy524: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy535; + goto yy252; +yy525: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy537; + goto yy252; +yy526: + ++YYCURSOR; +#line 136 "../src/conf/parse_opts.re" + { opts.set_dFlag (true); goto opt; } +#line 2144 "src/conf/parse_opts.cc" +yy528: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy538; + goto yy252; +yy529: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy539; + goto yy252; +yy530: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy540; + goto yy252; +yy531: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy542; + goto yy252; +yy532: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy543; + goto yy252; +yy533: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy544; + goto yy252; +yy534: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'v') goto yy545; + goto yy252; +yy535: + ++YYCURSOR; +#line 147 "../src/conf/parse_opts.re" + { opts.set_bCaseInverted (true); goto opt; } +#line 2177 "src/conf/parse_opts.cc" +yy537: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy546; + goto yy252; +yy538: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy548; + goto yy252; +yy539: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy549; + goto yy252; +yy540: + ++YYCURSOR; +#line 141 "../src/conf/parse_opts.re" + { opts.set_iFlag (true); goto opt; } +#line 2194 "src/conf/parse_opts.cc" +yy542: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'd') goto yy550; + goto yy252; +yy543: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy551; + goto yy252; +yy544: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy552; + goto yy252; +yy545: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy554; + goto yy252; +yy546: + ++YYCURSOR; +#line 140 "../src/conf/parse_opts.re" + { opts.set_gFlag (true); goto opt; } +#line 2215 "src/conf/parse_opts.cc" +yy548: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy555; + goto yy252; +yy549: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy556; + goto yy252; +yy550: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy558; + goto yy252; +yy551: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy559; + goto yy252; +yy552: + ++YYCURSOR; +#line 138 "../src/conf/parse_opts.re" + { opts.set_fFlag (true); goto opt; } +#line 2236 "src/conf/parse_opts.cc" +yy554: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy560; + goto yy252; +yy555: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy562; + goto yy252; +yy556: + ++YYCURSOR; +#line 156 "../src/conf/parse_opts.re" + { goto opt_encoding_policy; } +#line 2249 "src/conf/parse_opts.cc" +yy558: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy564; + goto yy252; +yy559: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy565; + goto yy252; +yy560: + ++YYCURSOR; +#line 146 "../src/conf/parse_opts.re" + { opts.set_bCaseInsensitive (true); goto opt; } +#line 2262 "src/conf/parse_opts.cc" +yy562: + ++YYCURSOR; +#line 159 "../src/conf/parse_opts.re" + { goto opt_dfa_minimization; } +#line 2267 "src/conf/parse_opts.cc" +yy564: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy567; + goto yy252; +yy565: + ++YYCURSOR; +#line 135 "../src/conf/parse_opts.re" + { opts.set_cFlag (true); goto opt; } +#line 2276 "src/conf/parse_opts.cc" +yy567: + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy252; + ++YYCURSOR; +#line 144 "../src/conf/parse_opts.re" + { opts.set_bNoGenerationDate (true); goto opt; } +#line 2283 "src/conf/parse_opts.cc" +} +#line 161 "../src/conf/parse_opts.re" + + +opt_output: + +#line 2290 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 
128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x00) goto yy572; + if (yych != '-') goto yy574; +yy572: + ++YYCURSOR; +#line 166 "../src/conf/parse_opts.re" + { + error ("bad argument to option -o, --output: %s", *argv); + return EXIT_FAIL; + } +#line 2337 "src/conf/parse_opts.cc" +yy574: + yych = (YYCTYPE)*++YYCURSOR; + goto yy578; +yy575: + ++YYCURSOR; +#line 170 "../src/conf/parse_opts.re" + { if (!opts.output (*argv)) return EXIT_FAIL; goto opt; } +#line 2345 "src/conf/parse_opts.cc" +yy577: + ++YYCURSOR; + yych = (YYCTYPE)*YYCURSOR; +yy578: + if (yybm[0+yych] & 128) { + goto yy577; + } + goto yy575; +} +#line 171 "../src/conf/parse_opts.re" + + +opt_header: + +#line 2360 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 
128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x00) goto yy581; + if (yych != '-') goto yy583; +yy581: + ++YYCURSOR; +#line 176 "../src/conf/parse_opts.re" + { + error ("bad argument to option -t, --type-header: %s", *argv); + return EXIT_FAIL; + } +#line 2407 "src/conf/parse_opts.cc" +yy583: + yych = (YYCTYPE)*++YYCURSOR; + goto yy587; +yy584: + ++YYCURSOR; +#line 180 "../src/conf/parse_opts.re" + { opts.set_header_file (*argv); goto opt; } +#line 2415 "src/conf/parse_opts.cc" +yy586: + ++YYCURSOR; + yych = (YYCTYPE)*YYCURSOR; +yy587: + if (yybm[0+yych] & 128) { + goto yy586; + } + goto yy584; +} +#line 181 "../src/conf/parse_opts.re" + + +opt_encoding_policy: + if (!next (YYCURSOR, argv)) + { + error_arg ("--encoding-policy"); + return EXIT_FAIL; + } + +#line 2435 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 'h') { + if (yych == 'f') goto yy592; + } else { + if (yych <= 'i') goto yy593; + if (yych == 's') goto yy594; + } + ++YYCURSOR; +yy591: +#line 191 "../src/conf/parse_opts.re" + { + error ("bad argument to option --encoding-policy (expected: ignore | substitute | fail): %s", *argv); + return EXIT_FAIL; + } +#line 2452 "src/conf/parse_opts.cc" +yy592: 
+ yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy595; + goto yy591; +yy593: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'g') goto yy597; + goto yy591; +yy594: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy598; + goto yy591; +yy595: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy599; +yy596: + YYCURSOR = YYMARKER; + goto yy591; +yy597: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy600; + goto yy596; +yy598: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy601; + goto yy596; +yy599: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy602; + goto yy596; +yy600: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy603; + goto yy596; +yy601: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy604; + goto yy596; +yy602: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy605; + goto yy596; +yy603: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy607; + goto yy596; +yy604: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy608; + goto yy596; +yy605: + ++YYCURSOR; +#line 197 "../src/conf/parse_opts.re" + { opts.set_encoding_policy (Enc::POLICY_FAIL); goto opt; } +#line 2507 "src/conf/parse_opts.cc" +yy607: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy609; + goto yy596; +yy608: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy610; + goto yy596; +yy609: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy611; + goto yy596; +yy610: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy613; + goto yy596; +yy611: + ++YYCURSOR; +#line 195 "../src/conf/parse_opts.re" + { opts.set_encoding_policy (Enc::POLICY_IGNORE); goto opt; } +#line 2528 "src/conf/parse_opts.cc" +yy613: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'u') goto yy596; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 't') goto yy596; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'e') goto yy596; + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy596; + ++YYCURSOR; 
+#line 196 "../src/conf/parse_opts.re" + { opts.set_encoding_policy (Enc::POLICY_SUBSTITUTE); goto opt; } +#line 2541 "src/conf/parse_opts.cc" +} +#line 198 "../src/conf/parse_opts.re" + + +opt_input: + if (!next (YYCURSOR, argv)) + { + error_arg ("--input"); + return EXIT_FAIL; + } + +#line 2553 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 'b') goto yy621; + if (yych <= 'c') goto yy623; + if (yych <= 'd') goto yy624; +yy621: + ++YYCURSOR; +yy622: +#line 208 "../src/conf/parse_opts.re" + { + error ("bad argument to option --input (expected: default | custom): %s", *argv); + return EXIT_FAIL; + } +#line 2568 "src/conf/parse_opts.cc" +yy623: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'u') goto yy625; + goto yy622; +yy624: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'e') goto yy627; + goto yy622; +yy625: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy628; +yy626: + YYCURSOR = YYMARKER; + goto yy622; +yy627: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'f') goto yy629; + goto yy626; +yy628: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy630; + goto yy626; +yy629: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy631; + goto yy626; +yy630: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy632; + goto yy626; +yy631: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'u') goto yy633; + goto yy626; +yy632: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy634; + goto yy626; +yy633: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy635; + goto yy626; +yy634: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy636; + goto yy626; +yy635: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy638; + goto yy626; +yy636: + ++YYCURSOR; +#line 213 "../src/conf/parse_opts.re" + { opts.set_input_api (InputAPI::CUSTOM); goto opt; } +#line 2623 "src/conf/parse_opts.cc" +yy638: + yych = (YYCTYPE)*++YYCURSOR; + if (yych >= 0x01) goto yy626; + ++YYCURSOR; +#line 212 
"../src/conf/parse_opts.re" + { opts.set_input_api (InputAPI::DEFAULT); goto opt; } +#line 2630 "src/conf/parse_opts.cc" +} +#line 214 "../src/conf/parse_opts.re" + + +opt_empty_class: + if (!next (YYCURSOR, argv)) + { + error_arg ("--empty-class"); + return EXIT_FAIL; + } + +#line 2642 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych == 'e') goto yy645; + if (yych == 'm') goto yy646; + ++YYCURSOR; +yy644: +#line 224 "../src/conf/parse_opts.re" + { + error ("bad argument to option --empty-class (expected: match-empty | match-none | error): %s", *argv); + return EXIT_FAIL; + } +#line 2655 "src/conf/parse_opts.cc" +yy645: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'r') goto yy647; + goto yy644; +yy646: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy649; + goto yy644; +yy647: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy650; +yy648: + YYCURSOR = YYMARKER; + goto yy644; +yy649: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy651; + goto yy648; +yy650: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy652; + goto yy648; +yy651: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy653; + goto yy648; +yy652: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy654; + goto yy648; +yy653: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'h') goto yy655; + goto yy648; +yy654: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy656; + goto yy648; +yy655: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '-') goto yy658; + goto yy648; +yy656: + ++YYCURSOR; +#line 230 "../src/conf/parse_opts.re" + { opts.set_empty_class_policy (EMPTY_CLASS_ERROR); goto opt; } +#line 2702 "src/conf/parse_opts.cc" +yy658: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy659; + if (yych == 'n') goto yy660; + goto yy648; +yy659: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'm') goto yy661; + goto yy648; +yy660: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy662; + goto yy648; 
+yy661: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy663; + goto yy648; +yy662: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy664; + goto yy648; +yy663: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy665; + goto yy648; +yy664: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy666; + goto yy648; +yy665: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy667; + goto yy648; +yy666: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy668; + goto yy648; +yy667: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy670; + goto yy648; +yy668: + ++YYCURSOR; +#line 229 "../src/conf/parse_opts.re" + { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_NONE); goto opt; } +#line 2748 "src/conf/parse_opts.cc" +yy670: + ++YYCURSOR; +#line 228 "../src/conf/parse_opts.re" + { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_EMPTY); goto opt; } +#line 2753 "src/conf/parse_opts.cc" +} +#line 231 "../src/conf/parse_opts.re" + + +opt_dfa_minimization: + if (!next (YYCURSOR, argv)) + { + error_arg ("--minimization"); + return EXIT_FAIL; + } + +#line 2765 "src/conf/parse_opts.cc" +{ + YYCTYPE yych; + yych = (YYCTYPE)*YYCURSOR; + if (yych == 'm') goto yy676; + if (yych == 't') goto yy677; + ++YYCURSOR; +yy675: +#line 241 "../src/conf/parse_opts.re" + { + error ("bad argument to option --dfa-minimization (expected: table | moore): %s", *argv); + return EXIT_FAIL; + } +#line 2778 "src/conf/parse_opts.cc" +yy676: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'o') goto yy678; + goto yy675; +yy677: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == 'a') goto yy680; + goto yy675; +yy678: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy681; +yy679: + YYCURSOR = YYMARKER; + goto yy675; +yy680: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'b') goto yy682; + goto yy679; +yy681: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy683; + goto yy679; +yy682: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy684; 
+ goto yy679; +yy683: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy685; + goto yy679; +yy684: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy686; + goto yy679; +yy685: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy687; + goto yy679; +yy686: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= 0x00) goto yy689; + goto yy679; +yy687: + ++YYCURSOR; +#line 246 "../src/conf/parse_opts.re" + { opts.set_dfa_minimization (DFA_MINIMIZATION_MOORE); goto opt; } +#line 2825 "src/conf/parse_opts.cc" +yy689: + ++YYCURSOR; +#line 245 "../src/conf/parse_opts.re" + { opts.set_dfa_minimization (DFA_MINIMIZATION_TABLE); goto opt; } +#line 2830 "src/conf/parse_opts.cc" +} +#line 247 "../src/conf/parse_opts.re" + + +end: + if (!opts.source_file) + { + error ("no source file"); + return EXIT_FAIL; + } + + return OK; + +#undef YYCTYPE +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/parse_opts.re b/tools/re2c/src/conf/parse_opts.re new file mode 100644 index 000000000..5ff3a2175 --- /dev/null +++ b/tools/re2c/src/conf/parse_opts.re @@ -0,0 +1,261 @@ +#include "src/codegen/input_api.h" +#include "src/conf/msg.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/enc.h" + +namespace re2c +{ + +static inline bool next (char * & arg, char ** & argv) +{ + arg = *++argv; + return arg != NULL; +} + +parse_opts_t parse_opts (char ** argv, Opt & opts) +{ +#define YYCTYPE unsigned char + char * YYCURSOR; + char * YYMARKER; + Warn::option_t option; + +/*!re2c + re2c:yyfill:enable = 0; + re2c:yych:conversion = 1; + + end = "\x00"; + filename = [^\x00-] [^\x00]*; +*/ + +opt: + if (!next (YYCURSOR, argv)) + { + goto end; + } +/*!re2c + * + { + error ("bad option: %s", *argv); + return EXIT_FAIL; + } + + "--" end + { + // all remaining arguments are non-options + // so they must be input files + // re2c expects exactly one input file + for (char * f; next 
(f, argv);) + { + if (!opts.source (f)) + { + return EXIT_FAIL; + } + } + goto end; + } + + "-" end { if (!opts.source ("")) return EXIT_FAIL; goto opt; } + filename end { if (!opts.source (*argv)) return EXIT_FAIL; goto opt; } + + "-" { goto opt_short; } + "--" { goto opt_long; } + + "-W" end { warn.set_all (); goto opt; } + "-Werror" end { warn.set_all_error (); goto opt; } + "-W" { option = Warn::W; goto opt_warn; } + "-Wno-" { option = Warn::WNO; goto opt_warn; } + "-Werror-" { option = Warn::WERROR; goto opt_warn; } + "-Wno-error-" { option = Warn::WNOERROR; goto opt_warn; } +*/ + +opt_warn: +/*!re2c + * + { + error ("bad warning: %s", *argv); + return EXIT_FAIL; + } + "condition-order" end { warn.set (Warn::CONDITION_ORDER, option); goto opt; } + "empty-character-class" end { warn.set (Warn::EMPTY_CHARACTER_CLASS, option); goto opt; } + "match-empty-string" end { warn.set (Warn::MATCH_EMPTY_STRING, option); goto opt; } + "swapped-range" end { warn.set (Warn::SWAPPED_RANGE, option); goto opt; } + "undefined-control-flow" end { warn.set (Warn::UNDEFINED_CONTROL_FLOW, option); goto opt; } + "unreachable-rules" end { warn.set (Warn::UNREACHABLE_RULES, option); goto opt; } + "useless-escape" end { warn.set (Warn::USELESS_ESCAPE, option); goto opt; } +*/ + +opt_short: +/*!re2c + * + { + error ("bad short option: %s", *argv); + return EXIT_FAIL; + } + end { goto opt; } + [?h] { usage (); return EXIT_OK; } + "v" { version (); return EXIT_OK; } + "V" { vernum (); return EXIT_OK; } + "b" { opts.set_bFlag (true); goto opt_short; } + "c" { opts.set_cFlag (true); goto opt_short; } + "d" { opts.set_dFlag (true); goto opt_short; } + "D" { opts.set_target (opt_t::DOT); goto opt_short; } + "f" { opts.set_fFlag (true); goto opt_short; } + "F" { opts.set_FFlag (true); goto opt_short; } + "g" { opts.set_gFlag (true); goto opt_short; } + "i" { opts.set_iFlag (true); goto opt_short; } + "r" { opts.set_rFlag (true); goto opt_short; } + "s" { opts.set_sFlag (true); goto opt_short; } 
+ "S" { opts.set_target (opt_t::SKELETON); goto opt_short; } + "e" { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "u" { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "w" { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "x" { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "8" { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt_short; } + "o" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } + "o" { *argv = YYCURSOR; goto opt_output; } + "t" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; } + "t" { *argv = YYCURSOR; goto opt_header; } + "1" { goto opt_short; } // deprecated +*/ + +opt_long: +/*!re2c + * + { + error ("bad long option: %s", *argv); + return EXIT_FAIL; + } + "help" end { usage (); return EXIT_OK; } + "version" end { version (); return EXIT_OK; } + "vernum" end { vernum (); return EXIT_OK; } + "bit-vectors" end { opts.set_bFlag (true); goto opt; } + "start-conditions" end { opts.set_cFlag (true); goto opt; } + "debug-output" end { opts.set_dFlag (true); goto opt; } + "emit-dot" end { opts.set_target (opt_t::DOT); goto opt; } + "storable-state" end { opts.set_fFlag (true); goto opt; } + "flex-syntax" end { opts.set_FFlag (true); goto opt; } + "computed-gotos" end { opts.set_gFlag (true); goto opt; } + "no-debug-info" end { opts.set_iFlag (true); goto opt; } + "reusable" end { opts.set_rFlag (true); goto opt; } + "nested-ifs" end { opts.set_sFlag (true); goto opt; } + "no-generation-date" end { opts.set_bNoGenerationDate (true); goto opt; } + "no-version" end { opts.set_version (false); goto opt; } + "case-insensitive" end { opts.set_bCaseInsensitive (true); goto opt; } + "case-inverted" end { 
opts.set_bCaseInverted (true); goto opt; } + "skeleton" end { opts.set_target (opt_t::SKELETON); goto opt; } + "ecb" end { if (!opts.set_encoding (Enc::EBCDIC)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "unicode" end { if (!opts.set_encoding (Enc::UTF32)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "wide-chars" end { if (!opts.set_encoding (Enc::UCS2)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "utf-16" end { if (!opts.set_encoding (Enc::UTF16)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "utf-8" end { if (!opts.set_encoding (Enc::UTF8)) { error_encoding (); return EXIT_FAIL; } goto opt; } + "output" end { if (!next (YYCURSOR, argv)) { error_arg ("-o, --output"); return EXIT_FAIL; } goto opt_output; } + "type-header" end { if (!next (YYCURSOR, argv)) { error_arg ("-t, --type-header"); return EXIT_FAIL; } goto opt_header; } + "encoding-policy" end { goto opt_encoding_policy; } + "input" end { goto opt_input; } + "empty-class" end { goto opt_empty_class; } + "dfa-minimization" end { goto opt_dfa_minimization; } + "single-pass" end { goto opt; } // deprecated +*/ + +opt_output: +/*!re2c + * + { + error ("bad argument to option -o, --output: %s", *argv); + return EXIT_FAIL; + } + filename end { if (!opts.output (*argv)) return EXIT_FAIL; goto opt; } +*/ + +opt_header: +/*!re2c + * + { + error ("bad argument to option -t, --type-header: %s", *argv); + return EXIT_FAIL; + } + filename end { opts.set_header_file (*argv); goto opt; } +*/ + +opt_encoding_policy: + if (!next (YYCURSOR, argv)) + { + error_arg ("--encoding-policy"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --encoding-policy (expected: ignore | substitute | fail): %s", *argv); + return EXIT_FAIL; + } + "ignore" end { opts.set_encoding_policy (Enc::POLICY_IGNORE); goto opt; } + "substitute" end { opts.set_encoding_policy (Enc::POLICY_SUBSTITUTE); goto opt; } + "fail" end { opts.set_encoding_policy (Enc::POLICY_FAIL); goto opt; } +*/ 
+ +opt_input: + if (!next (YYCURSOR, argv)) + { + error_arg ("--input"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --input (expected: default | custom): %s", *argv); + return EXIT_FAIL; + } + "default" end { opts.set_input_api (InputAPI::DEFAULT); goto opt; } + "custom" end { opts.set_input_api (InputAPI::CUSTOM); goto opt; } +*/ + +opt_empty_class: + if (!next (YYCURSOR, argv)) + { + error_arg ("--empty-class"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --empty-class (expected: match-empty | match-none | error): %s", *argv); + return EXIT_FAIL; + } + "match-empty" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_EMPTY); goto opt; } + "match-none" end { opts.set_empty_class_policy (EMPTY_CLASS_MATCH_NONE); goto opt; } + "error" end { opts.set_empty_class_policy (EMPTY_CLASS_ERROR); goto opt; } +*/ + +opt_dfa_minimization: + if (!next (YYCURSOR, argv)) + { + error_arg ("--minimization"); + return EXIT_FAIL; + } +/*!re2c + * + { + error ("bad argument to option --dfa-minimization (expected: table | moore): %s", *argv); + return EXIT_FAIL; + } + "table" end { opts.set_dfa_minimization (DFA_MINIMIZATION_TABLE); goto opt; } + "moore" end { opts.set_dfa_minimization (DFA_MINIMIZATION_MOORE); goto opt; } +*/ + +end: + if (!opts.source_file) + { + error ("no source file"); + return EXIT_FAIL; + } + + return OK; + +#undef YYCTYPE +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/warn.cc b/tools/re2c/src/conf/warn.cc new file mode 100644 index 000000000..e309f26d6 --- /dev/null +++ b/tools/re2c/src/conf/warn.cc @@ -0,0 +1,200 @@ +#include +#include +#include +#include + +#include "src/conf/msg.h" +#include "src/conf/warn.h" + +namespace re2c { + +Warn warn; + +const uint32_t Warn::SILENT = 0; +const uint32_t Warn::WARNING = 1u << 0; +const uint32_t Warn::ERROR = 1u << 1; + +const char * Warn::names [TYPES] = +{ +#define W(x, y) y + RE2C_WARNING_TYPES +#undef W +}; + +Warn::Warn () + : mask () + , 
error_accuml (false) +{ + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] = SILENT; + } +} + +bool Warn::error () const +{ + return error_accuml; +} + +void Warn::set (type_t t, option_t o) +{ + switch (o) + { + case W: + mask[t] |= WARNING; + break; + case WNO: + mask[t] &= ~WARNING; + break; + case WERROR: + // unlike -Werror, -Werror- implies -W + mask[t] |= (WARNING | ERROR); + break; + case WNOERROR: + mask[t] &= ~ERROR; + break; + } +} + +void Warn::set_all () +{ + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] |= WARNING; + } +} + +// -Werror doesn't set any warnings: it only guarantees that if a warning +// has been set by now or will be set later then it will result into error. +void Warn::set_all_error () +{ + for (uint32_t i = 0; i < TYPES; ++i) + { + mask[i] |= ERROR; + } +} + +void Warn::fail (type_t t, uint32_t line, const char * s) +{ + if (mask[t] & WARNING) + { + // -Werror has no effect + warning (names[t], line, false, "%s", s); + } +} + +void Warn::condition_order (uint32_t line) +{ + if (mask[CONDITION_ORDER] & WARNING) + { + const bool e = mask[CONDITION_ORDER] & ERROR; + error_accuml |= e; + warning (names[CONDITION_ORDER], line, e, + "looks like you use hardcoded numbers instead of autogenerated condition names: " + "better add '/*!types:re2c*/' directive or '-t, --type-header' option " + "and don't rely on fixed condition order."); + } +} + +void Warn::empty_class (uint32_t line) +{ + if (mask[EMPTY_CHARACTER_CLASS] & WARNING) + { + const bool e = mask[EMPTY_CHARACTER_CLASS] & ERROR; + error_accuml |= e; + warning (names[EMPTY_CHARACTER_CLASS], line, e, "empty character class"); + } +} + +void Warn::match_empty_string (uint32_t line) +{ + if (mask[MATCH_EMPTY_STRING] & WARNING) + { + const bool e = mask[MATCH_EMPTY_STRING] & ERROR; + error_accuml |= e; + warning (names[MATCH_EMPTY_STRING], line, e, "rule matches empty string"); + } +} + +void Warn::swapped_range (uint32_t line, uint32_t l, uint32_t u) +{ + if (mask[SWAPPED_RANGE] & 
WARNING) + { + const bool e = mask[SWAPPED_RANGE] & ERROR; + error_accuml |= e; + warning (names[SWAPPED_RANGE], line, e, "range lower bound (0x%X) is greater than upper bound (0x%X), swapping", l, u); + } +} + +void Warn::undefined_control_flow (uint32_t line, const std::string & cond, std::vector & ways, bool overflow) +{ + if (mask[UNDEFINED_CONTROL_FLOW] & WARNING) + { + const bool e = mask[UNDEFINED_CONTROL_FLOW] & ERROR; + error_accuml |= e; + + // report shorter patterns first + std::sort (ways.begin (), ways.end (), cmp_ways); + + warning_start (line, e); + fprintf (stderr, "control flow %sis undefined for strings that match ", incond (cond).c_str ()); + const size_t count = ways.size (); + if (count == 1) + { + fprint_way (stderr, ways[0]); + } + else + { + for (size_t i = 0; i < count; ++i) + { + fprintf (stderr, "\n\t"); + fprint_way (stderr, ways[i]); + } + fprintf (stderr, "\n"); + } + if (overflow) + { + fprintf (stderr, " ... and a few more"); + } + fprintf (stderr, ", use default rule '*'"); + warning_end (names[UNDEFINED_CONTROL_FLOW], e); + } +} + +void Warn::unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules) +{ + if (mask[UNREACHABLE_RULES] & WARNING) + { + const bool e = mask[UNREACHABLE_RULES] & ERROR; + error_accuml |= e; + warning_start (rule.line, e); + fprintf (stderr, "unreachable rule %s", incond (cond).c_str ()); + const size_t shadows = rule.shadow.size (); + if (shadows > 0) + { + const char * pl = shadows > 1 + ? 
"s" + : ""; + std::set::const_iterator i = rule.shadow.begin (); + fprintf (stderr, "(shadowed by rule%s at line%s %u", pl, pl, rules.find (*i)->second.line); + for (++i; i != rule.shadow.end (); ++i) + { + fprintf (stderr, ", %u", rules.find (*i)->second.line); + } + fprintf (stderr, ")"); + } + warning_end (names[UNREACHABLE_RULES], e); + } +} + +void Warn::useless_escape (uint32_t line, uint32_t col, char c) +{ + if (mask[USELESS_ESCAPE] & WARNING) + { + const bool e = mask[USELESS_ESCAPE] & ERROR; + error_accuml |= e; + warning (names[USELESS_ESCAPE], line, e, "column %u: escape has no effect: '\\%c'", col, c); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/conf/warn.h b/tools/re2c/src/conf/warn.h new file mode 100644 index 000000000..fb313b01d --- /dev/null +++ b/tools/re2c/src/conf/warn.h @@ -0,0 +1,67 @@ +#ifndef _RE2C_CONF_WARN_ +#define _RE2C_CONF_WARN_ + +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/ir/skeleton/way.h" +#include "src/parse/rules.h" + +namespace re2c { + +#define RE2C_WARNING_TYPES \ + W (CONDITION_ORDER, "condition-order"), \ + W (EMPTY_CHARACTER_CLASS, "empty-character-class"), \ + W (MATCH_EMPTY_STRING, "match-empty-string"), \ + W (SWAPPED_RANGE, "swapped-range"), \ + W (UNDEFINED_CONTROL_FLOW, "undefined-control-flow"), \ + W (UNREACHABLE_RULES, "unreachable-rules"), \ + W (USELESS_ESCAPE, "useless-escape"), + +class Warn +{ +public: + enum type_t + { +#define W(x, y) x + RE2C_WARNING_TYPES +#undef W + TYPES // count + }; + enum option_t + { + W, + WNO, + WERROR, + WNOERROR + }; + +private: + static const uint32_t SILENT; + static const uint32_t WARNING; + static const uint32_t ERROR; + static const char * names [TYPES]; + uint32_t mask[TYPES]; + bool error_accuml; + +public: + Warn (); + bool error () const; + void set (type_t t, option_t o); + void set_all (); + void set_all_error (); + void fail (type_t t, uint32_t line, const char * s); + + void condition_order (uint32_t line); + void empty_class 
(uint32_t line); + void match_empty_string (uint32_t line); + void swapped_range (uint32_t line, uint32_t l, uint32_t u); + void undefined_control_flow (uint32_t line, const std::string & cond, std::vector & ways, bool overflow); + void unreachable_rule (const std::string & cond, const rule_info_t & rule, const rules_t & rules); + void useless_escape (uint32_t line, uint32_t col, char c); +}; + +} // namespace re2c + +#endif // _RE2C_CONF_WARN_ diff --git a/tools/re2c/src/globals.h b/tools/re2c/src/globals.h new file mode 100644 index 000000000..b2ea67ae0 --- /dev/null +++ b/tools/re2c/src/globals.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_GLOBALS_ +#define _RE2C_GLOBALS_ + +#include + +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/util/c99_stdint.h" + +namespace re2c +{ + +extern bool bUsedYYBitmap; +extern bool bWroteGetState; +extern bool bWroteCondCheck; +extern uint32_t last_fill_index; +extern std::string yySetupRule; + +extern Opt opts; +extern Warn warn; + +} // end namespace re2c + +#endif // _RE2C_GLOBALS_ diff --git a/tools/re2c/src/ir/adfa/action.h b/tools/re2c/src/ir/adfa/action.h new file mode 100644 index 000000000..abc2d9907 --- /dev/null +++ b/tools/re2c/src/ir/adfa/action.h @@ -0,0 +1,109 @@ +#ifndef _RE2C_IR_ADFA_ACTION_ +#define _RE2C_IR_ADFA_ACTION_ + +#include + +#include "src/codegen/label.h" +#include "src/util/c99_stdint.h" +#include "src/util/uniq_vector.h" + +namespace re2c +{ + +struct OutputFile; +class RuleOp; +class State; + +struct Initial +{ + label_t label; + bool setMarker; + + inline Initial (label_t l, bool b) + : label (l) + , setMarker (b) + {} +}; + +typedef uniq_vector_t accept_t; + +class Action +{ +public: + enum type_t + { + MATCH, + INITIAL, + SAVE, + MOVE, + ACCEPT, + RULE + } type; + union + { + Initial * initial; + uint32_t save; + const accept_t * accepts; + const RuleOp * rule; + } info; + +public: + inline Action () + : type (MATCH) + , info () + {} + ~Action () + { + clear (); + } + void set_initial 
(label_t label, bool used_marker) + { + clear (); + type = INITIAL; + info.initial = new Initial (label, used_marker); + } + void set_save (uint32_t save) + { + clear (); + type = SAVE; + info.save = save; + } + void set_move () + { + clear (); + type = MOVE; + } + void set_accept (const accept_t * accepts) + { + clear (); + type = ACCEPT; + info.accepts = accepts; + } + void set_rule (const RuleOp * const rule) + { + clear (); + type = RULE; + info.rule = rule; + } + +private: + void clear () + { + switch (type) + { + case INITIAL: + delete info.initial; + break; + case MATCH: + case SAVE: + case MOVE: + case ACCEPT: + case RULE: + break; + } + } +}; + +} // namespace re2c + +#endif // _RE2C_IR_ADFA_ACTION_ diff --git a/tools/re2c/src/ir/adfa/adfa.cc b/tools/re2c/src/ir/adfa/adfa.cc new file mode 100644 index 000000000..cb41d0351 --- /dev/null +++ b/tools/re2c/src/ir/adfa/adfa.cc @@ -0,0 +1,135 @@ +#include +#include +#include +#include +#include + +#include "src/codegen/go.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/dfa/dfa.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/util/allocate.h" + +namespace re2c +{ + +DFA::DFA + ( const dfa_t &dfa + , const std::vector &fill + , Skeleton *skel + , const charset_t &charset + , const std::string &n + , const std::string &c + , uint32_t l + ) + : accepts () + , skeleton (skel) + , name (n) + , cond (c) + , line (l) + , lbChar(0) + , ubChar(charset.back()) + , nStates(0) + , head(NULL) + + // statistics + , max_fill (0) + , need_backup (false) + , need_backupctx (false) + , need_accept (false) +{ + const size_t nstates = dfa.states.size(); + const size_t nchars = dfa.nchars; + + State **i2s = new State*[nstates]; + for (size_t i = 0; i < nstates; ++i) + { + i2s[i] = new State; + } + + State **p = &head; + for (size_t i = 0; i < nstates; ++i) + { + dfa_state_t *t = dfa.states[i]; + State *s = i2s[i]; + + ++nStates; + *p = s; + p = &s->next; + + s->isPreCtxt = t->ctx; + s->rule = t->rule; + s->fill = fill[i]; 
+ s->go.span = allocate(nchars); + uint32_t j = 0; + for (uint32_t c = 0; c < nchars; ++j) + { + const size_t to = t->arcs[c]; + for (;++c < nchars && t->arcs[c] == to;); + s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to]; + s->go.span[j].ub = charset[c]; + } + s->go.nSpans = j; + } + *p = NULL; + + delete[] i2s; +} + +DFA::~DFA() +{ + State *s; + + while ((s = head)) + { + head = s->next; + delete s; + } + + delete skeleton; +} + +void DFA::reorder() +{ + std::vector ord; + ord.reserve(nStates); + + std::queue todo; + todo.push(head); + + std::set done; + done.insert(head); + + for(;!todo.empty();) + { + State *s = todo.front(); + todo.pop(); + ord.push_back(s); + for(uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *q = s->go.span[i].to; + if(q && done.insert(q).second) + { + todo.push(q); + } + } + } + + assert(nStates == ord.size()); + + ord.push_back(NULL); + for(uint32_t i = 0; i < nStates; ++i) + { + ord[i]->next = ord[i + 1]; + } +} + +void DFA::addState(State *s, State *next) +{ + ++nStates; + s->next = next->next; + next->next = s; +} + +} // namespace re2c + diff --git a/tools/re2c/src/ir/adfa/adfa.h b/tools/re2c/src/ir/adfa/adfa.h new file mode 100644 index 000000000..95f512d63 --- /dev/null +++ b/tools/re2c/src/ir/adfa/adfa.h @@ -0,0 +1,101 @@ +#ifndef _RE2C_IR_ADFA_ADFA_ +#define _RE2C_IR_ADFA_ADFA_ + +#include +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/codegen/go.h" +#include "src/codegen/label.h" +#include "src/ir/adfa/action.h" +#include "src/ir/regexp/regexp.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct Skeleton; +struct Output; +struct OutputFile; +struct dfa_t; + +struct State +{ + label_t label; + RuleOp * rule; + State * next; + size_t fill; + + bool isPreCtxt; + bool isBase; + Go go; + Action action; + + State () + : label (label_t::first ()) + , rule (NULL) + , next (0) + , fill (0) + , isPreCtxt (false) + , isBase (false) + , go () + , action () + {} + ~State () + { + operator delete 
(go.span); + } + + FORBID_COPY (State); +}; + +class DFA +{ + accept_t accepts; + Skeleton * skeleton; + +public: + const std::string name; + const std::string cond; + const uint32_t line; + + uint32_t lbChar; + uint32_t ubChar; + uint32_t nStates; + State * head; + + // statistics + size_t max_fill; + bool need_backup; + bool need_backupctx; + bool need_accept; + +public: + DFA ( const dfa_t &dfa + , const std::vector &fill + , Skeleton *skel + , const charset_t &charset + , const std::string &n + , const std::string &c + , uint32_t l + ); + ~DFA (); + void reorder(); + void prepare(); + void calc_stats(); + void emit (Output &, uint32_t &, bool, bool &); + +private: + void addState(State*, State *); + void split (State *); + void findBaseState (); + void count_used_labels (std::set & used, label_t prolog, label_t start, bool force_start) const; + void emit_body (OutputFile &, uint32_t &, const std::set & used_labels, label_t initial) const; + + FORBID_COPY (DFA); +}; + +} // namespace re2c + +#endif // _RE2C_IR_ADFA_ADFA_ diff --git a/tools/re2c/src/ir/adfa/prepare.cc b/tools/re2c/src/ir/adfa/prepare.cc new file mode 100644 index 000000000..39cf65c1b --- /dev/null +++ b/tools/re2c/src/ir/adfa/prepare.cc @@ -0,0 +1,268 @@ +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/go.h" +#include "src/globals.h" +#include "src/ir/adfa/action.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/util/allocate.h" + +namespace re2c { + +void DFA::split(State *s) +{ + State *move = new State; + addState(move, s); + move->action.set_move (); + move->rule = s->rule; + move->fill = s->fill; + move->go = s->go; + s->rule = NULL; + s->go.nSpans = 1; + s->go.span = allocate (1); + s->go.span[0].ub = ubChar; + s->go.span[0].to = move; +} + +static uint32_t merge(Span *x0, State *fg, State *bg) +{ + Span *x = x0, *f = fg->go.span, *b = bg->go.span; + uint32_t 
nf = fg->go.nSpans, nb = bg->go.nSpans; + State *prev = NULL, *to; + // NB: we assume both spans are for same range + + for (;;) + { + if (f->ub == b->ub) + { + to = f->to == b->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = f->ub; + ++x; + ++f; + --nf; + ++b; + --nb; + + if (nf == 0 && nb == 0) + { + return static_cast (x - x0); + } + } + + while (f->ub < b->ub) + { + to = f->to == b->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = f->ub; + ++x; + ++f; + --nf; + } + + while (b->ub < f->ub) + { + to = b->to == f->to ? bg : f->to; + + if (to == prev) + { + --x; + } + else + { + x->to = prev = to; + } + + x->ub = b->ub; + ++x; + ++b; + --nb; + } + } +} + +void DFA::findBaseState() +{ + Span *span = allocate (ubChar - lbChar); + + for (State *s = head; s; s = s->next) + { + if (s->fill == 0) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + State *to = s->go.span[i].to; + + if (to->isBase) + { + to = to->go.span[0].to; + uint32_t nSpans = merge(span, s, to); + + if (nSpans < s->go.nSpans) + { + operator delete (s->go.span); + s->go.nSpans = nSpans; + s->go.span = allocate (nSpans); + memcpy(s->go.span, span, nSpans*sizeof(Span)); + } + + break; + } + } + } + } + + operator delete (span); +} + +void DFA::prepare () +{ + bUsedYYBitmap = false; + + // create rule states + std::map rules; + for (State * s = head; s; s = s->next) + { + if (s->rule) + { + if (rules.find (s->rule->rank) == rules.end ()) + { + State *n = new State; + n->action.set_rule (s->rule); + rules[s->rule->rank] = n; + addState(n, s); + } + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to) + { + s->go.span[i].to = rules[s->rule->rank]; + } + } + } + } + + // create default state (if needed) + State * default_state = NULL; + for (State * s = head; s; s = s->next) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to) + { + if (!default_state) + { + 
default_state = new State; + addState(default_state, s); + } + s->go.span[i].to = default_state; + } + } + } + + // find backup states and create accept state (if needed) + if (default_state) + { + for (State * s = head; s; s = s->next) + { + if (s->rule) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE) + { + const uint32_t accept = static_cast (accepts.find_or_add (rules[s->rule->rank])); + s->action.set_save (accept); + } + } + } + } + default_state->action.set_accept (&accepts); + } + + // split ``base'' states into two parts + for (State * s = head; s; s = s->next) + { + s->isBase = false; + + if (s->fill != 0) + { + for (uint32_t i = 0; i < s->go.nSpans; ++i) + { + if (s->go.span[i].to == s) + { + s->isBase = true; + split(s); + + if (opts->bFlag) + { + BitMap::find(&s->next->go, s); + } + + s = s->next; + break; + } + } + } + } + + // find ``base'' state, if possible + findBaseState(); + + for (State * s = head; s; s = s->next) + { + s->go.init (s); + } +} + +void DFA::calc_stats () +{ + // calculate 'YYMAXFILL' + max_fill = 0; + for (State * s = head; s; s = s->next) + { + if (max_fill < s->fill) + { + max_fill = s->fill; + } + } + + // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used + need_backup = accepts.size () > 0; + + // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used + for (State * s = head; s; s = s->next) + { + if (s->isPreCtxt) + { + need_backupctx = true; + } + } + + // determine if 'yyaccept' variable is used + need_accept = accepts.size () > 1; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/compile.cc b/tools/re2c/src/ir/compile.cc new file mode 100644 index 000000000..b38b398a1 --- /dev/null +++ b/tools/re2c/src/ir/compile.cc @@ -0,0 +1,104 @@ +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/ir/compile.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/dfa/dfa.h" +#include 
// Build the name of a DFA from the source line of its block and the
// condition it belongs to: "line<N>" when 'cond' is empty, otherwise
// "line<N>_<cond>".
static std::string make_name(const std::string &cond, uint32_t line)
{
	std::ostringstream buf;
	buf << "line" << line;
	if (cond.empty ())
	{
		return buf.str();
	}
	buf << "_" << cond;
	return buf.str();
}
+ * The order of states is affected by many factors, e.g.: + * - flipping left and right subtrees of alternative when constructing + * AST (also applies to iteration and counted repetition) + * - changing the order in which graph nodes are visited (applies to + * any intermediate representation: bytecode, NFA, DFA, etc.) + * + * To make the resulting code independent of such changes, we hereby + * reorder DFA states. The ordering scheme is very simple: + * + * Starting with DFA root, walk DFA nodes in breadth-first order. + * Child nodes are ordered accoding to the (alphabetically) first symbol + * leading to each node. Each node must be visited exactly once. + * Default state (NULL) is always the last state. + */ + adfa->reorder(); + + // skeleton is constructed, do further DFA transformations + adfa->prepare(); + + // finally gather overall DFA statistics + adfa->calc_stats(); + + // accumulate global statistics from this particular DFA + output.max_fill = std::max (output.max_fill, adfa->max_fill); + if (adfa->need_accept) + { + output.source.set_used_yyaccept (); + } + + return make_smart_ptr(adfa); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/compile.h b/tools/re2c/src/ir/compile.h new file mode 100644 index 000000000..6883c1c3f --- /dev/null +++ b/tools/re2c/src/ir/compile.h @@ -0,0 +1,20 @@ +#ifndef _RE2C_IR_COMPILE_ +#define _RE2C_IR_COMPILE_ + +#include "src/util/c99_stdint.h" +#include + +#include "src/util/smart_ptr.h" + +namespace re2c +{ + +class DFA; +struct Output; +struct Spec; + +smart_ptr compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits); + +} // namespace re2c + +#endif // _RE2C_IR_COMPILE_ diff --git a/tools/re2c/src/ir/dfa/determinization.cc b/tools/re2c/src/ir/dfa/determinization.cc new file mode 100644 index 000000000..01a04cc7e --- /dev/null +++ b/tools/re2c/src/ir/dfa/determinization.cc @@ -0,0 +1,197 @@ +#include +#include +#include +#include +#include + +#include "src/ir/dfa/dfa.h" +#include 
"src/ir/nfa/nfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/parse/rules.h" +#include "src/util/ord_hash_set.h" +#include "src/util/range.h" + +namespace re2c +{ + +const size_t dfa_t::NIL = std::numeric_limits::max(); + +/* + * note [marking DFA states] + * + * DFA state is a set of NFA states. + * However, DFA state includes not all NFA states that are in + * epsilon-closure (NFA states that have only epsilon-transitions + * and are not context of final states are omitted). + * The included states are called 'kernel' states. + * + * We mark visited NFA states during closure construction. + * These marks serve two purposes: + * - avoid loops in NFA + * - avoid duplication of NFA states in kernel + * + * Note that after closure construction: + * - all non-kernel states must be unmarked (these states are + * not stored in kernel and it is impossible to unmark them + * afterwards) + * - all kernel states must be marked (because we may later + * extend this kernel with epsilon-closure of another NFA + * state). Kernel states are unmarked later (before finding + * or adding DFA state). 
+ */ +static nfa_state_t **closure(nfa_state_t **cP, nfa_state_t *n) +{ + if (!n->mark) + { + n->mark = true; + switch (n->type) + { + case nfa_state_t::ALT: + cP = closure(cP, n->value.alt.out2); + cP = closure(cP, n->value.alt.out1); + n->mark = false; + break; + case nfa_state_t::CTX: + *(cP++) = n; + cP = closure(cP, n->value.ctx.out); + break; + default: + *(cP++) = n; + break; + } + } + + return cP; +} + +static size_t find_state + ( nfa_state_t **kernel + , nfa_state_t **end + , ord_hash_set_t &kernels + ) +{ + // zero-sized kernel corresponds to default state + if (kernel == end) + { + return dfa_t::NIL; + } + + // see note [marking DFA states] + for (nfa_state_t **p = kernel; p != end; ++p) + { + (*p)->mark = false; + } + + // sort kernel states: we need this to get stable hash + // and to compare states with simple 'memcmp' + std::sort(kernel, end); + const size_t size = static_cast(end - kernel) * sizeof(nfa_state_t*); + return kernels.insert(kernel, size); +} + +dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules) + : states() + , nchars(charset.size() - 1) // (n + 1) bounds for n ranges +{ + std::map > s2rules; + ord_hash_set_t kernels; + nfa_state_t **const buffer = new nfa_state_t*[nfa.size]; + std::vector > arcs(nchars); + + find_state(buffer, closure(buffer, nfa.root), kernels); + for (size_t i = 0; i < kernels.size(); ++i) + { + dfa_state_t *s = new dfa_state_t; + states.push_back(s); + + nfa_state_t **kernel; + const size_t kernel_size = kernels.deref(i, kernel); + for (size_t j = 0; j < kernel_size; ++j) + { + nfa_state_t *n = kernel[j]; + switch (n->type) + { + case nfa_state_t::RAN: + { + nfa_state_t *m = n->value.ran.out; + size_t c = 0; + for (Range *r = n->value.ran.ran; r; r = r->next ()) + { + for (; charset[c] != r->lower(); ++c); + for (; charset[c] != r->upper(); ++c) + { + arcs[c].push_back(m); + } + } + break; + } + case nfa_state_t::CTX: + s->ctx = true; + break; + case nfa_state_t::FIN: + 
s2rules[i].insert(n->value.fin.rule); + break; + default: + break; + } + } + + s->arcs = new size_t[nchars]; + for(size_t c = 0; c < nchars; ++c) + { + nfa_state_t **end = buffer; + for (std::vector::const_iterator j = arcs[c].begin(); j != arcs[c].end(); ++j) + { + end = closure(end, *j); + } + s->arcs[c] = find_state(buffer, end, kernels); + } + + for(size_t c = 0; c < nchars; ++c) + { + arcs[c].clear(); + } + } + delete[] buffer; + + const size_t count = states.size(); + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = states[i]; + std::set &rs = s2rules[i]; + // for each final state: choose the rule with the smallest rank + for (std::set::const_iterator j = rs.begin(); j != rs.end(); ++j) + { + RuleOp *rule = *j; + if (!s->rule || rule->rank < s->rule->rank) + { + s->rule = rule; + } + } + // other rules are shadowed by the chosen rule + for (std::set::const_iterator j = rs.begin(); j != rs.end(); ++j) + { + RuleOp *rule = *j; + if (s->rule != rule) + { + rules[rule->rank].shadow.insert(s->rule->rank); + } + } + } +} + +dfa_t::~dfa_t() +{ + std::vector::iterator + i = states.begin(), + e = states.end(); + for (; i != e; ++i) + { + delete *i; + } +} + +} // namespace re2c + diff --git a/tools/re2c/src/ir/dfa/dfa.h b/tools/re2c/src/ir/dfa/dfa.h new file mode 100644 index 000000000..459ed4ab3 --- /dev/null +++ b/tools/re2c/src/ir/dfa/dfa.h @@ -0,0 +1,58 @@ +#ifndef _RE2C_IR_DFA_DFA_ +#define _RE2C_IR_DFA_DFA_ + +#include "src/util/c99_stdint.h" +#include + +#include "src/ir/regexp/regexp.h" +#include "src/parse/rules.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct nfa_t; +class RuleOp; + +struct dfa_state_t +{ + size_t *arcs; + RuleOp *rule; + bool ctx; + + dfa_state_t() + : arcs(NULL) + , rule(NULL) + , ctx(false) + {} + ~dfa_state_t() + { + delete[] arcs; + } + + FORBID_COPY(dfa_state_t); +}; + +struct dfa_t +{ + static const size_t NIL; + + std::vector states; + const size_t nchars; + + dfa_t(const nfa_t &nfa, const charset_t 
&charset, rules_t &rules); + ~dfa_t(); +}; + +enum dfa_minimization_t +{ + DFA_MINIMIZATION_TABLE, + DFA_MINIMIZATION_MOORE +}; + +void minimization(dfa_t &dfa); +void fillpoints(const dfa_t &dfa, std::vector &fill); + +} // namespace re2c + +#endif // _RE2C_IR_DFA_DFA_ diff --git a/tools/re2c/src/ir/dfa/fillpoints.cc b/tools/re2c/src/ir/dfa/fillpoints.cc new file mode 100644 index 000000000..f4488ea75 --- /dev/null +++ b/tools/re2c/src/ir/dfa/fillpoints.cc @@ -0,0 +1,154 @@ +#include +#include +#include + +#include "src/ir/dfa/dfa.h" + +namespace re2c +{ + +static const size_t INFINITY = std::numeric_limits::max(); +static const size_t UNDEFINED = INFINITY - 1; + +static bool loopback(size_t node, size_t narcs, const size_t *arcs) +{ + for (size_t i = 0; i < narcs; ++i) + { + if (arcs[i] == node) + { + return true; + } + } + return false; +} + +/* + * node [finding strongly connected components of DFA] + * + * A slight modification of Tarjan's algorithm. + * + * The algorithm walks graph in deep-first order. It maintains a stack + * of nodes that have already been visited but haven't been assigned to + * SCC yet. For each node the algorithm calculates 'lowlink': index of + * the highest ancestor node reachable in one step from a descendant of + * the node. Lowlink is used to determine when a set of nodes should be + * popped off the stack into a new SCC. + * + * We use lowlink to hold different kinds of information: + * - values in range [0 .. stack size] mean that this node is on stack + * (link to a node with the smallest index reachable from this one) + * - UNDEFINED means that this node has not been visited yet + * - INFINITY means that this node has already been popped off stack + * + * We use stack size (rather than topological sort index) as unique index + * of a node on stack. This is safe because indices of nodes on stack are + * still unique and less than indices of nodes that have been popped off + * stack (INFINITY). 
+ * + */ +static void scc( + const dfa_t &dfa, + std::stack &stack, + std::vector &lowlink, + std::vector &trivial, + size_t i) +{ + const size_t link = stack.size(); + lowlink[i] = link; + stack.push(i); + + const size_t *arcs = dfa.states[i]->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + const size_t j = arcs[c]; + if (j != dfa_t::NIL) + { + if (lowlink[j] == UNDEFINED) + { + scc(dfa, stack, lowlink, trivial, j); + } + if (lowlink[j] < lowlink[i]) + { + lowlink[i] = lowlink[j]; + } + } + } + + if (lowlink[i] == link) + { + // SCC is non-trivial (has loops) iff it either: + // - consists of multiple nodes (they all must be interconnected) + // - consists of single node which loops back to itself + trivial[i] = i == stack.top() + && !loopback(i, dfa.nchars, arcs); + + size_t j; + do + { + j = stack.top(); + stack.pop(); + lowlink[j] = INFINITY; + } + while (j != i); + } +} + +static void calc_fill( + const dfa_t &dfa, + const std::vector &trivial, + std::vector &fill, + size_t i) +{ + if (fill[i] == UNDEFINED) + { + fill[i] = 0; + const size_t *arcs = dfa.states[i]->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + const size_t j = arcs[c]; + if (j != dfa_t::NIL) + { + calc_fill(dfa, trivial, fill, j); + size_t max = 1; + if (trivial[j]) + { + max += fill[j]; + } + if (max > fill[i]) + { + fill[i] = max; + } + } + } + } +} + +void fillpoints(const dfa_t &dfa, std::vector &fill) +{ + const size_t size = dfa.states.size(); + + // find DFA states that belong to non-trivial SCC + std::stack stack; + std::vector lowlink(size, UNDEFINED); + std::vector trivial(size, false); + scc(dfa, stack, lowlink, trivial, 0); + + // for each DFA state, calculate YYFILL argument: + // maximal path length to the next YYFILL state + fill.resize(size, UNDEFINED); + calc_fill(dfa, trivial, fill, 0); + + // The following states must trigger YYFILL: + // - inital state + // - all states in non-trivial SCCs + // for other states, reset YYFILL argument to zero + for (size_t i = 1; 
i < size; ++i) + { + if (trivial[i]) + { + fill[i] = 0; + } + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/dfa/minimization.cc b/tools/re2c/src/ir/dfa/minimization.cc new file mode 100644 index 000000000..79c93e490 --- /dev/null +++ b/tools/re2c/src/ir/dfa/minimization.cc @@ -0,0 +1,252 @@ +#include +#include +#include + +#include "src/conf/opt.h" +#include "src/ir/dfa/dfa.h" +#include "src/globals.h" + +namespace re2c +{ + +class RuleOp; + +/* + * note [DFA minimization: table filling algorithm] + * + * This algorithm is simple and slow; it's a reference implementation. + * + * The algorithm constructs (strictly lower triangular) boolean matrix + * indexed by DFA states. Each matrix cell (S1,S2) indicates if states + * S1 and S2 are distinguishable. Initialy states are distinguished + * according to their rule and context. One step of the algorithm + * updates the matrix as follows: each pair of states S1 and S2 is + * marked as distinguishable iff exist transitions from S1 and S2 on + * the same symbol that go to distinguishable states. The algorithm + * loops until the matrix stops changing. 
+ */ +static void minimization_table( + size_t *part, + const std::vector &states, + size_t nchars) +{ + const size_t count = states.size(); + + bool **tbl = new bool*[count]; + tbl[0] = new bool[count * (count - 1) / 2]; + for (size_t i = 0; i < count - 1; ++i) + { + tbl[i + 1] = tbl[i] + i; + } + + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s1 = states[i]; + for (size_t j = 0; j < i; ++j) + { + dfa_state_t *s2 = states[j]; + tbl[i][j] = s1->ctx != s2->ctx + || s1->rule != s2->rule; + } + } + + for (bool loop = true; loop;) + { + loop = false; + for (size_t i = 0; i < count; ++i) + { + for (size_t j = 0; j < i; ++j) + { + if (!tbl[i][j]) + { + for (size_t k = 0; k < nchars; ++k) + { + size_t oi = states[i]->arcs[k]; + size_t oj = states[j]->arcs[k]; + if (oi < oj) + { + std::swap(oi, oj); + } + if (oi != oj && + (oi == dfa_t::NIL || + oj == dfa_t::NIL || + tbl[oi][oj])) + { + tbl[i][j] = true; + loop = true; + break; + } + } + } + } + } + } + + for (size_t i = 0; i < count; ++i) + { + part[i] = i; + for (size_t j = 0; j < i; ++j) + { + if (!tbl[i][j]) + { + part[i] = j; + break; + } + } + } + + delete[] tbl[0]; + delete[] tbl; +} + +/* + * note [DFA minimization: Moore algorithm] + * + * The algorithm maintains partition of DFA states. + * Initial partition is coarse: states are distinguished according + * to their rule and context. Partition is gradually refined: each + * set of states is split into minimal number of subsets such that + * for all states in a subset transitions on the same symbol go to + * the same set of states. + * The algorithm loops until partition stops changing. 
+ */ +static void minimization_moore( + size_t *part, + const std::vector &states, + size_t nchars) +{ + const size_t count = states.size(); + + size_t *next = new size_t[count]; + + std::map, size_t> init; + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = states[i]; + std::pair key(s->rule, s->ctx); + if (init.insert(std::make_pair(key, i)).second) + { + part[i] = i; + next[i] = dfa_t::NIL; + } + else + { + const size_t j = init[key]; + part[i] = j; + next[i] = next[j]; + next[j] = i; + } + } + + size_t *out = new size_t[nchars * count]; + size_t *diff = new size_t[count]; + for (bool loop = true; loop;) + { + loop = false; + for (size_t i = 0; i < count; ++i) + { + if (i != part[i] || next[i] == dfa_t::NIL) + { + continue; + } + + for (size_t j = i; j != dfa_t::NIL; j = next[j]) + { + size_t *o = &out[j * nchars]; + size_t *a = states[j]->arcs; + for (size_t c = 0; c < nchars; ++c) + { + o[c] = a[c] == dfa_t::NIL + ? dfa_t::NIL + : part[a[c]]; + } + } + + size_t diff_count = 0; + for (size_t j = i; j != dfa_t::NIL;) + { + const size_t j_next = next[j]; + size_t n = 0; + for (; n < diff_count; ++n) + { + size_t k = diff[n]; + if (memcmp(&out[j * nchars], + &out[k * nchars], + nchars * sizeof(size_t)) == 0) + { + part[j] = k; + next[j] = next[k]; + next[k] = j; + break; + } + } + if (n == diff_count) + { + diff[diff_count++] = j; + part[j] = j; + next[j] = dfa_t::NIL; + } + j = j_next; + } + loop |= diff_count > 1; + } + } + delete[] out; + delete[] diff; + delete[] next; +} + +void minimization(dfa_t &dfa) +{ + const size_t count = dfa.states.size(); + + size_t *part = new size_t[count]; + + switch (opts->dfa_minimization) + { + case DFA_MINIMIZATION_TABLE: + minimization_table(part, dfa.states, dfa.nchars); + break; + case DFA_MINIMIZATION_MOORE: + minimization_moore(part, dfa.states, dfa.nchars); + break; + } + + size_t *compact = new size_t[count]; + for (size_t i = 0, j = 0; i < count; ++i) + { + if (i == part[i]) + { + compact[i] = j++; + } + } + + 
size_t new_count = 0; + for (size_t i = 0; i < count; ++i) + { + dfa_state_t *s = dfa.states[i]; + if (i == part[i]) + { + size_t *arcs = s->arcs; + for (size_t c = 0; c < dfa.nchars; ++c) + { + if (arcs[c] != dfa_t::NIL) + { + arcs[c] = compact[part[arcs[c]]]; + } + } + dfa.states[new_count++] = s; + } + else + { + delete s; + } + } + dfa.states.resize(new_count); + + delete[] compact; + delete[] part; +} + +} // namespace re2c + diff --git a/tools/re2c/src/ir/nfa/calc_size.cc b/tools/re2c/src/ir/nfa/calc_size.cc new file mode 100644 index 000000000..39f0b4e1b --- /dev/null +++ b/tools/re2c/src/ir/nfa/calc_size.cc @@ -0,0 +1,50 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" + +namespace re2c +{ + +uint32_t AltOp::calc_size() const +{ + return exp1->calc_size() + + exp2->calc_size() + + 1; +} + +uint32_t CatOp::calc_size() const +{ + return exp1->calc_size() + + exp2->calc_size(); +} + +uint32_t CloseOp::calc_size() const +{ + return exp->calc_size() + 1; +} + +uint32_t MatchOp::calc_size() const +{ + return 1; +} + +uint32_t NullOp::calc_size() const +{ + return 0; +} + +uint32_t RuleOp::calc_size() const +{ + const uint32_t n = ctx->calc_size(); + return exp->calc_size() + + (n > 0 ? 
n + 1 : 0) + + 1; +} + +} // end namespace re2c diff --git a/tools/re2c/src/ir/nfa/nfa.cc b/tools/re2c/src/ir/nfa/nfa.cc new file mode 100644 index 000000000..64f4641cf --- /dev/null +++ b/tools/re2c/src/ir/nfa/nfa.cc @@ -0,0 +1,72 @@ +#include "src/ir/nfa/nfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" + +namespace re2c { + +nfa_t::nfa_t(RegExp *re) + : max_size(re->calc_size()) + , size(0) + , states(new nfa_state_t[max_size]) + , root(re->compile(*this, NULL)) +{} + +nfa_t::~nfa_t() +{ + delete[] states; +} + +nfa_state_t *AltOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s = &nfa.states[nfa.size++]; + s->alt(exp1->compile(nfa, t), + exp2->compile(nfa, t)); + return s; +} + +nfa_state_t *CatOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s2 = exp2->compile(nfa, t); + nfa_state_t *s1 = exp1->compile(nfa, s2); + return s1; +} + +nfa_state_t *CloseOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s = &nfa.states[nfa.size++]; + s->alt(t, exp->compile(nfa, s)); + return s; +} + +nfa_state_t *MatchOp::compile(nfa_t &nfa, nfa_state_t *t) +{ + nfa_state_t *s = &nfa.states[nfa.size++]; + s->ran(t, match); + return s; +} + +nfa_state_t *NullOp::compile(nfa_t &, nfa_state_t *t) +{ + return t; +} + +nfa_state_t *RuleOp::compile(nfa_t &nfa, nfa_state_t *) +{ + nfa_state_t *s3 = &nfa.states[nfa.size++]; + s3->fin(this); + if (ctx->calc_size() > 0) + { + nfa_state_t *s2 = &nfa.states[nfa.size++]; + s2->ctx(ctx->compile(nfa, s3)); + s3 = s2; + } + nfa_state_t *s1 = exp->compile(nfa, s3); + return s1; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/nfa/nfa.h b/tools/re2c/src/ir/nfa/nfa.h new file mode 100644 index 000000000..28587f415 --- /dev/null +++ b/tools/re2c/src/ir/nfa/nfa.h @@ -0,0 +1,90 @@ +#ifndef 
/*
 * A single NFA state. 'type' selects which member of 'value' is valid;
 * a state is initialized by exactly one of alt(), ran(), ctx(), fin().
 */
struct nfa_state_t
{
    enum type_t
    {
        ALT, // two outgoing states (value.alt)
        RAN, // one outgoing state guarded by a Range (value.ran)
        CTX, // one outgoing state (value.ctx)
        FIN  // no outgoing state, refers to a rule (value.fin)
    } type;
    union
    {
        struct
        {
            nfa_state_t *out1;
            nfa_state_t *out2;
        } alt;
        struct
        {
            nfa_state_t *out;
            Range *ran;
        } ran;
        struct
        {
            nfa_state_t *out;
        } ctx;
        struct
        {
            RuleOp *rule;
        } fin;
    } value;
    // Cleared by every initializer below.
    // NOTE(review): presumably a "visited" flag for NFA traversal -- confirm against users.
    bool mark;

    // Make this an ALT state with successors s1 and s2.
    void alt(nfa_state_t *s1, nfa_state_t *s2)
    {
        type = ALT;
        value.alt.out1 = s1;
        value.alt.out2 = s2;
        mark = false;
    }
    // Make this a RAN state: successor s, associated range r.
    void ran(nfa_state_t *s, Range *r)
    {
        type = RAN;
        value.ran.out = s;
        value.ran.ran = r;
        mark = false;
    }
    // Make this a CTX state with successor s.
    void ctx(nfa_state_t *s)
    {
        type = CTX;
        value.ctx.out = s;
        mark = false;
    }
    // Make this a FIN state for rule r.
    void fin(RuleOp *r)
    {
        type = FIN;
        value.fin.rule = r;
        mark = false;
    }
};
// Stream a regexp by delegating to its display() method.
std::ostream & operator << (std::ostream & o, const RegExp & re)
{
    re.display (o);
    return o;
}

// NOTE(review): the display() methods below stream the members 'exp',
// 'exp1', 'exp2', 'ctx' and 'match' directly. If those are pointers, a
// pointer overload of operator<< must be declared elsewhere; otherwise
// the generic pointer inserter prints addresses -- confirm.

// Alternation: first operand, "|", second operand.
void AltOp::display (std::ostream & o) const
{
    o << exp1 << "|" << exp2;
}

// Catenation: both operands, no separator.
void CatOp::display (std::ostream & o) const
{
    o << exp1 << exp2;
}

// Closure: operand followed by "+".
void CloseOp::display (std::ostream & o) const
{
    o << exp << "+";
}

// Match: streams the 'match' member.
void MatchOp::display (std::ostream & o) const
{
    o << match;
}

// Null regexp: printed as "_".
void NullOp::display (std::ostream & o) const
{
    o << "_";
}

// Rule: expression, "/", context, ";".
void RuleOp::display (std::ostream & o) const
{
    o << exp << "/" << ctx << ";";
}
// ASCII-only case helpers.
// TODO: support non-ASCII encodings
bool is_alpha (uint32_t c);
uint32_t to_lower_unsafe (uint32_t c);
uint32_t to_upper_unsafe (uint32_t c);

// True iff c is one of 'a'..'z' or 'A'..'Z'.
inline bool is_alpha (uint32_t c)
{
    // Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and leaves 'a'..'z'
    // untouched; no other value ends up inside that interval.
    const uint32_t folded = c | 0x20u;
    return folded >= 'a' && folded <= 'z';
}

// Sets the ASCII case bit. 'Unsafe': the result is only meaningful
// when c is a letter (see is_alpha); other values are altered blindly.
inline uint32_t to_lower_unsafe (uint32_t c)
{
    const uint32_t case_bit = 0x20u;
    return c | case_bit;
}

// Clears the ASCII case bit. 'Unsafe' for the same reason as above.
inline uint32_t to_upper_unsafe (uint32_t c)
{
    const uint32_t case_bit = 0x20u;
    return c & ~case_bit;
}
0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff - }; - -uint *xlat = asc2asc; -uint *talx = asc2asc; - -uint asc2ebc[256] = +const uint32_t Enc::asc2ebc[256] = { /* Based on ISO 8859/1 and Code Page 37 */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, @@ -59,7 +27,7 @@ uint asc2ebc[256] = 0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 0xe1, 0x70, 0xdd, 0xde, 0xdb, 0xdc, 0x8d, 0xae, 0xdf }; -uint ebc2asc[256] = +const uint32_t Enc::ebc2asc[256] = { /* Based on ISO 8859/1 and Code Page 37 */ 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x9d, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, @@ -79,4 +47,160 @@ uint ebc2asc[256] = 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 
0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f }; -} // end namespace re2c +/* + * Returns code point representation for current + * encoding with regard to current policy. + * + * Since code point is exacly specified by user, + * it is assumed that user considers it to be valid. + * We must check it. + * + * Returns false if this code point exceeds maximum + * or is forbidden by current policy, otherwise + * returns true. Overwrites code point. + */ +bool Enc::encode(uint32_t & c) const +{ + if (c >= nCodePoints ()) + { + return false; + } + + switch (type_) + { + case ASCII: + return true; + case EBCDIC: + c = asc2ebc[c]; + return true; + case UCS2: + case UTF16: + case UTF32: + case UTF8: + if (c < SURR_MIN || c > SURR_MAX) + return true; + else + { + switch (policy_) + { + case POLICY_FAIL: + return false; + case POLICY_SUBSTITUTE: + c = UNICODE_ERROR; + return true; + case POLICY_IGNORE: + return true; + } + } + } + return false; // to silence gcc warning +} + +/* + * Returns original representation of code point. + * Assumes code point is valid (hence 'unsafe'). + */ +uint32_t Enc::decodeUnsafe(uint32_t c) const +{ + switch (type_) + { + case EBCDIC: + c = ebc2asc[c & 0xFF]; + break; + case ASCII: + case UCS2: + case UTF16: + case UTF32: + case UTF8: + break; + } + return c; +} + +/* + * Returns [l - h] range representation for current + * encoding with regard to current policy. + * + * Since range borders are exacly specified by user, + * it is assumed that user considers that all code + * points from this range are valid. re2c must check it. + * + * Returns NULL if range contains code points that + * exceed maximum or are forbidden by current policy, + * otherwise returns pointer to newly constructed range. 
+ */ +Range * Enc::encodeRange(uint32_t l, uint32_t h) const +{ + if (l >= nCodePoints () || h >= nCodePoints ()) + { + return NULL; + } + + Range * r = NULL; + switch (type_) + { + case ASCII: + r = Range::ran (l, h + 1); + break; + case EBCDIC: + { + const uint32_t el = asc2ebc[l]; + r = Range::sym (el); + for (uint32_t c = l + 1; c <= h; ++c) + { + const uint32_t ec = asc2ebc[c]; + r = Range::add (r, Range::sym (ec)); + } + break; + } + case UCS2: + case UTF16: + case UTF32: + case UTF8: + r = Range::ran (l, h + 1); + if (l <= SURR_MAX && h >= SURR_MIN) + { + switch (policy_) + { + case POLICY_FAIL: + r = NULL; + break; + case POLICY_SUBSTITUTE: + { + Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); + Range * error = Range::sym (UNICODE_ERROR); + r = Range::sub (r, surrs); + r = Range::add (r, error); + break; + } + case POLICY_IGNORE: + break; + } + } + break; + } + return r; +} + +/* + * Returns full range representation for current encoding + * with regard to current policy. + * + * Since range is defined declaratively, re2c does + * all the necessary corrections 'for free'. + * + * Always succeeds, returns pointer to newly constructed + * range. + */ +Range * Enc::fullRange() const +{ + Range * r = Range::ran (0, nCodePoints()); + if (policy_ != POLICY_IGNORE) + { + Range * surrs = Range::ran (SURR_MIN, SURR_MAX + 1); + r = Range::sub (r, surrs); + } + return r; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/enc.h b/tools/re2c/src/ir/regexp/encoding/enc.h new file mode 100644 index 000000000..b85ae0bec --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/enc.h @@ -0,0 +1,197 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_ENC_ +#define _RE2C_IR_REGEXP_ENCODING_ENC_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +class Range; + +/* + * note [encodings] + * + * Each encoding defines two concepts: + * + * 1) Code point -- abstract number, which represents single encoding symbol. 
// Forward declaration repeated so that this block is self-contained;
// redeclaring the class is harmless if it is already declared above.
class Range;

/*
 * Input encoding together with the policy for invalid code points
 * (see note [encodings] for code point / code unit terminology).
 */
class Enc
{
public:
    // Supported encodings.
    enum type_t
        { ASCII
        , EBCDIC
        , UCS2
        , UTF16
        , UTF32
        , UTF8
        };

    // What to do with invalid code points
    enum policy_t
        { POLICY_FAIL
        , POLICY_SUBSTITUTE
        , POLICY_IGNORE
        };

private:
    static const uint32_t asc2ebc[256];
    static const uint32_t ebc2asc[256];
    static const uint32_t SURR_MIN;
    static const uint32_t SURR_MAX;
    static const uint32_t UNICODE_ERROR;

    type_t type_;
    policy_t policy_;

public:
    // Default: ASCII, invalid code points are left as they are.
    Enc()
        : type_ (ASCII)
        , policy_ (POLICY_IGNORE)
    { }

    // Human-readable name of encoding t ("" for an unknown value).
    static const char * name (type_t t);

    // Compares the encoding type only; the policy is not considered.
    bool operator != (const Enc & e) const { return type_ != e.type_; }

    inline uint32_t nCodePoints() const;
    inline uint32_t nCodeUnits() const;
    inline uint32_t szCodePoint() const;
    inline uint32_t szCodeUnit() const;

    inline bool set(type_t t);
    inline void unset(type_t);
    inline type_t type () const;

    inline void setPolicy(policy_t t);

    bool encode(uint32_t & c) const;
    uint32_t decodeUnsafe(uint32_t c) const;
    Range * encodeRange(uint32_t l, uint32_t h) const;
    Range * fullRange() const;
};

inline const char * Enc::name (type_t t)
{
    switch (t)
    {
        case ASCII:  return "ASCII";
        case EBCDIC: return "EBCDIC";
        case UTF8:   return "UTF8";
        case UCS2:   return "UCS2"; // was misspelled "USC2"
        case UTF16:  return "UTF16";
        case UTF32:  return "UTF32";
        default:     return "";
    }
}

// Number of code points of the encoding (exclusive upper bound).
inline uint32_t Enc::nCodePoints() const
{
    switch (type_)
    {
        case ASCII:
        case EBCDIC: return 0x100;
        case UCS2:   return 0x10000;
        case UTF16:
        case UTF32:
        case UTF8:
        default:     return 0x110000;
    }
}

// Number of distinct code unit values of the encoding.
inline uint32_t Enc::nCodeUnits() const
{
    switch (type_)
    {
        case ASCII:
        case EBCDIC:
        case UTF8:   return 0x100;
        case UCS2:
        case UTF16:  return 0x10000;
        case UTF32:
        default:     return 0x110000;
    }
}

// returns *maximal* code point size for encoding
inline uint32_t Enc::szCodePoint() const
{
    switch (type_)
    {
        case ASCII:
        case EBCDIC: return 1;
        case UCS2:   return 2;
        case UTF16:
        case UTF32:
        case UTF8:
        default:     return 4;
    }
}

// Size of one code unit in bytes.
inline uint32_t Enc::szCodeUnit() const
{
    switch (type_)
    {
        case ASCII:
        case EBCDIC:
        case UTF8:   return 1;
        case UCS2:
        case UTF16:  return 2;
        case UTF32:
        default:     return 4;
    }
}

// Select encoding t. Succeeds if t is already selected or if the
// current encoding is still the default (ASCII); a different non-default
// encoding is never overridden, in which case false is returned.
inline bool Enc::set(type_t t)
{
    if (type_ == t)
        return true;
    else if (type_ != ASCII)
        return false;
    else
    {
        type_ = t;
        return true;
    }
}

// Fall back to ASCII, but only if t is the currently selected encoding.
inline void Enc::unset(type_t t)
{
    if (type_ == t)
        type_ = ASCII;
}

inline Enc::type_t Enc::type () const
{
    return type_;
}

inline void Enc::setPolicy(policy_t t)
{
    policy_ = t;
}
+ */ +RegExp * emit(RangeSuffix * p, RegExp * re) +{ + if (p == NULL) + return re; + else + { + RegExp * regexp = NULL; + for (; p != NULL; p = p->next) + { + RegExp * re1 = doCat(new MatchOp(Range::ran (p->l, p->h + 1)), re); + regexp = doAlt(regexp, emit(p->child, re1)); + } + return regexp; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/range_suffix.h b/tools/re2c/src/ir/regexp/encoding/range_suffix.h new file mode 100644 index 000000000..ea46bdd86 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/range_suffix.h @@ -0,0 +1,39 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_ +#define _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_ + +#include "src/util/c99_stdint.h" +#include // NULL + +#include "src/util/forbid_copy.h" +#include "src/util/free_list.h" + +namespace re2c { + +class RegExp; + +struct RangeSuffix +{ + static free_list freeList; + + uint32_t l; + uint32_t h; + RangeSuffix * next; + RangeSuffix * child; + + RangeSuffix (uint32_t lo, uint32_t hi) + : l (lo) + , h (hi) + , next (NULL) + , child (NULL) + { + freeList.insert(this); + } + + FORBID_COPY (RangeSuffix); +}; + +RegExp * to_regexp (RangeSuffix * p); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_RANGE_SUFFIX_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc new file mode 100644 index 000000000..4b0a13bbb --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.cc @@ -0,0 +1,10 @@ +#include "src/ir/regexp/encoding/utf16/utf16.h" + +namespace re2c { + +const uint32_t utf16::MAX_1WORD_RUNE = 0xFFFFu; +const uint32_t utf16::MIN_LEAD_SURR = 0xD800u; +const uint32_t utf16::MIN_TRAIL_SURR = 0xDC00u; +const uint32_t utf16::MAX_TRAIL_SURR = 0xDFFFu; + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16.h b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.h new file mode 100644 index 000000000..89cdbdbdd --- /dev/null +++ 
b/tools/re2c/src/ir/regexp/encoding/utf16/utf16.h @@ -0,0 +1,37 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_ +#define _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +class utf16 +{ +public: + typedef uint32_t rune; + + static const uint32_t MAX_1WORD_RUNE; + static const uint32_t MIN_LEAD_SURR; + static const uint32_t MIN_TRAIL_SURR; + static const uint32_t MAX_TRAIL_SURR; + + /* leading surrogate of UTF-16 symbol */ + static inline uint32_t lead_surr(rune r); + + /* trailing surrogate of UTF-16 symbol */ + static inline uint32_t trail_surr(rune r); +}; + +inline uint32_t utf16::lead_surr(rune r) +{ + return ((r - 0x10000u) / 0x400u) + MIN_LEAD_SURR; +} + +inline uint32_t utf16::trail_surr(rune r) +{ + return ((r - 0x10000u) % 0x400u) + MIN_TRAIL_SURR; +} + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_UTF16_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc new file mode 100644 index 000000000..51f966bac --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.cc @@ -0,0 +1,146 @@ +#include "src/ir/regexp/encoding/utf16/utf16_range.h" +#include "src/ir/regexp/encoding/range_suffix.h" + +namespace re2c { + +/* + * Add word range [w1-w2]. + */ +void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h) +{ + RangeSuffix ** p = &root; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l, h); + break; + } + else if ((*p)->l == l && (*p)->h == h) + { + break; + } + else + p = &(*p)->next; + } +} + +/* + * Now that we have catenation of word ranges [l1-h1],[l2-h2], + * we want to add it to existing range, merging suffixes on the fly. 
+ */ +void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr) +{ + RangeSuffix ** p = &root; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l_tr, h_tr); + p = &(*p)->child; + break; + } + else if ((*p)->l == l_tr && (*p)->h == h_tr) + { + p = &(*p)->child; + break; + } + else + p = &(*p)->next; + } + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(l_ld, h_ld); + break; + } + else if ((*p)->l == l_ld && (*p)->h == h_ld) + { + break; + } + else + p = &(*p)->next; + } +} + +/* + * Split range into sub-ranges that agree on leading surrogates. + * + * We have two Unicode runes, L and H, both map to UTF-16 + * surrogate pairs 'L1 L2' and 'H1 H2'. + * We want to represent Unicode range [L - H] as a catenation + * of word ranges [L1 - H1],[L2 - H2]. + * + * This is only possible if the following condition holds: + * if L1 /= H1, then L2 == 0xdc00 and H2 == 0xdfff. + * This condition ensures that: + * 1) all possible UTF-16 sequences between L and H are allowed + * 2) no word ranges [w1 - w2] appear, such that w1 > w2 + * + * E.g.: + * [\U00010001-\U00010400] => [d800-d801],[dc01-dc00]. + * The last word range, [dc01-dc00], is incorrect: its lower bound + * is greater than its upper bound. To fix this, we must split + * the original range into two sub-ranges: + * [\U00010001-\U000103ff] => [d800-d800],[dc01-dfff] + * [\U00010400-\U00010400] => [d801-d801],[dc00-dc00] + * + * This function finds all such 'points of discontinuity' + * and represents original range as alternation of continuous + * sub-ranges. 
+ */ +void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr) +{ + if (l_ld != h_ld) + { + if (l_tr > utf16::MIN_TRAIL_SURR) + { + UTF16splitByContinuity(root, l_ld, l_ld, l_tr, utf16::MAX_TRAIL_SURR); + UTF16splitByContinuity(root, l_ld + 1, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + return; + } + if (h_tr < utf16::MAX_TRAIL_SURR) + { + UTF16splitByContinuity(root, l_ld, h_ld - 1, l_tr, utf16::MAX_TRAIL_SURR); + UTF16splitByContinuity(root, h_ld, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + return; + } + } + UTF16addContinuous2(root, l_ld, h_ld, l_tr, h_tr); +} + +/* + * Split range into sub-ranges, so that all runes in the same + * sub-range have equal length of UTF-16 sequence. E.g., full + * Unicode range [0-0x10FFFF] gets split into sub-ranges: + * [0 - 0xFFFF] (2-byte UTF-16 sequences) + * [0x10000 - 0x10FFFF] (4-byte UTF-16 sequences) + */ +void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h) +{ + if (l <= utf16::MAX_1WORD_RUNE) + { + if (h <= utf16::MAX_1WORD_RUNE) + { + UTF16addContinuous1(root, l, h); + } + else + { + UTF16addContinuous1(root, l, utf16::MAX_1WORD_RUNE); + const uint32_t h_ld = utf16::lead_surr(h); + const uint32_t h_tr = utf16::trail_surr(h); + UTF16splitByContinuity(root, utf16::MIN_LEAD_SURR, h_ld, utf16::MIN_TRAIL_SURR, h_tr); + } + } + else + { + const uint32_t l_ld = utf16::lead_surr(l); + const uint32_t l_tr = utf16::trail_surr(l); + const uint32_t h_ld = utf16::lead_surr(h); + const uint32_t h_tr = utf16::trail_surr(h); + UTF16splitByContinuity(root, l_ld, h_ld, l_tr, h_tr); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h new file mode 100644 index 000000000..8a74e8f34 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_range.h @@ -0,0 +1,19 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_ +#define _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_ + 
+#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf16/utf16.h" + +namespace re2c { + +struct RangeSuffix; + +void UTF16addContinuous1(RangeSuffix * & root, uint32_t l, uint32_t h); +void UTF16addContinuous2(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr); +void UTF16splitByContinuity(RangeSuffix * & root, uint32_t l_ld, uint32_t h_ld, uint32_t l_tr, uint32_t h_tr); +void UTF16splitByRuneLength(RangeSuffix * & root, utf16::rune l, utf16::rune h); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_RANGE_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc new file mode 100644 index 000000000..3b2442904 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.cc @@ -0,0 +1,38 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf16/utf16_regexp.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/encoding/utf16/utf16_range.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/util/range.h" + +namespace re2c { + +RegExp * UTF16Symbol(utf16::rune r) +{ + if (r <= utf16::MAX_1WORD_RUNE) + return new MatchOp(Range::sym (r)); + else + { + const uint32_t ld = utf16::lead_surr(r); + const uint32_t tr = utf16::trail_surr(r); + return new CatOp(new MatchOp(Range::sym (ld)), new MatchOp(Range::sym (tr))); + } +} + +/* + * Split Unicode character class {[l1, h1), ..., [lN, hN)} into + * ranges [l1, h1-1], ..., [lN, hN-1] and return alternation of + * them. We store partially built range in suffix tree, which + * allows to eliminate common suffixes while building. 
+ */ +RegExp * UTF16Range(const Range * r) +{ + RangeSuffix * root = NULL; + for (; r != NULL; r = r->next ()) + UTF16splitByRuneLength(root, r->lower (), r->upper () - 1); + return to_regexp (root); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h new file mode 100644 index 000000000..d381de94b --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf16/utf16_regexp.h @@ -0,0 +1,16 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_ +#define _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_ + +#include "src/ir/regexp/encoding/utf16/utf16.h" + +namespace re2c { + +class Range; +class RegExp; + +RegExp * UTF16Symbol(utf16::rune r); +RegExp * UTF16Range(const Range * r); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF16_REGEXP_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc new file mode 100644 index 000000000..dd4b59ef2 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.cc @@ -0,0 +1,84 @@ +#include "src/ir/regexp/encoding/utf8/utf8.h" + +namespace re2c { + +const uint32_t utf8::ERROR = 0xFFFDu; + +const utf8::rune utf8::MAX_1BYTE_RUNE = 0x7Fu; +const utf8::rune utf8::MAX_2BYTE_RUNE = 0x7FFu; +const utf8::rune utf8::MAX_3BYTE_RUNE = 0xFFFFu; +const utf8::rune utf8::MAX_4BYTE_RUNE = 0x10FFFFu; +const utf8::rune utf8::MAX_RUNE = utf8::MAX_4BYTE_RUNE; + +const uint32_t utf8::PREFIX_1BYTE = 0u; // 0000 0000 +const uint32_t utf8::INFIX = 0x80u; // 1000 0000 +const uint32_t utf8::PREFIX_2BYTE = 0xC0u; // 1100 0000 +const uint32_t utf8::PREFIX_3BYTE = 0xE0u; // 1110 0000 +const uint32_t utf8::PREFIX_4BYTE = 0xF0u; // 1111 0000 + +const uint32_t utf8::SHIFT = 6u; +const uint32_t utf8::MASK = 0x3Fu; // 0011 1111 + +uint32_t utf8::rune_to_bytes(uint32_t *str, rune c) +{ + // one byte sequence: 0-0x7F => 0xxxxxxx + if (c <= MAX_1BYTE_RUNE) + { + str[0] = PREFIX_1BYTE | c; + return 1; + } + + // two 
byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx + if (c <= MAX_2BYTE_RUNE) + { + str[0] = PREFIX_2BYTE | (c >> 1*SHIFT); + str[1] = INFIX | (c & MASK); + return 2; + } + + // If the Rune is out of range, convert it to the error rune. + // Do this test here because the error rune encodes to three bytes. + // Doing it earlier would duplicate work, since an out of range + // Rune wouldn't have fit in one or two bytes. + if (c > MAX_RUNE) + c = ERROR; + + // three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx + if (c <= MAX_3BYTE_RUNE) + { + str[0] = PREFIX_3BYTE | (c >> 2*SHIFT); + str[1] = INFIX | ((c >> 1*SHIFT) & MASK); + str[2] = INFIX | (c & MASK); + return 3; + } + + // four byte sequence (21-bit value): + // 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + str[0] = PREFIX_4BYTE | (c >> 3*SHIFT); + str[1] = INFIX | ((c >> 2*SHIFT) & MASK); + str[2] = INFIX | ((c >> 1*SHIFT) & MASK); + str[3] = INFIX | (c & MASK); + return 4; +} + +uint32_t utf8::rune_length(rune r) +{ + if (r <= MAX_2BYTE_RUNE) + return r <= MAX_1BYTE_RUNE ? 1 : 2; + else + return r <= MAX_3BYTE_RUNE ? 
3 : 4; +} + +utf8::rune utf8::max_rune(uint32_t i) +{ + switch (i) + { + case 1: return MAX_1BYTE_RUNE; + case 2: return MAX_2BYTE_RUNE; + case 3: return MAX_3BYTE_RUNE; + case 4: return MAX_4BYTE_RUNE; + default: return ERROR; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8.h b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.h new file mode 100644 index 000000000..0ca314228 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8.h @@ -0,0 +1,48 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_ +#define _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +class utf8 +{ +public: + typedef uint32_t rune; + + // maximum characters per rune + // enum instead of static const member because of [-Wvla] + enum { MAX_RUNE_LENGTH = 4u }; + + // decoding error + static const uint32_t ERROR; + + // maximal runes for each rune length + static const rune MAX_1BYTE_RUNE; + static const rune MAX_2BYTE_RUNE; + static const rune MAX_3BYTE_RUNE; + static const rune MAX_4BYTE_RUNE; + static const rune MAX_RUNE; + + static const uint32_t PREFIX_1BYTE; + static const uint32_t INFIX; + static const uint32_t PREFIX_2BYTE; + static const uint32_t PREFIX_3BYTE; + static const uint32_t PREFIX_4BYTE; + + static const uint32_t SHIFT; + static const uint32_t MASK; + + // UTF-8 bytestring for given Unicode rune + static uint32_t rune_to_bytes(uint32_t * s, rune r); + + // length of UTF-8 bytestring for given Unicode rune + static uint32_t rune_length(rune r); + + // maximal Unicode rune with given length of UTF-8 bytestring + static rune max_rune(uint32_t i); +}; + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc new file mode 100644 index 000000000..d3d256cf8 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.cc @@ -0,0 +1,112 @@ +#include 
"src/ir/regexp/encoding/utf8/utf8_range.h" +#include "src/ir/regexp/encoding/range_suffix.h" + +namespace re2c { + +/* + * Now that we have catenation of byte ranges [l1-h1]...[lN-hN], + * we want to add it to existing range, merging suffixes on the fly. + */ +void UTF8addContinuous(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n) +{ + uint32_t lcs[utf8::MAX_RUNE_LENGTH]; + uint32_t hcs[utf8::MAX_RUNE_LENGTH]; + utf8::rune_to_bytes(lcs, l); + utf8::rune_to_bytes(hcs, h); + + RangeSuffix ** p = &root; + for (uint32_t i = 1; i <= n; ++i) + { + const uint32_t lc = lcs[n - i]; + const uint32_t hc = hcs[n - i]; + for (;;) + { + if (*p == NULL) + { + *p = new RangeSuffix(lc, hc); + p = &(*p)->child; + break; + } + else if ((*p)->l == lc && (*p)->h == hc) + { + p = &(*p)->child; + break; + } + else + p = &(*p)->next; + } + } +} + +/* + * Split range into sub-ranges that agree on leading bytes. + * + * We have two Unicode runes of equal length, L and H, which + * map to UTF-8 sequences 'L_1 ... L_n' and 'H_1 ... H_n'. + * We want to represent Unicode range [L - H] as a catenation + * of byte ranges [L_1 - H_1], ..., [L_n - H_n]. + * + * This is only possible if for all i > 1: + * if L_i /= H_i, then L_(i+1) == 0x80 and H_(i+1) == 0xbf. + * This condition ensures that: + * 1) all possible UTF-8 sequences between L and H are allowed + * 2) no byte ranges [b1 - b2] appear, such that b1 > b2 + * + * E.g.: + * [\U000e0031-\U000e0043] => [f3-f3],[a0-a0],[80-81],[b1-83]. + * The last byte range, [b1-83], is incorrect: its lower bound + * is greater than its upper bound. To fix this, we must split + * the original range into two sub-ranges: + * [\U000e0031-\U000e003f] => [f3-f3],[a0-a0],[80-80],[b1-bf] + * [\U000e0040-\U000e0043] => [f3-f3],[a0-a0],[81-81],[80-83] + * + * This function finds all such 'points of discontinuity' + * and represents original range as alternation of continuous + * sub-ranges. 
+ */ +void UTF8splitByContinuity(RangeSuffix * & root, utf8::rune l, utf8::rune h, uint32_t n) +{ + for (uint32_t i = 1; i < n; ++i) + { + uint32_t m = (1u << (6u * i)) - 1u; // last i bytes of a UTF-8 sequence + if ((l & ~m) != (h & ~m)) + { + if ((l & m) != 0) + { + UTF8splitByContinuity(root, l, l | m, n); + UTF8splitByContinuity(root, (l | m) + 1, h, n); + return; + } + if ((h & m) != m) + { + UTF8splitByContinuity(root, l, (h & ~m) - 1, n); + UTF8splitByContinuity(root, h & ~m, h, n); + return; + } + } + } + UTF8addContinuous(root, l, h, n); +} + +/* + * Split range into sub-ranges, so that all runes in the same + * sub-range have equal length of UTF-8 sequence. E.g., full + * Unicode range [0-0x10FFFF] gets split into sub-ranges: + * [0 - 0x7F] (1-byte UTF-8 sequences) + * [0x80 - 0x7FF] (2-byte UTF-8 sequences) + * [0x800 - 0xFFFF] (3-byte UTF-8 sequences) + * [0x10000 - 0x10FFFF] (4-byte UTF-8 sequences) + */ +void UTF8splitByRuneLength(RangeSuffix * & root, utf8::rune l, utf8::rune h) +{ + const uint32_t nh = utf8::rune_length(h); + for (uint32_t nl = utf8::rune_length(l); nl < nh; ++nl) + { + utf8::rune r = utf8::max_rune(nl); + UTF8splitByContinuity(root, l, r, nl); + l = r + 1; + } + UTF8splitByContinuity(root, l, h, nh); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h new file mode 100644 index 000000000..1ce46132f --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_range.h @@ -0,0 +1,18 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_ +#define _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_ + +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf8/utf8.h" + +namespace re2c { + +struct RangeSuffix; + +void UTF8addContinuous(RangeSuffix * & p, utf8::rune l, utf8::rune h, uint32_t n); +void UTF8splitByContinuity(RangeSuffix * & p, utf8::rune l, utf8::rune h, uint32_t n); +void UTF8splitByRuneLength(RangeSuffix * & p, utf8::rune l, 
utf8::rune h); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_RANGE_ diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc new file mode 100644 index 000000000..54ef6f0e0 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.cc @@ -0,0 +1,36 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/encoding/utf8/utf8_regexp.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/encoding/utf8/utf8_range.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/util/range.h" + +namespace re2c { + +RegExp * UTF8Symbol(utf8::rune r) +{ + uint32_t chars[utf8::MAX_RUNE_LENGTH]; + const uint32_t chars_count = utf8::rune_to_bytes(chars, r); + RegExp * re = new MatchOp(Range::sym (chars[0])); + for (uint32_t i = 1; i < chars_count; ++i) + re = new CatOp(re, new MatchOp(Range::sym (chars[i]))); + return re; +} + +/* + * Split Unicode character class {[l1, h1), ..., [lN, hN)} into + * ranges [l1, h1-1], ..., [lN, hN-1] and return alternation of + * them. We store partially built range in suffix tree, which + * allows to eliminate common suffixes while building. 
+ */ +RegExp * UTF8Range(const Range * r) +{ + RangeSuffix * root = NULL; + for (; r != NULL; r = r->next ()) + UTF8splitByRuneLength(root, r->lower (), r->upper () - 1); + return to_regexp (root); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h new file mode 100644 index 000000000..676759a37 --- /dev/null +++ b/tools/re2c/src/ir/regexp/encoding/utf8/utf8_regexp.h @@ -0,0 +1,16 @@ +#ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_ +#define _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_ + +#include "src/ir/regexp/encoding/utf8/utf8.h" + +namespace re2c { + +class Range; +class RegExp; + +RegExp * UTF8Symbol(utf8::rune r); +RegExp * UTF8Range(const Range * r); + +} // namespace re2c + +#endif // _RE2C_IR_REGEXP_ENCODING_UTF8_REGEXP_ diff --git a/tools/re2c/src/ir/regexp/fixed_length.cc b/tools/re2c/src/ir/regexp/fixed_length.cc new file mode 100644 index 000000000..e0fd7e00c --- /dev/null +++ b/tools/re2c/src/ir/regexp/fixed_length.cc @@ -0,0 +1,55 @@ +#include "src/util/c99_stdint.h" + +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" + +namespace re2c +{ + +uint32_t RegExp::fixedLength () +{ + return ~0u; +} + +uint32_t AltOp::fixedLength () +{ + uint32_t l1 = exp1->fixedLength (); // length of the first alternative + uint32_t l2 = exp2->fixedLength (); // length of the second alternative + + if (l1 != l2 || l1 == ~0u) // alternatives differ, or length is not fixed (~0u) + { + return ~0u; + } + + return l1; +} + +uint32_t CatOp::fixedLength () +{ + const uint32_t l1 = exp1->fixedLength (); + if (l1 != ~0u) + { + const uint32_t l2 = exp2->fixedLength (); + if (l2 != ~0u) + { + return l1 + l2; + } + } + return ~0u; +} + +uint32_t MatchOp::fixedLength () +{ + return 1; +} + +uint32_t NullOp::fixedLength () +{ + return 0; +} + +} // end namespace re2c + diff --git a/tools/re2c/src/ir/regexp/regexp.cc b/tools/re2c/src/ir/regexp/regexp.cc new file mode 100644 index 
000000000..e5a7d9bd5 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp.cc @@ -0,0 +1,241 @@ +#include + +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/empty_class_policy.h" +#include "src/ir/regexp/encoding/case.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/encoding/utf16/utf16_regexp.h" +#include "src/ir/regexp/encoding/utf8/utf8_regexp.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_alt.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_match.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/parse/scanner.h" +#include "src/util/range.h" + +namespace re2c +{ + +static MatchOp * merge (MatchOp * m1, MatchOp * m2); + +free_list RegExp::vFreeList; + +RegExp * doAlt (RegExp * e1, RegExp * e2) +{ + if (!e1) + { + return e2; + } + if (!e2) + { + return e1; + } + return new AltOp (e1, e2); +} + +RegExp * mkAlt (RegExp * e1, RegExp * e2) +{ + AltOp * a; + MatchOp * m1; + MatchOp * m2; + + a = dynamic_cast (e1); + if (a != NULL) + { + m1 = dynamic_cast (a->exp1); + if (m1 != NULL) + { + e1 = a->exp2; + } + } + else + { + m1 = dynamic_cast (e1); + if (m1 != NULL) + { + e1 = NULL; + } + } + a = dynamic_cast (e2); + if (a != NULL) + { + m2 = dynamic_cast (a->exp1); + if (m2 != NULL) + { + e2 = a->exp2; + } + } + else + { + m2 = dynamic_cast (e2); + if (m2 != NULL) + { + e2 = NULL; + } + } + + return doAlt (merge (m1, m2), doAlt (e1, e2)); +} + +MatchOp * merge (MatchOp * m1, MatchOp * m2) +{ + if (!m1) + { + return m2; + } + if (!m2) + { + return m1; + } + MatchOp * m = new MatchOp (Range::add (m1->match, m2->match)); + return m; +} + +RegExp * doCat (RegExp * e1, RegExp * e2) +{ + if (!e1) + { + return e2; + } + if (!e2) + { + return e1; + } + return new CatOp (e1, e2); +} + +RegExp *Scanner::schr(uint32_t c) const +{ + if (!opts->encoding.encode(c)) { + fatalf("Bad code point: '0x%X'", c); + } + 
switch (opts->encoding.type ()) { + case Enc::UTF16: return UTF16Symbol(c); + case Enc::UTF8: return UTF8Symbol(c); + default: return new MatchOp(Range::sym(c)); + } +} + +RegExp *Scanner::ichr(uint32_t c) const +{ + if (is_alpha(c)) { + RegExp *l = schr(to_lower_unsafe(c)); + RegExp *u = schr(to_upper_unsafe(c)); + return mkAlt(l, u); + } else { + return schr(c); + } +} + +RegExp *Scanner::cls(Range *r) const +{ + if (!r) + { + switch (opts->empty_class_policy) + { + case EMPTY_CLASS_MATCH_EMPTY: + warn.empty_class (get_line ()); + return new NullOp; + case EMPTY_CLASS_MATCH_NONE: + warn.empty_class (get_line ()); + break; + case EMPTY_CLASS_ERROR: + fatal ("empty character class"); + break; + } + } + + switch (opts->encoding.type ()) + { + case Enc::UTF16: return UTF16Range(r); + case Enc::UTF8: return UTF8Range(r); + default: return new MatchOp(r); + } +} + +RegExp * Scanner::mkDiff (RegExp * e1, RegExp * e2) const +{ + MatchOp * m1 = dynamic_cast (e1); + MatchOp * m2 = dynamic_cast (e2); + if (m1 == NULL || m2 == NULL) + { + fatal("can only difference char sets"); + } + Range * r = Range::sub (m1->match, m2->match); + + return cls(r); +} + +RegExp * Scanner::mkDot() const +{ + Range * full = opts->encoding.fullRange(); + uint32_t c = '\n'; + if (!opts->encoding.encode(c)) + fatalf("Bad code point: '0x%X'", c); + Range * ran = Range::sym (c); + Range * inv = Range::sub (full, ran); + + return cls(inv); +} + +/* + * Create a byte range that includes all possible input characters. + * This may include characters, which do not map to any valid symbol + * in current encoding. For encodings, which directly map symbols to + * input characters (ASCII, EBCDIC, UTF-32), it equals [^]. For other + * encodings (UTF-16, UTF-8), [^] and this range are different. + * + * Also note that default range doesn't respect encoding policy + * (the way invalid code points are treated). 
+ */ +RegExp * Scanner::mkDefault() const +{ + Range * def = Range::ran (0, opts->encoding.nCodeUnits()); + return new MatchOp(def); +} + +/* + * note [counted repetition expansion] + * + * r{0} ::= (empty regexp) + * r{n} ::= r{n-1} r + * r{n,m} ::= r{n} (r{0} | ... | r{m-n}) + * r{n,} ::= r{n} r* + */ + +// see note [counted repetition expansion] +RegExp * repeat (RegExp * e, uint32_t n) +{ + RegExp * r = NULL; + for (uint32_t i = 0; i < n; ++i) + { + r = doCat (r, e); + } + return r; +} + +// see note [counted repetition expansion] +RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m) +{ + RegExp * r1 = repeat (e, n); + RegExp * r2 = NULL; + for (uint32_t i = n; i < m; ++i) + { + r2 = mkAlt (new NullOp, doCat (e, r2)); + } + return doCat (r1, r2); +} + +// see note [counted repetition expansion] +RegExp * repeat_from (RegExp * e, uint32_t n) +{ + RegExp * r1 = repeat (e, n); + RegExp * r2 = new CloseOp (e); + return doCat (r1, r2); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/regexp/regexp.h b/tools/re2c/src/ir/regexp/regexp.h new file mode 100644 index 000000000..5d344dd34 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp.h @@ -0,0 +1,52 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_ +#define _RE2C_IR_REGEXP_REGEXP_ + +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/util/free_list.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +struct nfa_state_t; +struct nfa_t; + +typedef std::vector charset_t; + +class RegExp +{ +public: + static free_list vFreeList; + + inline RegExp () + { + vFreeList.insert (this); + } + inline virtual ~RegExp () + { + vFreeList.erase (this); + } + virtual void split (std::set &) = 0; + virtual uint32_t calc_size() const = 0; + virtual uint32_t fixedLength (); + virtual nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n) = 0; + virtual void display (std::ostream &) const = 0; + friend std::ostream & operator << (std::ostream & o, const RegExp & re); + + FORBID_COPY (RegExp); +}; + +RegExp * doAlt 
(RegExp * e1, RegExp * e2); +RegExp * mkAlt (RegExp * e1, RegExp * e2); +RegExp * doCat (RegExp * e1, RegExp * e2); +RegExp * repeat (RegExp * e, uint32_t n); +RegExp * repeat_from_to (RegExp * e, uint32_t n, uint32_t m); +RegExp * repeat_from (RegExp * e, uint32_t n); + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_ diff --git a/tools/re2c/src/ir/regexp/regexp_alt.h b/tools/re2c/src/ir/regexp/regexp_alt.h new file mode 100644 index 000000000..6f1c8ea48 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_alt.h @@ -0,0 +1,31 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_ALT_ +#define _RE2C_IR_REGEXP_REGEXP_ALT_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class AltOp: public RegExp +{ + RegExp * exp1; + RegExp * exp2; + +public: + inline AltOp (RegExp * e1, RegExp * e2) + : exp1 (e1) + , exp2 (e2) + {} + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + friend RegExp * mkAlt (RegExp *, RegExp *); + + FORBID_COPY (AltOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_ALT_ diff --git a/tools/re2c/src/ir/regexp/regexp_cat.h b/tools/re2c/src/ir/regexp/regexp_cat.h new file mode 100644 index 000000000..d8176212e --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_cat.h @@ -0,0 +1,30 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_CAT_ +#define _RE2C_IR_REGEXP_REGEXP_CAT_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class CatOp: public RegExp +{ + RegExp * exp1; + RegExp * exp2; + +public: + inline CatOp (RegExp * e1, RegExp * e2) + : exp1 (e1) + , exp2 (e2) + {} + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + + FORBID_COPY (CatOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_CAT_ diff --git a/tools/re2c/src/ir/regexp/regexp_close.h 
b/tools/re2c/src/ir/regexp/regexp_close.h new file mode 100644 index 000000000..02bea20f3 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_close.h @@ -0,0 +1,27 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_CLOSE_ +#define _RE2C_IR_REGEXP_REGEXP_CLOSE_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class CloseOp: public RegExp +{ + RegExp * exp; + +public: + inline CloseOp (RegExp * e) + : exp (e) + {} + void split (std::set &); + uint32_t calc_size() const; + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + + FORBID_COPY (CloseOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_CLOSE_ diff --git a/tools/re2c/src/ir/regexp/regexp_match.h b/tools/re2c/src/ir/regexp/regexp_match.h new file mode 100644 index 000000000..903697b64 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_match.h @@ -0,0 +1,29 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_MATCH_ +#define _RE2C_IR_REGEXP_REGEXP_MATCH_ + +#include "src/ir/regexp/regexp.h" +#include "src/util/range.h" + +namespace re2c +{ + +class MatchOp: public RegExp +{ +public: + Range * match; + + inline MatchOp (Range * m) + : match (m) + {} + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & o) const; + + FORBID_COPY (MatchOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_MATCH_ diff --git a/tools/re2c/src/ir/regexp/regexp_null.h b/tools/re2c/src/ir/regexp/regexp_null.h new file mode 100644 index 000000000..8168dbe55 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_null.h @@ -0,0 +1,21 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_NULL_ +#define _RE2C_IR_REGEXP_REGEXP_NULL_ + +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +class NullOp: public RegExp +{ +public: + void split (std::set &); + uint32_t calc_size() const; + uint32_t fixedLength (); + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + void display (std::ostream & 
o) const; +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_NULL_ diff --git a/tools/re2c/src/ir/regexp/regexp_rule.h b/tools/re2c/src/ir/regexp/regexp_rule.h new file mode 100644 index 000000000..1519fa233 --- /dev/null +++ b/tools/re2c/src/ir/regexp/regexp_rule.h @@ -0,0 +1,52 @@ +#ifndef _RE2C_IR_REGEXP_REGEXP_RULE_ +#define _RE2C_IR_REGEXP_REGEXP_RULE_ + +#include + +#include "src/ir/regexp/regexp.h" +#include "src/ir/rule_rank.h" +#include "src/parse/code.h" + +namespace re2c +{ + +class RuleOp: public RegExp +{ +public: + const Loc loc; + +private: + RegExp * exp; + +public: + RegExp * ctx; + rule_rank_t rank; + const Code * code; + const std::string newcond; + + inline RuleOp + ( const Loc & l + , RegExp * r1 + , RegExp * r2 + , rule_rank_t r + , const Code * c + , const std::string * cond + ) + : loc (l) + , exp (r1) + , ctx (r2) + , rank (r) + , code (c) + , newcond (cond ? *cond : "") + {} + void display (std::ostream & o) const; + void split (std::set &); + uint32_t calc_size() const; + nfa_state_t *compile(nfa_t &nfa, nfa_state_t *n); + + FORBID_COPY (RuleOp); +}; + +} // end namespace re2c + +#endif // _RE2C_IR_REGEXP_REGEXP_RULE_ diff --git a/tools/re2c/src/ir/rule_rank.cc b/tools/re2c/src/ir/rule_rank.cc new file mode 100644 index 000000000..12d2de885 --- /dev/null +++ b/tools/re2c/src/ir/rule_rank.cc @@ -0,0 +1,68 @@ +#include +#include +#include + +#include "src/ir/rule_rank.h" + +namespace re2c +{ + +const uint32_t rule_rank_t::NONE = std::numeric_limits::max(); +const uint32_t rule_rank_t::DEF = rule_rank_t::NONE - 1; + +rule_rank_t::rule_rank_t () + : value (0) +{} + +void rule_rank_t::inc () +{ + assert (value < DEF - 1); + ++value; +} + +rule_rank_t rule_rank_t::none () +{ + rule_rank_t r; + r.value = NONE; + return r; +} + +rule_rank_t rule_rank_t::def () +{ + rule_rank_t r; + r.value = DEF; + return r; +} + +bool rule_rank_t::is_none () const +{ + return value == NONE; +} + +bool rule_rank_t::is_def () const +{ + return 
value == DEF; +} + +bool rule_rank_t::operator < (const rule_rank_t & r) const +{ + return value < r.value; +} + +bool rule_rank_t::operator == (const rule_rank_t & r) const +{ + return value == r.value; +} + +std::ostream & operator << (std::ostream & o, rule_rank_t r) +{ + o << r.value; + return o; +} + +uint32_t rule_rank_t::uint32 () const +{ + return value; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/rule_rank.h b/tools/re2c/src/ir/rule_rank.h new file mode 100644 index 000000000..ca19cb94b --- /dev/null +++ b/tools/re2c/src/ir/rule_rank.h @@ -0,0 +1,44 @@ +#ifndef _RE2C_IR_RULE_RANK_ +#define _RE2C_IR_RULE_RANK_ + +#include "src/util/c99_stdint.h" +#include + +namespace re2c +{ + +template class counter_t; + +// rule rank public API: +// - get rule rank corresponding to nonexistent/default rule +// - check if rank corresponds to nonexistent/default rule +// - compare ranks +// - output rank to std::ostream +// +// rule rank private API (for rule rank counter): +// - get first rank +// - get next rank +class rule_rank_t +{ + static const uint32_t NONE; + static const uint32_t DEF; + uint32_t value; + rule_rank_t (); + void inc (); + +public: + static rule_rank_t none (); + static rule_rank_t def (); + bool is_none () const; + bool is_def () const; + bool operator < (const rule_rank_t & r) const; + bool operator == (const rule_rank_t & r) const; + friend std::ostream & operator << (std::ostream & o, rule_rank_t r); + uint32_t uint32 () const; + + friend class counter_t; +}; + +} // namespace re2c + +#endif // _RE2C_IR_RULE_RANK_ diff --git a/tools/re2c/src/ir/skeleton/control_flow.cc b/tools/re2c/src/ir/skeleton/control_flow.cc new file mode 100644 index 000000000..74166865f --- /dev/null +++ b/tools/re2c/src/ir/skeleton/control_flow.cc @@ -0,0 +1,61 @@ +#include +#include +#include + +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/skeleton.h" 
+#include "src/ir/skeleton/way.h" +#include "src/util/u32lim.h" + +namespace re2c +{ + +// We don't need all patterns that cause undefined behaviour. +// We only need some examples, the shorter the better. +// See also note [counting skeleton edges]. +void Node::naked_ways (way_t & prefix, std::vector & ways, nakeds_t &size) +{ + if (!rule.rank.is_none ()) + { + return; + } + else if (end ()) + { + ways.push_back (prefix); + size = size + nakeds_t::from64(prefix.size ()); + } + else if (loop < 2) + { + local_inc _ (loop); + for (arcsets_t::iterator i = arcsets.begin (); + i != arcsets.end () && !size.overflow (); ++i) + { + prefix.push_back (&i->second); + i->first->naked_ways (prefix, ways, size); + prefix.pop_back (); + } + } +} + +void Skeleton::warn_undefined_control_flow () +{ + way_t prefix; + std::vector ways; + Node::nakeds_t size = Node::nakeds_t::from32(0u); + + nodes->naked_ways (prefix, ways, size); + + if (!ways.empty ()) + { + warn.undefined_control_flow (line, cond, ways, size.overflow ()); + } + else if (size.overflow ()) + { + warn.fail (Warn::UNDEFINED_CONTROL_FLOW, line, "DFA is too large to check undefined control flow"); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/generate_code.cc b/tools/re2c/src/ir/skeleton/generate_code.cc new file mode 100644 index 000000000..38940ae77 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/generate_code.cc @@ -0,0 +1,323 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include + +#include "src/codegen/bitmap.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/skeleton.h" + +namespace re2c +{ + +static void exact_uint (OutputFile & o, size_t width) +{ + if (width == sizeof (char)) + { + o.ws("unsigned char"); + } + else if (width == sizeof (short)) + { + o.ws("unsigned short"); + } + else if (width == sizeof (int)) + { + o.ws("unsigned 
int"); + } + else if (width == sizeof (long)) + { + o.ws("unsigned long"); + } + else + { + o.ws("uint").wu64 (width * 8).ws("_t"); + } +} + +static void from_le(OutputFile &o, uint32_t ind, size_t size, const char *expr) +{ + o.ws("\n").wind(ind).ws("/* from little-endian to host-endian */"); + o.ws("\n").wind(ind).ws("unsigned char *p = (unsigned char*)&").ws(expr).ws(";"); + o.ws("\n").wind(ind).ws(expr).ws(" = p[0]"); + for (uint32_t i = 1; i < size; ++i) + { + o.ws(" + (p[").wu32(i).ws("] << ").wu32(i * 8).ws("u)"); + } + o.ws(";"); +} + +void Skeleton::emit_prolog (OutputFile & o) +{ + o.ws("\n#include "); + o.ws("\n#include /* malloc, free */"); + o.ws("\n"); + o.ws("\nstatic void *read_file"); + o.ws("\n").wind(1).ws("( const char *fname"); + o.ws("\n").wind(1).ws(", size_t unit"); + o.ws("\n").wind(1).ws(", size_t padding"); + o.ws("\n").wind(1).ws(", size_t *pfsize"); + o.ws("\n").wind(1).ws(")"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("void *buffer = NULL;"); + o.ws("\n").wind(1).ws("size_t fsize = 0;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* open file */"); + o.ws("\n").wind(1).ws("FILE *f = fopen(fname, \"rb\");"); + o.ws("\n").wind(1).ws("if(f == NULL) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* get file size */"); + o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_END);"); + o.ws("\n").wind(1).ws("fsize = (size_t) ftell(f) / unit;"); + o.ws("\n").wind(1).ws("fseek(f, 0, SEEK_SET);"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* allocate memory for file and padding */"); + o.ws("\n").wind(1).ws("buffer = malloc(unit * (fsize + padding));"); + o.ws("\n").wind(1).ws("if (buffer == NULL) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\n").wind(1).ws("/* read the whole file in memory */"); + o.ws("\n").wind(1).ws("if (fread(buffer, unit, fsize, f) != fsize) {"); + o.ws("\n").wind(2).ws("goto error;"); + o.ws("\n").wind(1).ws("}"); + 
o.ws("\n"); + o.ws("\n").wind(1).ws("fclose(f);"); + o.ws("\n").wind(1).ws("*pfsize = fsize;"); + o.ws("\n").wind(1).ws("return buffer;"); + o.ws("\n"); + o.ws("\nerror:"); + o.ws("\n").wind(1).ws("fprintf(stderr, \"error: cannot read file '%s'\\n\", fname);"); + o.ws("\n").wind(1).ws("free(buffer);"); + o.ws("\n").wind(1).ws("if (f != NULL) {"); + o.ws("\n").wind(2).ws("fclose(f);"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("return NULL;"); + o.ws("\n}"); + o.ws("\n"); +} + +void Skeleton::emit_start + ( OutputFile & o + , size_t maxfill + , bool backup + , bool backupctx + , bool accept + ) const +{ + const size_t sizeof_cunit = opts->encoding.szCodeUnit(); + const uint32_t default_rule = rule2key (rule_rank_t::none ()); + + o.ws("\n#define YYCTYPE "); + exact_uint (o, sizeof_cunit); + o.ws("\n#define YYKEYTYPE "); + exact_uint (o, sizeof_key); + o.ws("\n#define YYPEEK() *cursor"); + o.ws("\n#define YYSKIP() ++cursor"); + if (backup) + { + o.ws("\n#define YYBACKUP() marker = cursor"); + o.ws("\n#define YYRESTORE() cursor = marker"); + } + if (backupctx) + { + o.ws("\n#define YYBACKUPCTX() ctxmarker = cursor"); + o.ws("\n#define YYRESTORECTX() cursor = ctxmarker"); + } + o.ws("\n#define YYLESSTHAN(n) (limit - cursor) < n"); + o.ws("\n#define YYFILL(n) { break; }"); + o.ws("\n"); + o.ws("\nstatic int action_").wstring(name); + o.ws("\n").wind(1).ws("( unsigned int i"); + o.ws("\n").wind(1).ws(", const YYKEYTYPE *keys"); + o.ws("\n").wind(1).ws(", const YYCTYPE *start"); + o.ws("\n").wind(1).ws(", const YYCTYPE *token"); + o.ws("\n").wind(1).ws(", const YYCTYPE **cursor"); + o.ws("\n").wind(1).ws(", YYKEYTYPE rule_act"); + o.ws("\n").wind(1).ws(")"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("const long pos = token - start;"); + o.ws("\n").wind(1).ws("const long len_act = *cursor - token;"); + o.ws("\n").wind(1).ws("const long len_exp = (long) keys [3 * i + 1];"); + o.ws("\n").wind(1).ws("const YYKEYTYPE rule_exp = keys [3 * i + 2];"); + 
o.ws("\n").wind(1).ws("if (rule_exp == ").wu32(default_rule).ws(") {"); + o.ws("\n").wind(2).ws("fprintf"); + o.ws("\n").wind(3).ws("( stderr"); + o.ws("\n").wind(3).ws(", \"warning: lex_").wstring(name).ws(": control flow is undefined for input\""); + o.ws("\n").wind(4).ws("\" at position %ld, rerun re2c with '-W'\\n\""); + o.ws("\n").wind(3).ws(", pos"); + o.ws("\n").wind(3).ws(");"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("if (len_act == len_exp && rule_act == rule_exp) {"); + o.ws("\n").wind(2).ws("const YYKEYTYPE offset = keys[3 * i];"); + o.ws("\n").wind(2).ws("*cursor = token + offset;"); + o.ws("\n").wind(2).ws("return 0;"); + o.ws("\n").wind(1).ws("} else {"); + o.ws("\n").wind(2).ws("fprintf"); + o.ws("\n").wind(3).ws("( stderr"); + o.ws("\n").wind(3).ws(", \"error: lex_").wstring(name).ws(": at position %ld (iteration %u):\\n\""); + o.ws("\n").wind(4).ws("\"\\texpected: match length %ld, rule %u\\n\""); + o.ws("\n").wind(4).ws("\"\\tactual: match length %ld, rule %u\\n\""); + o.ws("\n").wind(3).ws(", pos"); + o.ws("\n").wind(3).ws(", i"); + o.ws("\n").wind(3).ws(", len_exp"); + o.ws("\n").wind(3).ws(", rule_exp"); + o.ws("\n").wind(3).ws(", len_act"); + o.ws("\n").wind(3).ws(", rule_act"); + o.ws("\n").wind(3).ws(");"); + o.ws("\n").wind(2).ws("return 1;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n}"); + o.ws("\n"); + o.ws("\nint lex_").wstring(name).ws("()"); + o.ws("\n{"); + o.ws("\n").wind(1).ws("const size_t padding = ").wu64(maxfill).ws("; /* YYMAXFILL */"); + o.ws("\n").wind(1).ws("int status = 0;"); + o.ws("\n").wind(1).ws("size_t input_len = 0;"); + o.ws("\n").wind(1).ws("size_t keys_count = 0;"); + o.ws("\n").wind(1).ws("YYCTYPE *input = NULL;"); + o.ws("\n").wind(1).ws("YYKEYTYPE *keys = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *cursor = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *limit = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *token = NULL;"); + o.ws("\n").wind(1).ws("const YYCTYPE *eof = NULL;"); + 
o.ws("\n").wind(1).ws("unsigned int i = 0;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("input = (YYCTYPE *) read_file"); + o.ws("\n").wind(2).ws("(\"").wstring(o.file_name).ws(".").wstring(name).ws(".input\""); + o.ws("\n").wind(2).ws(", sizeof (YYCTYPE)"); + o.ws("\n").wind(2).ws(", padding"); + o.ws("\n").wind(2).ws(", &input_len"); + o.ws("\n").wind(2).ws(");"); + o.ws("\n").wind(1).ws("if (input == NULL) {"); + o.ws("\n").wind(2).ws("status = 1;"); + o.ws("\n").wind(2).ws("goto end;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + if (sizeof_cunit > 1) + { + o.ws("\n").wind(1).ws("for (i = 0; i < input_len; ++i) {"); + from_le(o, 2, sizeof_cunit, "input[i]"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + } + o.ws("\n").wind(1).ws("keys = (YYKEYTYPE *) read_file"); + o.ws("\n").wind(2).ws("(\"").wstring(o.file_name).ws(".").wstring(name).ws(".keys\""); + o.ws("\n").wind(2).ws(", 3 * sizeof (YYKEYTYPE)"); + o.ws("\n").wind(2).ws(", 0"); + o.ws("\n").wind(2).ws(", &keys_count"); + o.ws("\n").wind(2).ws(");"); + o.ws("\n").wind(1).ws("if (keys == NULL) {"); + o.ws("\n").wind(2).ws("status = 1;"); + o.ws("\n").wind(2).ws("goto end;"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + if (sizeof_key > 1) + { + o.ws("\n").wind(1).ws("for (i = 0; i < 3 * keys_count; ++i) {"); + from_le(o, 2, sizeof_key, "keys[i]"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + } + o.ws("\n").wind(1).ws("cursor = input;"); + o.ws("\n").wind(1).ws("limit = input + input_len + padding;"); + o.ws("\n").wind(1).ws("eof = input + input_len;"); + o.ws("\n"); + o.ws("\n").wind(1).ws("for (i = 0; status == 0 && i < keys_count; ++i) {"); + o.ws("\n").wind(2).ws("token = cursor;"); + if (backup) + { + o.ws("\n").wind(2).ws("const YYCTYPE *marker = NULL;"); + } + if (backupctx) + { + o.ws("\n").wind(2).ws("const YYCTYPE *ctxmarker = NULL;"); + } + o.ws("\n").wind(2).ws("YYCTYPE yych;"); + if (accept) + { + o.ws("\n").wind(2).ws("unsigned int yyaccept = 0;"); + } + o.ws("\n"); + if (opts->bFlag && 
BitMap::first) + { + BitMap::gen (o, 2, 0, std::min (0x100u, opts->encoding.nCodeUnits ())); + } + o.ws("\n"); +} + +void Skeleton::emit_end + ( OutputFile & o + , bool backup + , bool backupctx + ) const +{ + o.ws("\n").wind(1).ws("}"); + o.ws("\n").wind(1).ws("if (status == 0) {"); + o.ws("\n").wind(2).ws("if (cursor != eof) {"); + o.ws("\n").wind(3).ws("status = 1;"); + o.ws("\n").wind(3).ws("const long pos = token - input;"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused input strings left at position %ld\\n\", pos);"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(2).ws("if (i != keys_count) {"); + o.ws("\n").wind(3).ws("status = 1;"); + o.ws("\n").wind(3).ws("fprintf(stderr, \"error: lex_").wstring(name).ws(": unused keys left after %u iterations\\n\", i);"); + o.ws("\n").wind(2).ws("}"); + o.ws("\n").wind(1).ws("}"); + o.ws("\n"); + o.ws("\nend:"); + o.ws("\n").wind(1).ws("free(input);"); + o.ws("\n").wind(1).ws("free(keys);"); + o.ws("\n"); + o.ws("\n").wind(1).ws("return status;"); + o.ws("\n}"); + o.ws("\n"); + o.ws("\n#undef YYCTYPE"); + o.ws("\n#undef YYKEYTYPE"); + o.ws("\n#undef YYPEEK"); + o.ws("\n#undef YYSKIP"); + if (backup) + { + o.ws("\n#undef YYBACKUP"); + o.ws("\n#undef YYRESTORE"); + } + if (backupctx) + { + o.ws("\n#undef YYBACKUPCTX"); + o.ws("\n#undef YYRESTORECTX"); + } + o.ws("\n#undef YYLESSTHAN"); + o.ws("\n#undef YYFILL"); + o.ws("\n"); +} + +void Skeleton::emit_epilog (OutputFile & o, const std::set & names) +{ + o.ws("\n").ws("int main()"); + o.ws("\n").ws("{"); + + for (std::set::const_iterator i = names.begin (); i != names.end (); ++i) + { + o.ws("\n").wind(1).ws("if(lex_").wstring(*i).ws("() != 0) {"); + o.ws("\n").wind(2).ws("return 1;"); + o.ws("\n").wind(1).ws("}"); + } + + o.ws("\n").wind(1).ws("return 0;"); + o.ws("\n}"); + o.ws("\n"); +} + +void Skeleton::emit_action (OutputFile & o, uint32_t ind, rule_rank_t rank) const +{ + o.wind(ind).ws("status = action_").wstring(name).ws("(i, 
keys, input, token, &cursor, ").wu32(rule2key (rank)).ws(");\n"); + o.wind(ind).ws("continue;\n"); +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/generate_data.cc b/tools/re2c/src/ir/skeleton/generate_data.cc new file mode 100644 index 000000000..60af8376e --- /dev/null +++ b/tools/re2c/src/ir/skeleton/generate_data.cc @@ -0,0 +1,215 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/conf/msg.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/util/u32lim.h" + +namespace re2c +{ + +template + static Node::covers_t cover_one (FILE * input, FILE * keys, const path_t & path); + +/* + * note [generating skeleton path cover] + * + * With --skeleton switch we need to generate lots of data: strings that + * correspond to various paths in DFA and match given regular expression. + * We try to generate path cover (a set of paths that cover all skeleton + * arcs at least once). Generation must stop as soon as the size of path + * cover exceeds limit (in which case we'll only get a partial path cover). + * + * The algorithm walks graph nodes in deep-first order and assigns suffix + * to each node (a path from this node to end node). In order to calculate + * suffix for a given node the algorithm must know suffix for any child + * node (end nodes are assigned empty suffix at start). Suffix is only + * calculated once for each node and then reused as much times as the node + * is visited. This is what reduces search space. + * + * The algorithm calculates prefix (multipath to current node). If current + * node has already been assigned suffix, the algorithm immediately + * calculates path cover from prefix and suffix. Otherwise it recurses to + * child nodes (updating prefix on the go). 
+ * + * The algorithm avoids eternal loops by maintaining loop counter for each + * node. Loop counter is incremented on recursive enter and decremented on + * recursive return. If loop counter is greater than 1, current branch is + * abandoned and recursion returns immediately. + * + * See also note [counting skeleton edges]. + * + */ +template + void Node::cover (path_t & prefix, FILE * input, FILE * keys, covers_t &size) +{ + if (end () && suffix == NULL) + { + suffix = new path_t (rule, ctx); + } + if (suffix != NULL) + { + prefix.append (suffix); + size = size + cover_one (input, keys, prefix); + } + else if (loop < 2) + { + local_inc _ (loop); + for (arcs_t::iterator i = arcs.begin (); + i != arcs.end () && !size.overflow(); ++i) + { + path_t new_prefix = prefix; + new_prefix.extend (i->first->rule, i->first->ctx, &i->second); + i->first->cover (new_prefix, input, keys, size); + if (i->first->suffix != NULL && suffix == NULL) + { + suffix = new path_t (rule, ctx); + suffix->extend (i->first->rule, i->first->ctx, &i->second); + suffix->append (i->first->suffix); + } + } + } +} + +template + void Skeleton::generate_paths_cunit_key (FILE * input, FILE * keys) +{ + path_t prefix (nodes->rule, nodes->ctx); + Node::covers_t size = Node::covers_t::from32(0u); + + nodes->cover (prefix, input, keys, size); + + if (size.overflow ()) + { + warning + ( NULL + , line + , false + , "DFA %sis too large: can only generate partial path cover" + , incond (cond).c_str () + ); + } +} + +template + void Skeleton::generate_paths_cunit (FILE * input, FILE * keys) +{ + switch (sizeof_key) + { + case 4: generate_paths_cunit_key (input, keys); break; + case 2: generate_paths_cunit_key (input, keys); break; + case 1: generate_paths_cunit_key (input, keys); break; + } +} + +void Skeleton::generate_paths (FILE * input, FILE * keys) +{ + switch (opts->encoding.szCodeUnit ()) + { + case 4: generate_paths_cunit (input, keys); break; + case 2: generate_paths_cunit (input, keys); break; + 
case 1: generate_paths_cunit (input, keys); break; + } +} + +void Skeleton::emit_data (const char * fname) +{ + const std::string input_name = std::string (fname) + "." + name + ".input"; + FILE * input = fopen (input_name.c_str (), "wb"); + if (!input) + { + error ("cannot open file: %s", input_name.c_str ()); + exit (1); + } + const std::string keys_name = std::string (fname) + "." + name + ".keys"; + FILE * keys = fopen (keys_name.c_str (), "wb"); + if (!keys) + { + error ("cannot open file: %s", keys_name.c_str ()); + exit (1); + } + + generate_paths (input, keys); + + fclose (input); + fclose (keys); +} + +template static uintn_t to_le(uintn_t n) +{ + uintn_t m; + uint8_t *p = reinterpret_cast(&m); + for (size_t i = 0; i < sizeof(uintn_t); ++i) + { + p[i] = static_cast(n >> (i * 8)); + } + return m; +} + +template + static void keygen (FILE * f, size_t count, size_t len, size_t len_match, rule_rank_t match) +{ + const key_t m = Skeleton::rule2key (match); + + const size_t keys_size = 3 * count; + key_t * keys = new key_t [keys_size]; + for (uint32_t i = 0; i < keys_size;) + { + keys[i++] = to_le(static_cast (len)); + keys[i++] = to_le(static_cast (len_match)); + keys[i++] = to_le(m); + } + fwrite (keys, sizeof (key_t), keys_size, f); + delete [] keys; +} + +template + static Node::covers_t cover_one (FILE * input, FILE * keys, const path_t & path) +{ + const size_t len = path.len (); + + size_t count = 0; + for (size_t i = 0; i < len; ++i) + { + count = std::max (count, path[i]->size ()); + } + + const Node::covers_t size = Node::covers_t::from64(len) * Node::covers_t::from64(count); + if (!size.overflow ()) + { + // input + const size_t buffer_size = size.uint32 (); + cunit_t * buffer = new cunit_t [buffer_size]; + for (size_t i = 0; i < len; ++i) + { + const std::vector & arc = *path[i]; + const size_t width = arc.size (); + for (size_t j = 0; j < count; ++j) + { + const size_t k = j % width; + buffer[j * len + i] = to_le(static_cast (arc[k])); + } + } + 
fwrite (buffer, sizeof (cunit_t), buffer_size, input); + delete [] buffer; + + // keys + keygen (keys, count, len, path.len_matching (), path.match ()); + } + + return size; +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/match_empty.cc b/tools/re2c/src/ir/skeleton/match_empty.cc new file mode 100644 index 000000000..16fba615e --- /dev/null +++ b/tools/re2c/src/ir/skeleton/match_empty.cc @@ -0,0 +1,49 @@ +#include +#include + +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/rules.h" + +namespace re2c +{ + +void Skeleton::warn_match_empty () +{ + Node & head = nodes[0]; + + head.calc_reachable (); + const std::set & reach = head.reachable; + + // warn about rules that match empty string + if (!head.rule.rank.is_none ()) + { + bool reachable = head.end (); + for (std::set::const_iterator i = reach.begin (); + !reachable && i != reach.end (); ++i) + { + reachable |= i->rank.is_none (); + } + if (reachable) + { + warn.match_empty_string (rules[head.rule.rank].line); + } + } + + // warn about rules that match empty string with nonempty trailing context + if (head.ctx) + { + for (std::set::const_iterator i = reach.begin (); i != reach.end (); ++i) + { + if (i->restorectx) + { + warn.match_empty_string (rules[i->rank].line); + } + } + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/maxlen.cc b/tools/re2c/src/ir/skeleton/maxlen.cc new file mode 100644 index 000000000..3f1d93310 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/maxlen.cc @@ -0,0 +1,50 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include + +#include "src/ir/skeleton/skeleton.h" + +namespace re2c +{ + +// 0 < DIST_MAX < DIST_ERROR <= std::numeric_limits::max() +const uint32_t Node::DIST_ERROR = std::numeric_limits::max(); +const uint32_t Node::DIST_MAX = DIST_ERROR - 1; + +// different from YYMAXFILL calculation +// in the 
way it handles loops and empty regexp +void Node::calc_dist () +{ + if (dist != DIST_ERROR) + { + return; + } + else if (end ()) + { + dist = 0; + } + else if (loop < 2) + { + local_inc _ (loop); + for (arcs_t::iterator i = arcs.begin (); i != arcs.end (); ++i) + { + i->first->calc_dist (); + if (i->first->dist != DIST_ERROR) + { + if (dist == DIST_ERROR) + { + dist = i->first->dist; + } + else + { + dist = std::max (dist, i->first->dist); + } + } + } + dist = std::min (dist + 1, DIST_MAX); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/path.h b/tools/re2c/src/ir/skeleton/path.h new file mode 100644 index 000000000..d09861e5a --- /dev/null +++ b/tools/re2c/src/ir/skeleton/path.h @@ -0,0 +1,103 @@ +#ifndef _RE2C_IR_SKELETON_PATH_ +#define _RE2C_IR_SKELETON_PATH_ + +#include + +#include "src/ir/rule_rank.h" +#include "src/util/c99_stdint.h" + +namespace re2c +{ + +struct rule_t +{ + rule_rank_t rank; + bool restorectx; + + rule_t (rule_rank_t r, bool c) + : rank (r) + , restorectx (c) + {} + + // needed by STL containers + // same as 'std::pair' comparator + bool operator < (const rule_t & r) const + { + return rank < r.rank + || (!(r.rank < rank) && restorectx < r.restorectx); + } +}; + +class path_t +{ +public: + typedef std::vector arc_t; + +private: + std::vector arcs; + + rule_t rule; + size_t rule_pos; + + bool ctx; + size_t ctx_pos; + +public: + explicit path_t (rule_t r, bool c) + : arcs () + , rule (r) + , rule_pos (0) + , ctx (c) + , ctx_pos (0) + {} + size_t len () const + { + return arcs.size (); + } + size_t len_matching () const + { + return rule.restorectx + ? 
ctx_pos + : rule_pos; + } + rule_rank_t match () const + { + return rule.rank; + } + const arc_t * operator [] (size_t i) const + { + return arcs[i]; + } + void extend (rule_t r, bool c, const arc_t * a) + { + arcs.push_back (a); + if (!r.rank.is_none ()) + { + rule = r; + rule_pos = arcs.size (); + } + if (c) + { + ctx = true; + ctx_pos = arcs.size (); + } + } + void append (const path_t * p) + { + if (!p->rule.rank.is_none ()) + { + rule = p->rule; + rule_pos = arcs.size () + p->rule_pos; + } + if (p->ctx) + { + ctx = true; + ctx_pos = arcs.size () + p->ctx_pos; + } + arcs.insert (arcs.end (), p->arcs.begin (), p->arcs.end ()); + } +}; + +} // namespace re2c + +#endif // _RE2C_IR_SKELETON_PATH_ diff --git a/tools/re2c/src/ir/skeleton/skeleton.cc b/tools/re2c/src/ir/skeleton/skeleton.cc new file mode 100644 index 000000000..deee11334 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/skeleton.cc @@ -0,0 +1,163 @@ +#include +#include +#include + +#include "src/codegen/go.h" +#include "src/conf/msg.h" +#include "src/ir/dfa/dfa.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/skeleton/skeleton.h" + +namespace re2c +{ + +Node::Node () + : arcs () + , arcsets () + , loop (0) + , rule (rule_rank_t::none (), false) + , ctx (false) + , dist (DIST_ERROR) + , reachable () + , suffix (NULL) +{} + +void Node::init(bool c, RuleOp *r, const std::vector > &a) +{ + if (r) + { + rule.rank = r->rank; + rule.restorectx = r->ctx->fixedLength () != 0; + } + + ctx = c; + + uint32_t lb = 0; + std::vector >::const_iterator + i = a.begin(), + e = a.end(); + for (; i != e; ++i) + { + Node *n = i->first; + const uint32_t ub = i->second - 1; + + // pick at most 0x100 unique edges from this range + // (for 1-byte code units this covers the whole range: [0 - 0xFF]) + // - range bounds must be included + // - values should be evenly distributed + // - values should be deterministic + const uint32_t step = 1 + (ub - lb) / 0x100; + for (uint32_t c = lb; 
c < ub; c += step) + { + arcs[n].push_back (c); + } + arcs[n].push_back (ub); + + arcsets[n].push_back (std::make_pair (lb, ub)); + lb = ub + 1; + } +} + +Node::~Node () +{ + delete suffix; +} + +bool Node::end () const +{ + return arcs.size () == 0; +} + +Skeleton::Skeleton + ( const dfa_t &dfa + , const charset_t &cs + , const rules_t &rs + , const std::string &dfa_name + , const std::string &dfa_cond + , uint32_t dfa_line + ) + : name (dfa_name) + , cond (dfa_cond) + , line (dfa_line) + , nodes_count (dfa.states.size()) + , nodes (new Node [nodes_count + 1]) // +1 for default state + , sizeof_key (4) + , rules (rs) +{ + const size_t nc = cs.size() - 1; + + // initialize skeleton nodes + Node *nil = &nodes[nodes_count]; + for (size_t i = 0; i < nodes_count; ++i) + { + dfa_state_t *s = dfa.states[i]; + std::vector > arcs; + for (size_t c = 0; c < nc;) + { + const size_t j = s->arcs[c]; + for (;++c < nc && s->arcs[c] == j;); + Node *to = j == dfa_t::NIL + ? nil + : &nodes[j]; + arcs.push_back(std::make_pair(to, cs[c])); + } + // all arcs go to default node => this node is final, drop arcs + if (arcs.size() == 1 && arcs[0].first == nil) + { + arcs.clear(); + } + nodes[i].init(s->ctx, s->rule, arcs); + } + + // calculate maximal path length, check overflow + nodes->calc_dist (); + const uint32_t maxlen = nodes->dist; + if (maxlen == Node::DIST_MAX) + { + error ("DFA path %sis too long", incond (cond).c_str ()); + exit (1); + } + + // calculate maximal rule rank (disregarding default and none rules) + uint32_t maxrule = 0; + for (uint32_t i = 0; i < nodes_count; ++i) + { + const rule_rank_t r = nodes[i].rule.rank; + if (!r.is_none () && !r.is_def ()) + { + maxrule = std::max (maxrule, r.uint32 ()); + } + } + // two upper values reserved for default and none rules) + maxrule += 2; + + // initialize size of key + const uint32_t max = std::max (maxlen, maxrule); + if (max <= std::numeric_limits::max()) + { + sizeof_key = 1; + } + else if (max <= 
std::numeric_limits::max()) + { + sizeof_key = 2; + } +} + +Skeleton::~Skeleton () +{ + delete [] nodes; +} + +uint32_t Skeleton::rule2key (rule_rank_t r) const +{ + switch (sizeof_key) + { + default: // shouldn't happen + case 4: return rule2key (r); + case 2: return rule2key (r); + case 1: return rule2key (r); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/skeleton.h b/tools/re2c/src/ir/skeleton/skeleton.h new file mode 100644 index 000000000..78c082716 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/skeleton.h @@ -0,0 +1,174 @@ +#ifndef _RE2C_IR_SKELETON_SKELETON_ +#define _RE2C_IR_SKELETON_SKELETON_ + +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/ir/regexp/regexp.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/way.h" +#include "src/parse/rules.h" +#include "src/util/local_increment.h" +#include "src/util/forbid_copy.h" +#include "src/util/u32lim.h" + +namespace re2c +{ + +struct dfa_t; +struct OutputFile; +class RuleOp; + +struct Node +{ + /* + * note [counting skeleton edges] + * + * To avoid any possible overflows all size calculations are wrapped in + * a special truncated unsigned 32-bit integer type that checks overflow + * on each binary operation or conversion from another type. + * + * Two things contribute to size calculation: path length and the number + * of outgoing arcs in each node. Some considerations on why these values + * will not overflow before they are converted to truncated type: + * + * - Maximal number of outgoing arcs in each node cannot exceed 32 bits: + * it is bounded by the number of code units in current encoding, and + * re2c doesn't support any encoding with more than 2^32 code units. + * Conversion is safe. + * + * - Maximal path length cannot exceed 32 bits: we estimate it right + * after skeleton construction and check for overflow. 
If path length + * does overflow, an error is reported and re2c aborts. + */ + + // Type for calculating the size of path cover. + // Paths are dumped to file as soon as generated and don't eat + // heap space. The total size of path cover (measured in edges) + // is O(N^2) where N is the number of edges in skeleton. + typedef u32lim_t<1024 * 1024 * 1024> covers_t; // ~1Gb + + // Type for counting arcs in paths that cause undefined behaviour. + // These paths are stored on heap, so the limit should be low. + // Most real-world cases have only a few short paths. + // We don't need all paths anyway, just some examples. + typedef u32lim_t<1024> nakeds_t; // ~1Kb + + typedef std::map arcs_t; + typedef std::map arcsets_t; + typedef local_increment_t local_inc; + + // outgoing arcs + arcs_t arcs; + arcsets_t arcsets; + + // how many times this node has been visited + // (controls looping in graph traversals) + uint8_t loop; + + // rule for corresponding DFA state (if any) + rule_t rule; + + // start of trailing context + bool ctx; + + // maximal distance to end node (assuming one iteration per loop) + static const uint32_t DIST_ERROR; + static const uint32_t DIST_MAX; + uint32_t dist; + + // rules reachable from this node (including absent rule) + std::set reachable; + + // path to end node (for constructing path cover) + path_t * suffix; + + Node (); + void init(bool b, RuleOp *r, const std::vector > &arcs); + ~Node (); + bool end () const; + void calc_dist (); + void calc_reachable (); + template + void cover (path_t & prefix, FILE * input, FILE * keys, covers_t &size); + void naked_ways (way_t & prefix, std::vector & ways, nakeds_t &size); + + FORBID_COPY (Node); +}; + +struct Skeleton +{ + const std::string name; + const std::string cond; + const uint32_t line; + + const size_t nodes_count; + Node * nodes; + size_t sizeof_key; + rules_t rules; + + Skeleton + ( const dfa_t &dfa + , const charset_t &cs + , const rules_t & rs + , const std::string &dfa_name + , const 
std::string &dfa_cond + , uint32_t dfa_line + ); + ~Skeleton (); + void warn_undefined_control_flow (); + void warn_unreachable_rules (); + void warn_match_empty (); + void emit_data (const char * fname); + static void emit_prolog (OutputFile & o); + void emit_start + ( OutputFile & o + , size_t maxfill + , bool backup + , bool backupctx + , bool accept + ) const; + void emit_end + ( OutputFile & o + , bool backup + , bool backupctx + ) const; + static void emit_epilog (OutputFile & o, const std::set & names); + void emit_action (OutputFile & o, uint32_t ind, rule_rank_t rank) const; + + template static key_t rule2key (rule_rank_t r); + uint32_t rule2key (rule_rank_t r) const; + +private: + template + void generate_paths_cunit_key (FILE * input, FILE * keys); + template + void generate_paths_cunit (FILE * input, FILE * keys); + void generate_paths (FILE * input, FILE * keys); + + FORBID_COPY (Skeleton); +}; + +template key_t Skeleton::rule2key (rule_rank_t r) +{ + if (r.is_none()) { + return std::numeric_limits::max(); + } else if (r.is_def()) { + key_t k = std::numeric_limits::max(); + return --k; + } else { + return static_cast(r.uint32()); + } +} + +} // namespace re2c + +#endif // _RE2C_IR_SKELETON_SKELETON_ diff --git a/tools/re2c/src/ir/skeleton/unreachable.cc b/tools/re2c/src/ir/skeleton/unreachable.cc new file mode 100644 index 000000000..fac41dfc3 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/unreachable.cc @@ -0,0 +1,73 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include + +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/path.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/rules.h" + +namespace re2c +{ + +void Node::calc_reachable () +{ + if (!reachable.empty ()) + { + return; + } + else if (end ()) + { + reachable.insert (rule); + } + else if (loop < 2) + { + local_inc _ (loop); + for (arcs_t::iterator i = arcs.begin (); i != arcs.end (); ++i) + { + 
i->first->calc_reachable (); + reachable.insert (i->first->reachable.begin (), i->first->reachable.end ()); + } + } +} + +void Skeleton::warn_unreachable_rules () +{ + nodes->calc_reachable (); + for (uint32_t i = 0; i < nodes_count; ++i) + { + const rule_rank_t r1 = nodes[i].rule.rank; + const std::set & rs = nodes[i].reachable; + for (std::set::const_iterator j = rs.begin (); j != rs.end (); ++j) + { + const rule_rank_t r2 = j->rank; + if (r1 == r2 || r2.is_none ()) + { + rules[r1].reachable = true; + } + else + { + rules[r1].shadow.insert (r2); + } + } + } + + // warn about unreachable rules: + // - rules that are shadowed by other rules, e.g. rule '[a]' is shadowed by '[a] [^]' + // - infinite rules that consume infinitely many characters and fail on YYFILL, e.g. '[^]*' + // - rules that contain never-matching link, e.g. '[]' with option '--empty-class match-none' + // default rule '*' should not be reported + for (rules_t::const_iterator i = rules.begin (); i != rules.end (); ++i) + { + const rule_rank_t r = i->first; + if (!r.is_none () && !r.is_def () && !rules[r].reachable) + { + warn.unreachable_rule (cond, i->second, rules); + } + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/way.cc b/tools/re2c/src/ir/skeleton/way.cc new file mode 100644 index 000000000..0f58efe42 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/way.cc @@ -0,0 +1,74 @@ +#include +#include + +#include "src/ir/skeleton/way.h" + +namespace re2c +{ + +static bool cmp_way_arcs (const way_arc_t * a1, const way_arc_t * a2); +static void fprint_way_arc (FILE * f, const way_arc_t & arc); + +bool cmp_way_arcs (const way_arc_t * a1, const way_arc_t * a2) +{ + return std::lexicographical_compare(a1->begin(), a1->end(), a2->begin(), a2->end()); +} + +// define strict weak ordering on patterns: +// 1st criterion is length (short patterns go first) +// 2nd criterion is lexicographical order (applies to patterns of equal length) +bool cmp_ways (const way_t & w1, const way_t & w2) +{ 
+ const size_t s1 = w1.size (); + const size_t s2 = w2.size (); + return (s1 == s2 && std::lexicographical_compare(w1.begin(), w1.end(), w2.begin(), w2.end(), cmp_way_arcs)) + || s1 < s2; +} + +void fprint_way (FILE * f, const way_t & w) +{ + fprintf (f, "'"); + const size_t len = w.size (); + for (size_t i = 0 ; i < len; ++i) + { + if (i > 0) + { + fprintf (f, " "); + } + if (w[i] == NULL) + { + fprintf (stderr, "(nil)"); + } + else + { + fprint_way_arc (stderr, *w[i]); + } + } + fprintf (f, "'"); +} + +void fprint_way_arc (FILE * f, const way_arc_t & arc) +{ + const size_t ranges = arc.size (); + if (ranges == 1 && arc[0].first == arc[0].second) + { + fprintf (f, "\\x%X", arc[0].first); + } + else + { + fprintf (f, "["); + for (size_t i = 0; i < ranges; ++i) + { + const uint32_t l = arc[i].first; + const uint32_t u = arc[i].second; + fprintf (f, "\\x%X", l); + if (l != u) + { + fprintf (f, "-\\x%X", u); + } + } + fprintf (f, "]"); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/ir/skeleton/way.h b/tools/re2c/src/ir/skeleton/way.h new file mode 100644 index 000000000..e10010a91 --- /dev/null +++ b/tools/re2c/src/ir/skeleton/way.h @@ -0,0 +1,20 @@ +#ifndef _RE2C_IR_SKELETON_WAY_ +#define _RE2C_IR_SKELETON_WAY_ + +#include "src/util/c99_stdint.h" +#include +#include +#include + +namespace re2c +{ + +typedef std::vector > way_arc_t; +typedef std::vector way_t; + +bool cmp_ways (const way_t & w1, const way_t & w2); +void fprint_way (FILE * f, const way_t & p); + +} // namespace re2c + +#endif // _RE2C_IR_SKELETON_WAY_ diff --git a/tools/re2c/src/main.cc b/tools/re2c/src/main.cc new file mode 100644 index 000000000..03b6ee291 --- /dev/null +++ b/tools/re2c/src/main.cc @@ -0,0 +1,60 @@ +#include "src/util/c99_stdint.h" +#include + +#include "src/codegen/output.h" +#include "src/conf/msg.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/parse/input.h" +#include "src/parse/parser.h" +#include "src/parse/scanner.h" 
+ +namespace re2c +{ + +bool bUsedYYBitmap = false; +bool bWroteGetState = false; +bool bWroteCondCheck = false; +uint32_t last_fill_index = 0; +std::string yySetupRule = ""; + +} // end namespace re2c + +using namespace re2c; + +int main(int, char *argv[]) +{ + switch (parse_opts (argv, opts)) + { + case OK: break; + case EXIT_OK: return 0; + case EXIT_FAIL: return 1; + } + + // set up the source stream + re2c::Input input (opts.source_file); + if (!input.open ()) + { + error ("cannot open source file: %s", opts.source_file); + return 1; + } + + // set up the output streams + re2c::Output output (opts.output_file, opts->header_file); + if (!output.source.open ()) + { + error ("cannot open output file: %s", opts.output_file); + return 1; + } + if (opts->tFlag && !output.header.open ()) + { + error ("cannot open header file: %s", opts->header_file); + return 1; + } + + Scanner scanner (input, output.source); + parse (scanner, output); + + return warn.error () ? 1 : 0; +} diff --git a/tools/re2c/src/parse/code.cc b/tools/re2c/src/parse/code.cc new file mode 100644 index 000000000..97a865ce8 --- /dev/null +++ b/tools/re2c/src/parse/code.cc @@ -0,0 +1,8 @@ +#include "src/parse/code.h" + +namespace re2c +{ + +free_list Code::freelist; + +} // namespace re2c diff --git a/tools/re2c/src/parse/code.h b/tools/re2c/src/parse/code.h new file mode 100644 index 000000000..d658e628a --- /dev/null +++ b/tools/re2c/src/parse/code.h @@ -0,0 +1,31 @@ +#ifndef _RE2C_PARSE_CODE_ +#define _RE2C_PARSE_CODE_ + +#include "src/util/c99_stdint.h" +#include +#include + +#include "src/parse/loc.h" +#include "src/util/free_list.h" + +namespace re2c +{ + +struct Code +{ + static free_list freelist; + + const Loc loc; + const std::string text; + + inline Code (const char * t, size_t t_len, const std::string & f, uint32_t l) + : loc (f, l) + , text (t, t_len) + { + freelist.insert (this); + } +}; + +} // namespace re2c + +#endif // _RE2C_PARSE_CODE_ diff --git a/tools/re2c/src/parse/extop.h 
b/tools/re2c/src/parse/extop.h new file mode 100644 index 000000000..d093be924 --- /dev/null +++ b/tools/re2c/src/parse/extop.h @@ -0,0 +1,17 @@ +#ifndef _RE2C_PARSE_EXTOP_ +#define _RE2C_PARSE_EXTOP_ + +#include "src/util/c99_stdint.h" + +namespace re2c +{ + +struct ExtOp +{ + uint32_t min; + uint32_t max; +}; + +} // end namespace re2c + +#endif // _RE2C_PARSE_EXTOP_ diff --git a/tools/re2c/src/parse/input.cc b/tools/re2c/src/parse/input.cc new file mode 100644 index 000000000..472e6b995 --- /dev/null +++ b/tools/re2c/src/parse/input.cc @@ -0,0 +1,31 @@ +#include "src/parse/input.h" + +namespace re2c { + +Input::Input (const char * fn) + : file (NULL) + , file_name (fn) +{} + +bool Input::open () +{ + if (file_name == "") + { + file = stdin; + } + else + { + file = fopen (file_name.c_str (), "rb"); + } + return file != NULL; +} + +Input::~Input () +{ + if (file != NULL && file != stdin) + { + fclose (file); + } +} + +} // namespace re2c diff --git a/tools/re2c/src/parse/input.h b/tools/re2c/src/parse/input.h new file mode 100644 index 000000000..f58e189e6 --- /dev/null +++ b/tools/re2c/src/parse/input.h @@ -0,0 +1,25 @@ +#ifndef _RE2C_PARSE_INPUT_ +#define _RE2C_PARSE_INPUT_ + +#include +#include + +#include "src/util/forbid_copy.h" + +namespace re2c { + +struct Input +{ + FILE * file; + std::string file_name; + + Input (const char * fn); + ~Input (); + bool open (); + + FORBID_COPY (Input); +}; + +} // namespace re2c + +#endif // _RE2C_PARSE_INPUT_ diff --git a/tools/re2c/src/parse/lex.cc b/tools/re2c/src/parse/lex.cc new file mode 100644 index 000000000..9c7e01438 --- /dev/null +++ b/tools/re2c/src/parse/lex.cc @@ -0,0 +1,2861 @@ +/* Generated by re2c 0.16 on Thu Jan 21 10:47:47 2016 */ +#line 1 "../src/parse/lex.re" +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" 
+#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/input.h" +#include "src/parse/scanner.h" +#include "src/parse/parser.h" // needed by "y.tab.h" +#include "src/parse/unescape.h" +#include "src/util/range.h" +#include "src/util/s_to_n32_unsafe.h" +#include "y.tab.h" + +extern YYSTYPE yylval; + +#define YYCTYPE unsigned char +#define YYCURSOR cur +#define YYLIMIT lim +#define YYMARKER ptr +#define YYCTXMARKER ctx +#define YYFILL(n) { fill (n); } + +namespace re2c +{ + +// source code is in ASCII: pointers have type 'char *' +// but re2c makes an implicit assumption that YYCTYPE is unsigned +// when it generates comparisons +#line 42 "../src/parse/lex.re" + + +#line 62 "../src/parse/lex.re" + + +Scanner::ParseMode Scanner::echo() +{ + bool ignore_eoc = false; + int ignore_cnt = 0; + + if (eof && cur == eof) // Catch EOF + { + return Stop; + } + + tok = cur; +echo: + +#line 62 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 160, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 160, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 
128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 16) YYFILL(16); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '%') { + if (yych <= '\t') { + if (yych >= 0x01) goto yy4; + } else { + if (yych <= '\n') goto yy6; + if (yych <= '$') goto yy4; + goto yy8; + } + } else { + if (yych <= '*') { + if (yych <= ')') goto yy4; + goto yy9; + } else { + if (yych == '/') goto yy10; + goto yy4; + } + } + ++YYCURSOR; +#line 202 "../src/parse/lex.re" + { + if (!ignore_eoc && opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len () - 1); + // -1 so we don't write out the \0 + } + if(cur == eof) + { + return Stop; + } + } +#line 132 "src/parse/lex.cc" +yy4: + ++YYCURSOR; +yy5: +#line 213 "../src/parse/lex.re" + { + goto echo; + } +#line 140 "src/parse/lex.cc" +yy6: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yybm[0+yych] & 32) { + goto yy11; + } + if (yych == '#') goto yy14; +yy7: +#line 189 "../src/parse/lex.re" + { + if (ignore_eoc) + { + ignore_cnt++; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + cline++; + goto echo; + } +#line 163 "src/parse/lex.cc" +yy8: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '{') goto yy16; + goto yy5; +yy9: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '/') goto yy18; + goto yy5; +yy10: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '*') goto yy20; + goto yy5; +yy11: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 32) { + goto yy11; + } + if (yych == '#') goto yy14; +yy13: + 
YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy7; + } else { + goto yy5; + } + } else { + goto yy19; + } +yy14: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy14; + goto yy13; + } else { + if (yych <= ' ') goto yy14; + if (yych == 'l') goto yy21; + goto yy13; + } +yy16: + ++YYCURSOR; +#line 78 "../src/parse/lex.re" + { + if (opts->rFlag) + { + fatal("found standard 're2c' block while using -r flag"); + } + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = cur[-1] == '{' + ? sizeof ("%{") - 1 + : sizeof ("/*!re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Parse; + } +#line 226 "src/parse/lex.cc" +yy18: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy22; + if (yych == '\r') goto yy24; +yy19: +#line 168 "../src/parse/lex.re" + { + if (ignore_eoc) + { + if (ignore_cnt) + { + out.ws("\n").wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } +#line 251 "src/parse/lex.cc" +yy20: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '!') goto yy25; + goto yy13; +yy21: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy26; + goto yy13; +yy22: + ++YYCURSOR; +#line 150 "../src/parse/lex.re" + { + cline++; + if (ignore_eoc) + { + if (ignore_cnt) + { + out.wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } +#line 281 "src/parse/lex.cc" +yy24: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy22; + goto yy13; +yy25: + yych = (YYCTYPE)*++YYCURSOR; + switch (yych) { + case 'g': goto yy27; + case 'i': goto yy28; + case 'm': goto yy29; + case 'r': goto yy30; + case 't': goto yy31; 
+ case 'u': goto yy32; + default: goto yy13; + } +yy26: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy33; + goto yy13; +yy27: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy34; + goto yy13; +yy28: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'g') goto yy35; + goto yy13; +yy29: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy36; + goto yy13; +yy30: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy37; + if (yych == 'u') goto yy38; + goto yy13; +yy31: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'y') goto yy39; + goto yy13; +yy32: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy40; + goto yy13; +yy33: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy41; + goto yy13; +yy34: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy42; + goto yy13; +yy35: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy43; + goto yy13; +yy36: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'x') goto yy44; + goto yy13; +yy37: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy45; + goto yy13; +yy38: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'l') goto yy46; + goto yy13; +yy39: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'p') goto yy47; + goto yy13; +yy40: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy48; + goto yy13; +yy41: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy50; + if (yych <= '9') goto yy13; + goto yy50; +yy42: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy51; + goto yy13; +yy43: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'o') goto yy52; + goto yy13; +yy44: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy53; + goto yy13; +yy45: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy16; + goto yy13; +yy46: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy54; + goto yy13; +yy47: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy55; + goto yy13; +yy48: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy56; + goto yy13; +yy49: + ++YYCURSOR; + if 
(YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy50: + if (yych <= 0x1F) { + if (yych == '\t') goto yy49; + goto yy13; + } else { + if (yych <= ' ') goto yy49; + if (yych <= '0') goto yy13; + if (yych <= '9') goto yy57; + goto yy13; + } +yy51: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy59; + goto yy13; +yy52: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy60; + goto yy13; +yy53: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy61; + goto yy13; +yy54: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy62; + goto yy13; +yy55: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 's') goto yy63; + goto yy13; +yy56: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy64; + goto yy13; +yy57: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy57; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy13; + if (yych <= '\t') goto yy65; + if (yych <= '\n') goto yy67; + goto yy13; + } else { + if (yych <= '\r') goto yy69; + if (yych == ' ') goto yy65; + goto yy13; + } +yy59: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'a') goto yy70; + goto yy13; +yy60: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy71; + goto yy13; +yy61: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy72; + goto yy13; +yy62: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy73; + goto yy13; +yy63: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy74; + goto yy13; +yy64: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy75; + goto yy13; +yy65: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy65; + goto yy13; + } else { + if (yych <= ' ') goto yy65; + if (yych == '"') goto yy76; + goto yy13; + } +yy67: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 185 "../src/parse/lex.re" + { + set_sourceline (); + goto echo; + } +#line 491 "src/parse/lex.cc" 
+yy69: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy67; + goto yy13; +yy70: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 't') goto yy78; + goto yy13; +yy71: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy79; + goto yy13; +yy72: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy80; + goto yy13; +yy73: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy81; + goto yy13; +yy74: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy82; + goto yy13; +yy75: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy83; + goto yy13; +yy76: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy76; + } + if (yych <= '\n') goto yy13; + if (yych <= '"') goto yy84; + goto yy85; +yy78: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy86; + goto yy13; +yy79: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy87; + goto yy13; +yy80: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy88; + goto yy13; +yy81: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy90; + goto yy13; +yy82: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy91; + goto yy13; +yy83: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy92; + goto yy13; +yy84: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy67; + if (yych == '\r') goto yy69; + goto yy13; +yy85: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy13; + goto yy76; +yy86: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == ':') goto yy94; + goto yy13; +yy87: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy95; + goto yy13; +yy88: + ++YYCURSOR; +#line 119 "../src/parse/lex.re" + { + if (opts->target != opt_t::DOT) + { + out.wdelay_yymaxfill (); + } + tok = pos = cur; + ignore_eoc = true; + goto echo; + } +#line 585 "src/parse/lex.cc" +yy90: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy96; + goto yy13; +yy91: + yych = 
(YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy97; + goto yy13; +yy92: + ++YYCURSOR; +#line 105 "../src/parse/lex.re" + { + if (!opts->rFlag) + { + fatal("found 'use:re2c' block without -r flag"); + } + reuse(); + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = sizeof ("/*!use:re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Reuse; + } +#line 611 "src/parse/lex.cc" +yy94: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'r') goto yy98; + goto yy13; +yy95: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy99; + goto yy13; +yy96: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy100; + goto yy13; +yy97: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy102; + goto yy13; +yy98: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy104; + goto yy13; +yy99: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'c') goto yy105; + goto yy13; +yy100: + ++YYCURSOR; +#line 93 "../src/parse/lex.re" + { + if (opts->rFlag) + { + opts.reset_mapCodeName (); + } + else + { + fatal("found 'rules:re2c' block without -r flag"); + } + tok = cur; + return Rules; + } +#line 651 "src/parse/lex.cc" +yy102: + ++YYCURSOR; +#line 139 "../src/parse/lex.re" + { + tok = pos = cur; + ignore_eoc = true; + if (opts->target != opt_t::DOT) + { + out.wdelay_line_info ().ws("\n") + .wdelay_types ().ws("\n") + .wline_info (cline, get_fname ().c_str ()); + } + goto echo; + } +#line 666 "src/parse/lex.cc" +yy104: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '2') goto yy107; + goto yy13; +yy105: + ++YYCURSOR; +#line 134 "../src/parse/lex.re" + { + tok = pos = cur; + ignore_eoc = true; + goto echo; + } +#line 679 "src/parse/lex.cc" +yy107: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'c') goto yy13; + ++YYCURSOR; +#line 128 "../src/parse/lex.re" + { + tok = pos = cur; + out.wdelay_state_goto (opts->topIndent); + ignore_eoc = true; + goto echo; + } +#line 691 "src/parse/lex.cc" +} +#line 216 "../src/parse/lex.re" + +} + +int Scanner::scan() +{ + 
uint32_t depth; + +scan: + tchar = cur - pos; + tline = cline; + tok = cur; + switch (lexer_state) + { + case LEX_NORMAL: goto start; + case LEX_FLEX_NAME: goto flex_name; + } + +start: + +#line 713 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 144, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 144, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 128, 128, 128, 128, 128, 128, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 0, 128, 128, 160, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 6) YYFILL(6); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 16) { + goto yy114; + } + if (yych <= '9') { + if (yych <= '&') { + if (yych <= '\r') { + if (yych <= 0x08) goto yy112; + if (yych <= '\n') goto yy117; + if (yych >= '\r') goto yy119; + } else { + if (yych <= '"') { + if (yych >= '"') goto yy120; + } else { 
+ if (yych == '%') goto yy122; + } + } + } else { + if (yych <= '+') { + if (yych <= '\'') goto yy123; + if (yych <= ')') goto yy125; + if (yych <= '*') goto yy127; + goto yy129; + } else { + if (yych <= '-') { + if (yych <= ',') goto yy125; + } else { + if (yych <= '.') goto yy131; + if (yych <= '/') goto yy133; + goto yy134; + } + } + } + } else { + if (yych <= '\\') { + if (yych <= '>') { + if (yych <= ':') goto yy135; + if (yych == '<') goto yy136; + goto yy125; + } else { + if (yych <= '@') { + if (yych <= '?') goto yy129; + } else { + if (yych <= 'Z') goto yy134; + if (yych <= '[') goto yy137; + goto yy125; + } + } + } else { + if (yych <= 'q') { + if (yych == '_') goto yy134; + if (yych >= 'a') goto yy134; + } else { + if (yych <= 'z') { + if (yych <= 'r') goto yy139; + goto yy134; + } else { + if (yych <= '{') goto yy140; + if (yych <= '|') goto yy125; + } + } + } + } +yy112: + ++YYCURSOR; +yy113: +#line 388 "../src/parse/lex.re" + { + fatalf("unexpected character: '%c'", *tok); + goto scan; + } +#line 823 "src/parse/lex.cc" +yy114: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 16) { + goto yy114; + } +#line 372 "../src/parse/lex.re" + { + goto scan; + } +#line 835 "src/parse/lex.cc" +yy117: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 0x1F) { + if (yych == '\t') goto yy142; + } else { + if (yych <= ' ') goto yy142; + if (yych == '#') goto yy145; + } +yy118: +#line 381 "../src/parse/lex.re" + { + if (cur == eof) return 0; + pos = cur; + cline++; + goto scan; + } +#line 853 "src/parse/lex.cc" +yy119: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy117; + goto yy113; +yy120: + ++YYCURSOR; +#line 265 "../src/parse/lex.re" + { yylval.regexp = lex_str('"', opts->bCaseInsensitive || opts->bCaseInverted); return TOKEN_REGEXP; } +#line 862 "src/parse/lex.cc" +yy122: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '}') goto yy147; + goto yy113; +yy123: + ++YYCURSOR; +#line 
264 "../src/parse/lex.re" + { yylval.regexp = lex_str('\'', opts->bCaseInsensitive || !opts->bCaseInverted); return TOKEN_REGEXP; } +#line 871 "src/parse/lex.cc" +yy125: + ++YYCURSOR; +yy126: +#line 275 "../src/parse/lex.re" + { + return *tok; + } +#line 879 "src/parse/lex.cc" +yy127: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '/') goto yy147; +#line 279 "../src/parse/lex.re" + { + yylval.op = *tok; + return TOKEN_STAR; + } +#line 888 "src/parse/lex.cc" +yy129: + ++YYCURSOR; +#line 283 "../src/parse/lex.re" + { + yylval.op = *tok; + return TOKEN_CLOSE; + } +#line 896 "src/parse/lex.cc" +yy131: + ++YYCURSOR; +#line 367 "../src/parse/lex.re" + { + yylval.regexp = mkDot(); + return TOKEN_REGEXP; + } +#line 904 "src/parse/lex.cc" +yy133: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '*') goto yy149; + if (yych == '/') goto yy151; + goto yy126; +yy134: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + goto yy159; +yy135: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == '=') goto yy160; + goto yy113; +yy136: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '!') goto yy162; + if (yych == '>') goto yy164; + goto yy126; +yy137: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '^') goto yy165; +#line 266 "../src/parse/lex.re" + { yylval.regexp = lex_cls(false); return TOKEN_REGEXP; } +#line 930 "src/parse/lex.cc" +yy139: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == 'e') goto yy167; + goto yy159; +yy140: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yybm[0+yych] & 64) { + goto yy170; + } + if (yych <= 'Z') { + if (yych == ',') goto yy168; + if (yych >= 'A') goto yy172; + } else { + if (yych <= '_') { + if (yych >= '_') goto yy172; + } else { + if (yych <= '`') goto yy141; + if (yych <= 'z') goto yy172; + } + } +yy141: +#line 235 "../src/parse/lex.re" + { + depth = 1; + goto code; + } +#line 959 "src/parse/lex.cc" +yy142: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) 
YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy142; + } else { + if (yych <= ' ') goto yy142; + if (yych == '#') goto yy145; + } +yy144: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy118; + } else { + goto yy126; + } + } else { + if (yyaccept == 2) { + goto yy141; + } else { + goto yy169; + } + } +yy145: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy145; + goto yy144; + } else { + if (yych <= ' ') goto yy145; + if (yych == 'l') goto yy174; + goto yy144; + } +yy147: + ++YYCURSOR; +#line 259 "../src/parse/lex.re" + { + tok = cur; + return 0; + } +#line 1004 "src/parse/lex.cc" +yy149: + ++YYCURSOR; +#line 253 "../src/parse/lex.re" + { + depth = 1; + goto comment; + } +#line 1012 "src/parse/lex.cc" +yy151: + ++YYCURSOR; +#line 250 "../src/parse/lex.re" + { + goto nextLine; + } +#line 1019 "src/parse/lex.cc" +yy153: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 351 "../src/parse/lex.re" + { + if (!opts->FFlag) { + yylval.str = new std::string (tok, tok_len()); + return TOKEN_ID; + } else { + RegExp *r = NULL; + const bool casing = opts->bCaseInsensitive || opts->bCaseInverted; + for (char *s = tok; s < cur; ++s) { + const uint32_t c = static_cast(*s); + r = doCat(r, casing ? ichr(c) : schr(c)); + } + yylval.regexp = r ? 
r : new NullOp; + return TOKEN_REGEXP; + } + } +#line 1039 "src/parse/lex.cc" +yy155: + yych = (YYCTYPE)*++YYCURSOR; + goto yy178; +yy156: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 346 "../src/parse/lex.re" + { + yylval.str = new std::string (tok, tok_len ()); + return TOKEN_ID; + } +#line 1051 "src/parse/lex.cc" +yy158: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy159: + if (yybm[0+yych] & 32) { + goto yy158; + } + if (yych <= ' ') { + if (yych == '\t') goto yy155; + if (yych <= 0x1F) goto yy153; + goto yy155; + } else { + if (yych <= ',') { + if (yych <= '+') goto yy153; + goto yy156; + } else { + if (yych <= '<') goto yy153; + if (yych <= '>') goto yy156; + goto yy153; + } + } +yy160: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '>') goto yy179; +#line 244 "../src/parse/lex.re" + { + tok += 2; /* skip ":=" */ + depth = 0; + goto code; + } +#line 1084 "src/parse/lex.cc" +yy162: + ++YYCURSOR; +#line 272 "../src/parse/lex.re" + { + return TOKEN_SETUP; + } +#line 1091 "src/parse/lex.cc" +yy164: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + goto yy182; +yy165: + ++YYCURSOR; +#line 267 "../src/parse/lex.re" + { yylval.regexp = lex_cls(true); return TOKEN_REGEXP; } +#line 1100 "src/parse/lex.cc" +yy167: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == '2') goto yy187; + goto yy159; +yy168: + ++YYCURSOR; +yy169: +#line 319 "../src/parse/lex.re" + { + fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); + } +#line 1113 "src/parse/lex.cc" +yy170: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy170; + } + if (yych <= '^') { + if (yych <= ',') { + if (yych <= '+') goto yy144; + goto yy188; + } else { + if (yych <= '@') goto yy144; + if (yych >= '[') goto yy144; + } + } else { + if (yych <= 'z') { + if (yych == '`') goto yy144; + } else { + if (yych == '}') 
goto yy189; + goto yy144; + } + } +yy172: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '^') { + if (yych <= '9') { + if (yych <= '/') goto yy144; + goto yy172; + } else { + if (yych <= '@') goto yy144; + if (yych <= 'Z') goto yy172; + goto yy144; + } + } else { + if (yych <= 'z') { + if (yych == '`') goto yy144; + goto yy172; + } else { + if (yych == '}') goto yy191; + goto yy144; + } + } +yy174: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'i') goto yy193; + goto yy144; +yy175: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 333 "../src/parse/lex.re" + { + yylval.str = new std::string (tok, tok_len ()); + if (opts->FFlag) + { + lexer_state = LEX_FLEX_NAME; + return TOKEN_FID; + } + else + { + return TOKEN_ID; + } + } +#line 1179 "src/parse/lex.cc" +yy177: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy178: + if (yych <= ' ') { + if (yych == '\t') goto yy177; + if (yych <= 0x1F) goto yy175; + goto yy177; + } else { + if (yych <= ',') { + if (yych <= '+') goto yy175; + goto yy156; + } else { + if (yych <= '<') goto yy175; + if (yych <= '>') goto yy156; + goto yy175; + } + } +yy179: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 240 "../src/parse/lex.re" + { + return *tok; + } +#line 1206 "src/parse/lex.cc" +yy181: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; +yy182: + if (yych <= '9') { + if (yych <= '\t') { + if (yych <= 0x08) goto yy144; + goto yy181; + } else { + if (yych == ' ') goto yy181; + goto yy144; + } + } else { + if (yych <= '=') { + if (yych <= ':') goto yy183; + if (yych <= '<') goto yy144; + goto yy184; + } else { + if (yych == '{') goto yy185; + goto yy144; + } + } +yy183: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '=') goto yy185; + goto yy144; +yy184: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != '>') goto yy144; +yy185: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 269 "../src/parse/lex.re" + { + return TOKEN_NOCOND; + } 
+#line 1244 "src/parse/lex.cc" +yy187: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == 'c') goto yy194; + goto yy159; +yy188: + yyaccept = 3; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '/') goto yy169; + if (yych <= '9') goto yy195; + if (yych == '}') goto yy197; + goto yy169; +yy189: + ++YYCURSOR; +#line 288 "../src/parse/lex.re" + { + if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min)) + { + fatal ("repetition count overflow"); + } + yylval.extop.max = yylval.extop.min; + return TOKEN_CLOSESIZE; + } +#line 1268 "src/parse/lex.cc" +yy191: + ++YYCURSOR; +#line 323 "../src/parse/lex.re" + { + if (!opts->FFlag) { + fatal("curly braces for names only allowed with -F switch"); + } + yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces + return TOKEN_ID; + } +#line 1279 "src/parse/lex.cc" +yy193: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'n') goto yy199; + goto yy144; +yy194: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych == ':') goto yy200; + goto yy159; +yy195: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '/') goto yy144; + if (yych <= '9') goto yy195; + if (yych == '}') goto yy202; + goto yy144; +yy197: + ++YYCURSOR; +#line 310 "../src/parse/lex.re" + { + if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + yylval.extop.max = std::numeric_limits::max(); + return TOKEN_CLOSESIZE; + } +#line 1308 "src/parse/lex.cc" +yy199: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == 'e') goto yy204; + goto yy144; +yy200: + ++YYCURSOR; +#line 331 "../src/parse/lex.re" + { lex_conf (); return TOKEN_CONF; } +#line 1317 "src/parse/lex.cc" +yy202: + ++YYCURSOR; +#line 297 "../src/parse/lex.re" + { + const char * p = strchr (tok, ','); + if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + if (!s_to_u32_unsafe (p + 1, cur - 1, 
yylval.extop.max)) + { + fatal ("repetition upper bound overflow"); + } + return TOKEN_CLOSESIZE; + } +#line 1333 "src/parse/lex.cc" +yy204: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy206; + if (yych <= '9') goto yy144; + goto yy206; +yy205: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy206: + if (yych <= 0x1F) { + if (yych == '\t') goto yy205; + goto yy144; + } else { + if (yych <= ' ') goto yy205; + if (yych <= '0') goto yy144; + if (yych >= ':') goto yy144; + } +yy207: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x08) goto yy144; + } else { + if (yych <= '\n') goto yy211; + if (yych <= '\f') goto yy144; + goto yy213; + } + } else { + if (yych <= ' ') { + if (yych <= 0x1F) goto yy144; + } else { + if (yych <= '/') goto yy144; + if (yych <= '9') goto yy207; + goto yy144; + } + } +yy209: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy209; + goto yy144; + } else { + if (yych <= ' ') goto yy209; + if (yych == '"') goto yy214; + goto yy144; + } +yy211: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 376 "../src/parse/lex.re" + { + set_sourceline (); + goto scan; + } +#line 1394 "src/parse/lex.cc" +yy213: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy211; + goto yy144; +yy214: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy214; + } + if (yych <= '\n') goto yy144; + if (yych >= '#') goto yy217; + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy211; + if (yych == '\r') goto yy213; + goto yy144; +yy217: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy144; + goto yy214; +} +#line 392 "../src/parse/lex.re" + + +flex_name: + +#line 1424 "src/parse/lex.cc" +{ + 
YYCTYPE yych; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy222; + if (yych == '\r') goto yy224; + ++YYCURSOR; +yy221: +#line 403 "../src/parse/lex.re" + { + YYCURSOR = tok; + goto start; + } +#line 1438 "src/parse/lex.cc" +yy222: + ++YYCURSOR; +#line 397 "../src/parse/lex.re" + { + YYCURSOR = tok; + lexer_state = LEX_NORMAL; + return TOKEN_FID_END; + } +#line 1447 "src/parse/lex.cc" +yy224: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) == '\n') goto yy222; + goto yy221; +} +#line 407 "../src/parse/lex.re" + + +code: + +#line 1458 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 112, 0, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 112, 96, 64, 96, 96, 96, 96, 32, + 96, 96, 96, 96, 96, 96, 96, 96, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 0, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, + }; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '&') { + if (yych <= '\n') { + if (yych <= 0x00) goto yy227; + if (yych <= '\t') goto yy229; + goto yy231; + } 
else { + if (yych == '"') goto yy233; + goto yy229; + } + } else { + if (yych <= '{') { + if (yych <= '\'') goto yy234; + if (yych <= 'z') goto yy229; + goto yy235; + } else { + if (yych == '}') goto yy237; + goto yy229; + } + } +yy227: + ++YYCURSOR; +#line 470 "../src/parse/lex.re" + { + if (cur == eof) + { + if (depth) + { + fatal("missing '}'"); + } + return 0; + } + goto code; + } +#line 1531 "src/parse/lex.cc" +yy229: + ++YYCURSOR; +yy230: +#line 484 "../src/parse/lex.re" + { + goto code; + } +#line 1539 "src/parse/lex.cc" +yy231: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + YYCTXMARKER = YYCURSOR; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych >= '\t') goto yy239; + } else { + if (yych <= '\n') goto yy241; + if (yych >= '\r') goto yy241; + } + } else { + if (yych <= ' ') { + if (yych >= ' ') goto yy239; + } else { + if (yych == '#') goto yy242; + } + } +yy232: +#line 451 "../src/parse/lex.re" + { + if (depth == 0) + { + tok += strspn(tok, " \t\r\n"); + while (cur > tok && strchr(" \t\r\n", cur[-1])) + { + --cur; + } + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } +#line 1579 "src/parse/lex.cc" +yy233: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy230; + goto yy246; +yy234: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy230; + goto yy251; +yy235: + ++YYCURSOR; +#line 423 "../src/parse/lex.re" + { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else + { + ++depth; + } + goto code; + } +#line 1604 "src/parse/lex.cc" +yy237: + ++YYCURSOR; +#line 411 "../src/parse/lex.re" + { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else if (--depth == 0) + { + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + goto code; + } +#line 
1620 "src/parse/lex.cc" +yy239: + yyaccept = 2; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= 0x1F) { + if (yych == '\t') goto yy253; + } else { + if (yych <= ' ') goto yy253; + if (yych == '#') goto yy242; + } +yy240: + YYCURSOR = YYCTXMARKER; +#line 438 "../src/parse/lex.re" + { + if (depth == 0) + { + goto code; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } +#line 1646 "src/parse/lex.cc" +yy241: + yych = (YYCTYPE)*++YYCURSOR; + goto yy240; +yy242: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 16) { + goto yy242; + } + if (yych == 'l') goto yy255; +yy244: + YYCURSOR = YYMARKER; + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy232; + } else { + goto yy230; + } + } else { + goto yy240; + } +yy245: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy246: + if (yybm[0+yych] & 32) { + goto yy245; + } + if (yych <= '\n') goto yy244; + if (yych >= '#') goto yy249; +yy247: + ++YYCURSOR; +#line 481 "../src/parse/lex.re" + { + goto code; + } +#line 1685 "src/parse/lex.cc" +yy249: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy244; + goto yy245; +yy250: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy251: + if (yybm[0+yych] & 64) { + goto yy250; + } + if (yych <= '\n') goto yy244; + if (yych <= '\'') goto yy247; + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy244; + goto yy250; +yy253: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy253; + goto yy244; + } else { + if (yych <= ' ') goto yy253; + if (yych == '#') goto yy242; + goto yy244; + } +yy255: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'i') goto yy244; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'n') goto yy244; + yych 
= (YYCTYPE)*++YYCURSOR; + if (yych != 'e') goto yy244; + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy260; + if (yych <= '9') goto yy244; + goto yy260; +yy259: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy260: + if (yych <= 0x1F) { + if (yych == '\t') goto yy259; + goto yy244; + } else { + if (yych <= ' ') goto yy259; + if (yych <= '0') goto yy244; + if (yych >= ':') goto yy244; + } +yy261: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy261; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy244; + if (yych <= '\t') goto yy263; + if (yych <= '\n') goto yy265; + goto yy244; + } else { + if (yych <= '\r') goto yy267; + if (yych != ' ') goto yy244; + } +yy263: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy263; + goto yy244; + } else { + if (yych <= ' ') goto yy263; + if (yych == '"') goto yy268; + goto yy244; + } +yy265: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 434 "../src/parse/lex.re" + { + set_sourceline (); + goto code; + } +#line 1780 "src/parse/lex.cc" +yy267: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy265; + goto yy244; +yy268: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '!') { + if (yych == '\n') goto yy244; + goto yy268; + } else { + if (yych <= '"') goto yy270; + if (yych == '\\') goto yy271; + goto yy268; + } +yy270: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy265; + if (yych == '\r') goto yy267; + goto yy244; +yy271: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy244; + goto yy268; +} +#line 487 "../src/parse/lex.re" + + +comment: + +#line 1814 "src/parse/lex.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 160, 
0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 160, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= ')') { + if (yych == '\n') goto yy276; + } else { + if (yych <= '*') goto yy278; + if (yych == '/') goto yy279; + } + ++YYCURSOR; +yy275: +#line 519 "../src/parse/lex.re" + { + if (cur == eof) + { + return 0; + } + goto comment; + } +#line 1869 "src/parse/lex.cc" +yy276: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yybm[0+yych] & 32) { + goto yy280; + } + if (yych == '#') goto yy283; +yy277: +#line 510 "../src/parse/lex.re" + { + if (cur == eof) + { + return 0; + } + tok = pos = cur; + cline++; + goto comment; + } +#line 1887 "src/parse/lex.cc" +yy278: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '/') goto 
yy285; + goto yy275; +yy279: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '*') goto yy287; + goto yy275; +yy280: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 32) { + goto yy280; + } + if (yych == '#') goto yy283; +yy282: + YYCURSOR = YYMARKER; + goto yy277; +yy283: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy283; + goto yy282; + } else { + if (yych <= ' ') goto yy283; + if (yych == 'l') goto yy289; + goto yy282; + } +yy285: + ++YYCURSOR; +#line 491 "../src/parse/lex.re" + { + if (--depth == 0) + { + goto scan; + } + else + { + goto comment; + } + } +#line 1932 "src/parse/lex.cc" +yy287: + ++YYCURSOR; +#line 501 "../src/parse/lex.re" + { + ++depth; + fatal("ambiguous /* found"); + goto comment; + } +#line 1941 "src/parse/lex.cc" +yy289: + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'i') goto yy282; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'n') goto yy282; + yych = (YYCTYPE)*++YYCURSOR; + if (yych != 'e') goto yy282; + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '0') goto yy294; + if (yych <= '9') goto yy282; + goto yy294; +yy293: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + YYCTXMARKER = YYCURSOR; +yy294: + if (yych <= 0x1F) { + if (yych == '\t') goto yy293; + goto yy282; + } else { + if (yych <= ' ') goto yy293; + if (yych <= '0') goto yy282; + if (yych >= ':') goto yy282; + } +yy295: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy295; + } + if (yych <= '\f') { + if (yych <= 0x08) goto yy282; + if (yych <= '\t') goto yy297; + if (yych <= '\n') goto yy299; + goto yy282; + } else { + if (yych <= '\r') goto yy301; + if (yych != ' ') goto yy282; + } +yy297: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= 0x1F) { + if (yych == '\t') goto yy297; + goto yy282; + } 
else { + if (yych <= ' ') goto yy297; + if (yych == '"') goto yy302; + goto yy282; + } +yy299: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 506 "../src/parse/lex.re" + { + set_sourceline (); + goto comment; + } +#line 2003 "src/parse/lex.cc" +yy301: + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy299; + goto yy282; +yy302: + ++YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 128) { + goto yy302; + } + if (yych <= '\n') goto yy282; + if (yych >= '#') goto yy305; + yych = (YYCTYPE)*++YYCURSOR; + if (yych == '\n') goto yy299; + if (yych == '\r') goto yy301; + goto yy282; +yy305: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy282; + goto yy302; +} +#line 526 "../src/parse/lex.re" + + +nextLine: + +#line 2033 "src/parse/lex.cc" +{ + YYCTYPE yych; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy310; + ++YYCURSOR; +#line 537 "../src/parse/lex.re" + { if(cur == eof) { + return 0; + } + goto nextLine; + } +#line 2046 "src/parse/lex.cc" +yy310: + ++YYCURSOR; +#line 530 "../src/parse/lex.re" + { if(cur == eof) { + return 0; + } + tok = pos = cur; + cline++; + goto scan; + } +#line 2057 "src/parse/lex.cc" +} +#line 542 "../src/parse/lex.re" + +} + +static void escape (std::string & dest, const std::string & src) +{ + dest = src; + size_t l = dest.length(); + for (size_t p = 0; p < l; ++p) + { + if (dest[p] == '\\') + { + dest.insert(++p, "\\"); + ++l; + } + } +} + +RegExp *Scanner::lex_cls(bool neg) +{ + Range *r = NULL, *s; + uint32_t u, l; +fst: + +#line 2083 "src/parse/lex.cc" +{ + YYCTYPE yych; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == ']') goto yy315; +#line 566 "../src/parse/lex.re" + { l = lex_cls_chr(); goto snd; } +#line 2091 "src/parse/lex.cc" +yy315: + ++YYCURSOR; +#line 565 "../src/parse/lex.re" + { goto end; } +#line 2096 "src/parse/lex.cc" +} +#line 567 
"../src/parse/lex.re" + +snd: + +#line 2102 "src/parse/lex.cc" +{ + YYCTYPE yych; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*(YYMARKER = YYCURSOR); + if (yych == '-') goto yy320; +yy319: +#line 570 "../src/parse/lex.re" + { u = l; goto add; } +#line 2111 "src/parse/lex.cc" +yy320: + yych = (YYCTYPE)*++YYCURSOR; + YYCTXMARKER = YYCURSOR; + if (yych != ']') goto yy322; + YYCURSOR = YYMARKER; + goto yy319; +yy322: + ++YYCURSOR; + YYCURSOR = YYCTXMARKER; +#line 571 "../src/parse/lex.re" + { + u = lex_cls_chr(); + if (l > u) { + warn.swapped_range(get_line(), l, u); + std::swap(l, u); + } + goto add; + } +#line 2130 "src/parse/lex.cc" +} +#line 579 "../src/parse/lex.re" + +add: + if (!(s = opts->encoding.encodeRange(l, u))) { + fatalf ("Bad code point range: '0x%X - 0x%X'", l, u); + } + r = Range::add(r, s); + goto fst; +end: + if (neg) { + r = Range::sub(opts->encoding.fullRange(), r); + } + return cls(r); +} + +uint32_t Scanner::lex_cls_chr() +{ + tok = cur; + +#line 2151 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy328; + if (yych == '\\') goto yy330; + ++YYCURSOR; +#line 602 "../src/parse/lex.re" + { return static_cast(tok[0]); } +#line 2162 "src/parse/lex.cc" +yy328: + ++YYCURSOR; +#line 597 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error"); } +#line 2167 "src/parse/lex.cc" +yy330: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '`') { + if (yych <= '7') { + if (yych <= ',') { + if (yych != '\n') goto yy332; + } else { + if (yych <= '-') goto yy334; + if (yych <= '/') goto yy332; + if (yych <= '3') goto yy336; + goto yy338; + } + } else { + if (yych <= 'X') { + if (yych == 'U') goto yy339; + if (yych <= 'W') goto yy332; + goto yy341; + } else { + if (yych <= '[') goto yy332; + if (yych <= '\\') goto yy342; + if (yych <= ']') goto yy344; + goto yy332; + } + } + } else { + if (yych <= 'q') { + if (yych <= 
'e') { + if (yych <= 'a') goto yy346; + if (yych <= 'b') goto yy348; + goto yy332; + } else { + if (yych <= 'f') goto yy350; + if (yych == 'n') goto yy352; + goto yy332; + } + } else { + if (yych <= 'u') { + if (yych <= 'r') goto yy354; + if (yych <= 's') goto yy332; + if (yych <= 't') goto yy356; + goto yy341; + } else { + if (yych <= 'v') goto yy358; + if (yych == 'x') goto yy360; + goto yy332; + } + } + } +#line 600 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } +#line 2218 "src/parse/lex.cc" +yy332: + ++YYCURSOR; +#line 615 "../src/parse/lex.re" + { + warn.useless_escape(tline, tok - pos, tok[1]); + return static_cast(tok[1]); + } +#line 2226 "src/parse/lex.cc" +yy334: + ++YYCURSOR; +#line 613 "../src/parse/lex.re" + { return static_cast('-'); } +#line 2231 "src/parse/lex.cc" +yy336: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '/') goto yy337; + if (yych <= '7') goto yy361; +yy337: +#line 599 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } +#line 2240 "src/parse/lex.cc" +yy338: + yych = (YYCTYPE)*++YYCURSOR; + goto yy337; +yy339: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy340; + if (yych <= '9') goto yy363; + } else { + if (yych <= 'F') goto yy363; + if (yych <= '`') goto yy340; + if (yych <= 'f') goto yy363; + } +yy340: +#line 598 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } +#line 2258 "src/parse/lex.cc" +yy341: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy340; + if (yych <= '9') goto yy364; + goto yy340; + } else { + if (yych <= 'F') goto yy364; + if (yych <= '`') goto yy340; + if (yych <= 'f') goto yy364; + goto yy340; + } +yy342: + ++YYCURSOR; +#line 612 "../src/parse/lex.re" + { return static_cast('\\'); } +#line 2276 "src/parse/lex.cc" +yy344: + 
++YYCURSOR; +#line 614 "../src/parse/lex.re" + { return static_cast(']'); } +#line 2281 "src/parse/lex.cc" +yy346: + ++YYCURSOR; +#line 605 "../src/parse/lex.re" + { return static_cast('\a'); } +#line 2286 "src/parse/lex.cc" +yy348: + ++YYCURSOR; +#line 606 "../src/parse/lex.re" + { return static_cast('\b'); } +#line 2291 "src/parse/lex.cc" +yy350: + ++YYCURSOR; +#line 607 "../src/parse/lex.re" + { return static_cast('\f'); } +#line 2296 "src/parse/lex.cc" +yy352: + ++YYCURSOR; +#line 608 "../src/parse/lex.re" + { return static_cast('\n'); } +#line 2301 "src/parse/lex.cc" +yy354: + ++YYCURSOR; +#line 609 "../src/parse/lex.re" + { return static_cast('\r'); } +#line 2306 "src/parse/lex.cc" +yy356: + ++YYCURSOR; +#line 610 "../src/parse/lex.re" + { return static_cast('\t'); } +#line 2311 "src/parse/lex.cc" +yy358: + ++YYCURSOR; +#line 611 "../src/parse/lex.re" + { return static_cast('\v'); } +#line 2316 "src/parse/lex.cc" +yy360: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy340; + if (yych <= '9') goto yy365; + goto yy340; + } else { + if (yych <= 'F') goto yy365; + if (yych <= '`') goto yy340; + if (yych <= 'f') goto yy365; + goto yy340; + } +yy361: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '/') goto yy362; + if (yych <= '7') goto yy366; +yy362: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy337; + } else { + goto yy340; + } +yy363: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy368; + goto yy362; + } else { + if (yych <= 'F') goto yy368; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy368; + goto yy362; + } +yy364: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy369; + goto yy362; + } else { + if (yych <= 'F') goto yy369; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy369; + goto yy362; + } +yy365: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if 
(yych <= '/') goto yy362; + if (yych <= '9') goto yy370; + goto yy362; + } else { + if (yych <= 'F') goto yy370; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy370; + goto yy362; + } +yy366: + ++YYCURSOR; +#line 604 "../src/parse/lex.re" + { return unesc_oct(tok, cur); } +#line 2381 "src/parse/lex.cc" +yy368: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy372; + goto yy362; + } else { + if (yych <= 'F') goto yy372; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy372; + goto yy362; + } +yy369: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy365; + goto yy362; + } else { + if (yych <= 'F') goto yy365; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy365; + goto yy362; + } +yy370: + ++YYCURSOR; +#line 603 "../src/parse/lex.re" + { return unesc_hex(tok, cur); } +#line 2410 "src/parse/lex.cc" +yy372: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy362; + if (yych >= ':') goto yy362; + } else { + if (yych <= 'F') goto yy373; + if (yych <= '`') goto yy362; + if (yych >= 'g') goto yy362; + } +yy373: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '@') { + if (yych <= '/') goto yy362; + if (yych <= '9') goto yy364; + goto yy362; + } else { + if (yych <= 'F') goto yy364; + if (yych <= '`') goto yy362; + if (yych <= 'f') goto yy364; + goto yy362; + } +} +#line 619 "../src/parse/lex.re" + +} + +uint32_t Scanner::lex_str_chr(char quote, bool &end) +{ + end = false; + tok = cur; + +#line 2443 "src/parse/lex.cc" +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy378; + if (yych == '\\') goto yy380; + ++YYCURSOR; +#line 632 "../src/parse/lex.re" + { + end = tok[0] == quote; + return static_cast(tok[0]); + } +#line 2457 "src/parse/lex.cc" +yy378: + ++YYCURSOR; +#line 627 "../src/parse/lex.re" + { fatal 
((tok - pos) - tchar, "syntax error"); } +#line 2462 "src/parse/lex.cc" +yy380: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= 'a') { + if (yych <= 'T') { + if (yych <= '/') { + if (yych != '\n') goto yy382; + } else { + if (yych <= '3') goto yy384; + if (yych <= '7') goto yy386; + goto yy382; + } + } else { + if (yych <= 'X') { + if (yych <= 'U') goto yy387; + if (yych <= 'W') goto yy382; + goto yy389; + } else { + if (yych == '\\') goto yy390; + if (yych <= '`') goto yy382; + goto yy392; + } + } + } else { + if (yych <= 'r') { + if (yych <= 'f') { + if (yych <= 'b') goto yy394; + if (yych <= 'e') goto yy382; + goto yy396; + } else { + if (yych == 'n') goto yy398; + if (yych <= 'q') goto yy382; + goto yy400; + } + } else { + if (yych <= 'u') { + if (yych <= 's') goto yy382; + if (yych <= 't') goto yy402; + goto yy389; + } else { + if (yych <= 'v') goto yy404; + if (yych == 'x') goto yy406; + goto yy382; + } + } + } +#line 630 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } +#line 2510 "src/parse/lex.cc" +yy382: + ++YYCURSOR; +#line 646 "../src/parse/lex.re" + { + if (tok[1] != quote) { + warn.useless_escape(tline, tok - pos, tok[1]); + } + return static_cast(tok[1]); + } +#line 2520 "src/parse/lex.cc" +yy384: + yyaccept = 0; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '/') goto yy385; + if (yych <= '7') goto yy407; +yy385: +#line 629 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } +#line 2529 "src/parse/lex.cc" +yy386: + yych = (YYCTYPE)*++YYCURSOR; + goto yy385; +yy387: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy388; + if (yych <= '9') goto yy409; + } else { + if (yych <= 'F') goto yy409; + if (yych <= '`') goto yy388; + if (yych <= 'f') goto yy409; + } +yy388: +#line 628 "../src/parse/lex.re" + { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } +#line 2547 
"src/parse/lex.cc" +yy389: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy388; + if (yych <= '9') goto yy410; + goto yy388; + } else { + if (yych <= 'F') goto yy410; + if (yych <= '`') goto yy388; + if (yych <= 'f') goto yy410; + goto yy388; + } +yy390: + ++YYCURSOR; +#line 645 "../src/parse/lex.re" + { return static_cast('\\'); } +#line 2565 "src/parse/lex.cc" +yy392: + ++YYCURSOR; +#line 638 "../src/parse/lex.re" + { return static_cast('\a'); } +#line 2570 "src/parse/lex.cc" +yy394: + ++YYCURSOR; +#line 639 "../src/parse/lex.re" + { return static_cast('\b'); } +#line 2575 "src/parse/lex.cc" +yy396: + ++YYCURSOR; +#line 640 "../src/parse/lex.re" + { return static_cast('\f'); } +#line 2580 "src/parse/lex.cc" +yy398: + ++YYCURSOR; +#line 641 "../src/parse/lex.re" + { return static_cast('\n'); } +#line 2585 "src/parse/lex.cc" +yy400: + ++YYCURSOR; +#line 642 "../src/parse/lex.re" + { return static_cast('\r'); } +#line 2590 "src/parse/lex.cc" +yy402: + ++YYCURSOR; +#line 643 "../src/parse/lex.re" + { return static_cast('\t'); } +#line 2595 "src/parse/lex.cc" +yy404: + ++YYCURSOR; +#line 644 "../src/parse/lex.re" + { return static_cast('\v'); } +#line 2600 "src/parse/lex.cc" +yy406: + yyaccept = 1; + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych <= '@') { + if (yych <= '/') goto yy388; + if (yych <= '9') goto yy411; + goto yy388; + } else { + if (yych <= 'F') goto yy411; + if (yych <= '`') goto yy388; + if (yych <= 'f') goto yy411; + goto yy388; + } +yy407: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '/') goto yy408; + if (yych <= '7') goto yy412; +yy408: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy385; + } else { + goto yy388; + } +yy409: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy414; + goto yy408; + } else { + if (yych <= 'F') goto yy414; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy414; + goto yy408; + } 
+yy410: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy415; + goto yy408; + } else { + if (yych <= 'F') goto yy415; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy415; + goto yy408; + } +yy411: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy416; + goto yy408; + } else { + if (yych <= 'F') goto yy416; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy416; + goto yy408; + } +yy412: + ++YYCURSOR; +#line 637 "../src/parse/lex.re" + { return unesc_oct(tok, cur); } +#line 2665 "src/parse/lex.cc" +yy414: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy418; + goto yy408; + } else { + if (yych <= 'F') goto yy418; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy418; + goto yy408; + } +yy415: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy411; + goto yy408; + } else { + if (yych <= 'F') goto yy411; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy411; + goto yy408; + } +yy416: + ++YYCURSOR; +#line 636 "../src/parse/lex.re" + { return unesc_hex(tok, cur); } +#line 2694 "src/parse/lex.cc" +yy418: + yych = (YYCTYPE)*++YYCURSOR; + if (yych <= '@') { + if (yych <= '/') goto yy408; + if (yych >= ':') goto yy408; + } else { + if (yych <= 'F') goto yy419; + if (yych <= '`') goto yy408; + if (yych >= 'g') goto yy408; + } +yy419: + ++YYCURSOR; + if ((yych = (YYCTYPE)*YYCURSOR) <= '@') { + if (yych <= '/') goto yy408; + if (yych <= '9') goto yy410; + goto yy408; + } else { + if (yych <= 'F') goto yy410; + if (yych <= '`') goto yy408; + if (yych <= 'f') goto yy410; + goto yy408; + } +} +#line 652 "../src/parse/lex.re" + +} + +RegExp *Scanner::lex_str(char quote, bool casing) +{ + RegExp *r = NULL; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, end); + if (end) { + return r ? 
r : new NullOp; + } + r = doCat(r, casing ? ichr(c) : schr(c)); + } +} + +void Scanner::set_sourceline () +{ +sourceline: + tok = cur; + +#line 2739 "src/parse/lex.cc" +{ + YYCTYPE yych; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 192, 192, 192, 192, 192, 192, 192, + 192, 192, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = (YYCTYPE)*YYCURSOR; + if (yych <= '!') { + if (yych == '\n') goto yy424; + } else { + if (yych <= '"') goto yy426; + if (yych <= '0') goto yy422; + if (yych <= '9') goto yy427; + } +yy422: + ++YYCURSOR; +yy423: +#line 695 "../src/parse/lex.re" + { + goto sourceline; + } +#line 2792 "src/parse/lex.cc" +yy424: + ++YYCURSOR; +#line 683 "../src/parse/lex.re" + { + if (cur == 
eof) + { + --cur; + } + else + { + pos = cur; + } + tok = cur; + return; + } +#line 2808 "src/parse/lex.cc" +yy426: + yych = (YYCTYPE)*(YYMARKER = ++YYCURSOR); + if (yych == '\n') goto yy423; + goto yy431; +yy427: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yybm[0+yych] & 64) { + goto yy427; + } +#line 672 "../src/parse/lex.re" + { + if (!s_to_u32_unsafe (tok, cur, cline)) + { + fatal ("line number overflow"); + } + goto sourceline; + } +#line 2828 "src/parse/lex.cc" +yy430: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; +yy431: + if (yybm[0+yych] & 128) { + goto yy430; + } + if (yych <= '\n') goto yy432; + if (yych <= '"') goto yy433; + goto yy435; +yy432: + YYCURSOR = YYMARKER; + goto yy423; +yy433: + ++YYCURSOR; +#line 679 "../src/parse/lex.re" + { + escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes + goto sourceline; + } +#line 2850 "src/parse/lex.cc" +yy435: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = (YYCTYPE)*YYCURSOR; + if (yych == '\n') goto yy432; + goto yy430; +} +#line 698 "../src/parse/lex.re" + +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/lex.re b/tools/re2c/src/parse/lex.re new file mode 100644 index 000000000..2fd98fb93 --- /dev/null +++ b/tools/re2c/src/parse/lex.re @@ -0,0 +1,701 @@ +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/conf/warn.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/input.h" +#include "src/parse/scanner.h" +#include "src/parse/parser.h" // needed by "y.tab.h" +#include "src/parse/unescape.h" +#include "src/util/range.h" +#include "src/util/s_to_n32_unsafe.h" +#include "y.tab.h" + +extern YYSTYPE 
yylval; + +#define YYCTYPE unsigned char +#define YYCURSOR cur +#define YYLIMIT lim +#define YYMARKER ptr +#define YYCTXMARKER ctx +#define YYFILL(n) { fill (n); } + +namespace re2c +{ + +// source code is in ASCII: pointers have type 'char *' +// but re2c makes an implicit assumption that YYCTYPE is unsigned +// when it generates comparisons +/*!re2c + re2c:yych:conversion = 1; +*/ + +/*!re2c +zero = "\000"; +dstring = "\"" ((. \ [\\"] ) | "\\" .)* "\""; +sstring = "'" ((. \ [\\'] ) | "\\" .)* "'" ; +letter = [a-zA-Z]; +digit = [0-9]; +lineno = [1-9] digit*; +name = (letter|digit|"_")+; +space = [ \t]; +ws = (space | [\r\n]); +eol = ("\r\n" | "\n"); +lineinf = lineno (space+ dstring)? eol; + + esc = "\\"; + hex_digit = [0-9a-fA-F]; + esc_hex = esc ("x" hex_digit{2} | [uX] hex_digit{4} | "U" hex_digit{8}); + esc_oct = esc [0-3] [0-7]{2}; // max 1-byte octal value is '\377' + esc_simple = esc [abfnrtv\\]; +*/ + +Scanner::ParseMode Scanner::echo() +{ + bool ignore_eoc = false; + int ignore_cnt = 0; + + if (eof && cur == eof) // Catch EOF + { + return Stop; + } + + tok = cur; +echo: +/*!re2c + beginRE = "%{" | "/*!re2c"; + beginRE { + if (opts->rFlag) + { + fatal("found standard 're2c' block while using -r flag"); + } + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = cur[-1] == '{' + ? 
sizeof ("%{") - 1 + : sizeof ("/*!re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Parse; + } + "/*!rules:re2c" { + if (opts->rFlag) + { + opts.reset_mapCodeName (); + } + else + { + fatal("found 'rules:re2c' block without -r flag"); + } + tok = cur; + return Rules; + } + "/*!use:re2c" { + if (!opts->rFlag) + { + fatal("found 'use:re2c' block without -r flag"); + } + reuse(); + if (opts->target == opt_t::CODE) + { + const size_t lexeme_len = sizeof ("/*!use:re2c") - 1; + out.wraw(tok, tok_len () - lexeme_len); + } + tok = cur; + return Reuse; + } + "/*!max:re2c" { + if (opts->target != opt_t::DOT) + { + out.wdelay_yymaxfill (); + } + tok = pos = cur; + ignore_eoc = true; + goto echo; + } + "/*!getstate:re2c" { + tok = pos = cur; + out.wdelay_state_goto (opts->topIndent); + ignore_eoc = true; + goto echo; + } + "/*!ignore:re2c" { + tok = pos = cur; + ignore_eoc = true; + goto echo; + } + "/*!types:re2c" { + tok = pos = cur; + ignore_eoc = true; + if (opts->target != opt_t::DOT) + { + out.wdelay_line_info ().ws("\n") + .wdelay_types ().ws("\n") + .wline_info (cline, get_fname ().c_str ()); + } + goto echo; + } + "*" "/" "\r"? 
"\n" { + cline++; + if (ignore_eoc) + { + if (ignore_cnt) + { + out.wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } + "*" "/" { + if (ignore_eoc) + { + if (ignore_cnt) + { + out.ws("\n").wline_info (cline, get_fname ().c_str ()); + } + ignore_eoc = false; + ignore_cnt = 0; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + goto echo; + } + "\n" space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto echo; + } + "\n" { + if (ignore_eoc) + { + ignore_cnt++; + } + else if (opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len ()); + } + tok = pos = cur; + cline++; + goto echo; + } + zero { + if (!ignore_eoc && opts->target == opt_t::CODE) + { + out.wraw(tok, tok_len () - 1); + // -1 so we don't write out the \0 + } + if(cur == eof) + { + return Stop; + } + } + * { + goto echo; + } +*/ +} + +int Scanner::scan() +{ + uint32_t depth; + +scan: + tchar = cur - pos; + tline = cline; + tok = cur; + switch (lexer_state) + { + case LEX_NORMAL: goto start; + case LEX_FLEX_NAME: goto flex_name; + } + +start: +/*!re2c + "{" { + depth = 1; + goto code; + } + + ":" / "=>" { + return *tok; + } + + ":=" { + tok += 2; /* skip ":=" */ + depth = 0; + goto code; + } + + "//" { + goto nextLine; + } + "/*" { + depth = 1; + goto comment; + } + + endRE = "%}" | "*/"; + endRE { + tok = cur; + return 0; + } + + "'" { yylval.regexp = lex_str('\'', opts->bCaseInsensitive || !opts->bCaseInverted); return TOKEN_REGEXP; } + "\"" { yylval.regexp = lex_str('"', opts->bCaseInsensitive || opts->bCaseInverted); return TOKEN_REGEXP; } + "[" { yylval.regexp = lex_cls(false); return TOKEN_REGEXP; } + "[^" { yylval.regexp = lex_cls(true); return TOKEN_REGEXP; } + + "<>" / (space* ("{" | "=>" | ":=")) { + return TOKEN_NOCOND; + } + ",()|=;/\\] { + return *tok; + } + + "*" { + yylval.op 
= *tok; + return TOKEN_STAR; + } + [+?] { + yylval.op = *tok; + return TOKEN_CLOSE; + } + + "{" [0-9]+ "}" { + if (!s_to_u32_unsafe (tok + 1, cur - 1, yylval.extop.min)) + { + fatal ("repetition count overflow"); + } + yylval.extop.max = yylval.extop.min; + return TOKEN_CLOSESIZE; + } + + "{" [0-9]+ "," [0-9]+ "}" { + const char * p = strchr (tok, ','); + if (!s_to_u32_unsafe (tok + 1, p, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + if (!s_to_u32_unsafe (p + 1, cur - 1, yylval.extop.max)) + { + fatal ("repetition upper bound overflow"); + } + return TOKEN_CLOSESIZE; + } + + "{" [0-9]+ ",}" { + if (!s_to_u32_unsafe (tok + 1, cur - 2, yylval.extop.min)) + { + fatal ("repetition lower bound overflow"); + } + yylval.extop.max = std::numeric_limits::max(); + return TOKEN_CLOSESIZE; + } + + "{" [0-9]* "," { + fatal("illegal closure form, use '{n}', '{n,}', '{n,m}' where n and m are numbers"); + } + + "{" name "}" { + if (!opts->FFlag) { + fatal("curly braces for names only allowed with -F switch"); + } + yylval.str = new std::string (tok + 1, tok_len () - 2); // -2 to omit braces + return TOKEN_ID; + } + + "re2c:" { lex_conf (); return TOKEN_CONF; } + + name / (space+ [^=>,]) { + yylval.str = new std::string (tok, tok_len ()); + if (opts->FFlag) + { + lexer_state = LEX_FLEX_NAME; + return TOKEN_FID; + } + else + { + return TOKEN_ID; + } + } + + name / (space* [=>,]) { + yylval.str = new std::string (tok, tok_len ()); + return TOKEN_ID; + } + + name / [^] { + if (!opts->FFlag) { + yylval.str = new std::string (tok, tok_len()); + return TOKEN_ID; + } else { + RegExp *r = NULL; + const bool casing = opts->bCaseInsensitive || opts->bCaseInverted; + for (char *s = tok; s < cur; ++s) { + const uint32_t c = static_cast(*s); + r = doCat(r, casing ? ichr(c) : schr(c)); + } + yylval.regexp = r ? r : new NullOp; + return TOKEN_REGEXP; + } + } + + "." 
{ + yylval.regexp = mkDot(); + return TOKEN_REGEXP; + } + + space+ { + goto scan; + } + + eol space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto scan; + } + + eol { + if (cur == eof) return 0; + pos = cur; + cline++; + goto scan; + } + + * { + fatalf("unexpected character: '%c'", *tok); + goto scan; + } +*/ + +flex_name: +/*!re2c + eol + { + YYCURSOR = tok; + lexer_state = LEX_NORMAL; + return TOKEN_FID_END; + } + * + { + YYCURSOR = tok; + goto start; + } +*/ + +code: +/*!re2c + "}" { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else if (--depth == 0) + { + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + goto code; + } + "{" { + if (depth == 0) + { + fatal("Curly braces are not allowed after ':='"); + } + else + { + ++depth; + } + goto code; + } + "\n" space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto code; + } + "\n" / ws { + if (depth == 0) + { + goto code; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } + "\n" { + if (depth == 0) + { + tok += strspn(tok, " \t\r\n"); + while (cur > tok && strchr(" \t\r\n", cur[-1])) + { + --cur; + } + yylval.code = new Code (tok, tok_len (), get_fname (), tline); + return TOKEN_CODE; + } + else if (cur == eof) + { + fatal("missing '}'"); + } + pos = cur; + cline++; + goto code; + } + zero { + if (cur == eof) + { + if (depth) + { + fatal("missing '}'"); + } + return 0; + } + goto code; + } + dstring | sstring { + goto code; + } + * { + goto code; + } +*/ + +comment: +/*!re2c + "*/" { + if (--depth == 0) + { + goto scan; + } + else + { + goto comment; + } + } + "/*" { + ++depth; + fatal("ambiguous /* found"); + goto comment; + } + "\n" space* "#" space* "line" space+ / lineinf { + set_sourceline (); + goto comment; + } + "\n" { + if (cur == eof) + { + return 0; + } + tok = pos = cur; + cline++; + goto comment; + } + * { + if (cur == eof) + { + return 0; + } + 
goto comment; + } +*/ + +nextLine: +/*!re2c /* resync emacs */ + "\n" { if(cur == eof) { + return 0; + } + tok = pos = cur; + cline++; + goto scan; + } + * { if(cur == eof) { + return 0; + } + goto nextLine; + } +*/ +} + +static void escape (std::string & dest, const std::string & src) +{ + dest = src; + size_t l = dest.length(); + for (size_t p = 0; p < l; ++p) + { + if (dest[p] == '\\') + { + dest.insert(++p, "\\"); + ++l; + } + } +} + +RegExp *Scanner::lex_cls(bool neg) +{ + Range *r = NULL, *s; + uint32_t u, l; +fst: + /*!re2c + "]" { goto end; } + "" { l = lex_cls_chr(); goto snd; } + */ +snd: + /*!re2c + "" { u = l; goto add; } + "-" / [^\]] { + u = lex_cls_chr(); + if (l > u) { + warn.swapped_range(get_line(), l, u); + std::swap(l, u); + } + goto add; + } + */ +add: + if (!(s = opts->encoding.encodeRange(l, u))) { + fatalf ("Bad code point range: '0x%X - 0x%X'", l, u); + } + r = Range::add(r, s); + goto fst; +end: + if (neg) { + r = Range::sub(opts->encoding.fullRange(), r); + } + return cls(r); +} + +uint32_t Scanner::lex_cls_chr() +{ + tok = cur; + /*!re2c + * { fatal ((tok - pos) - tchar, "syntax error"); } + esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } + esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } + esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } + + . \ esc { return static_cast(tok[0]); } + esc_hex { return unesc_hex(tok, cur); } + esc_oct { return unesc_oct(tok, cur); } + esc "a" { return static_cast('\a'); } + esc "b" { return static_cast('\b'); } + esc "f" { return static_cast('\f'); } + esc "n" { return static_cast('\n'); } + esc "r" { return static_cast('\r'); } + esc "t" { return static_cast('\t'); } + esc "v" { return static_cast('\v'); } + esc "\\" { return static_cast('\\'); } + esc "-" { return static_cast('-'); } + esc "]" { return static_cast(']'); } + esc . 
{ + warn.useless_escape(tline, tok - pos, tok[1]); + return static_cast(tok[1]); + } + */ +} + +uint32_t Scanner::lex_str_chr(char quote, bool &end) +{ + end = false; + tok = cur; + /*!re2c + * { fatal ((tok - pos) - tchar, "syntax error"); } + esc [xXuU] { fatal ((tok - pos) - tchar, "syntax error in hexadecimal escape sequence"); } + esc [0-7] { fatal ((tok - pos) - tchar, "syntax error in octal escape sequence"); } + esc { fatal ((tok - pos) - tchar, "syntax error in escape sequence"); } + + . \ esc { + end = tok[0] == quote; + return static_cast(tok[0]); + } + esc_hex { return unesc_hex(tok, cur); } + esc_oct { return unesc_oct(tok, cur); } + esc "a" { return static_cast('\a'); } + esc "b" { return static_cast('\b'); } + esc "f" { return static_cast('\f'); } + esc "n" { return static_cast('\n'); } + esc "r" { return static_cast('\r'); } + esc "t" { return static_cast('\t'); } + esc "v" { return static_cast('\v'); } + esc "\\" { return static_cast('\\'); } + esc . { + if (tok[1] != quote) { + warn.useless_escape(tline, tok - pos, tok[1]); + } + return static_cast(tok[1]); + } + */ +} + +RegExp *Scanner::lex_str(char quote, bool casing) +{ + RegExp *r = NULL; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, end); + if (end) { + return r ? r : new NullOp; + } + r = doCat(r, casing ? 
ichr(c) : schr(c)); + } +} + +void Scanner::set_sourceline () +{ +sourceline: + tok = cur; +/*!re2c + lineno { + if (!s_to_u32_unsafe (tok, cur, cline)) + { + fatal ("line number overflow"); + } + goto sourceline; + } + dstring { + escape (in.file_name, std::string (tok + 1, tok_len () - 2)); // -2 to omit quotes + goto sourceline; + } + "\n" { + if (cur == eof) + { + --cur; + } + else + { + pos = cur; + } + tok = cur; + return; + } + * { + goto sourceline; + } +*/ +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/lex_conf.cc b/tools/re2c/src/parse/lex_conf.cc new file mode 100644 index 000000000..eb0aec7fa --- /dev/null +++ b/tools/re2c/src/parse/lex_conf.cc @@ -0,0 +1,2284 @@ +/* Generated by re2c 0.16 on Thu Jan 21 10:47:47 2016 */ +#line 1 "../src/parse/lex_conf.re" +#include "src/util/c99_stdint.h" +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/parse/scanner.h" +#include "src/util/s_to_n32_unsafe.h" + +namespace re2c +{ + +// global re2c config (affects the whole file) +#line 36 "../src/parse/lex_conf.re" + + +void Scanner::lex_conf () +{ + tok = cur; + +#line 25 "src/parse/lex_conf.cc" +{ + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 27) fill(27); + yych = (unsigned char)*cur; + switch (yych) { + case 'c': goto yy4; + case 'd': goto yy5; + case 'f': goto yy6; + case 'i': goto yy7; + case 'l': goto yy8; + case 's': goto yy9; + case 'v': goto yy10; + case 'y': goto yy11; + default: goto yy2; + } +yy2: + ++cur; +yy3: +#line 42 "../src/parse/lex_conf.re" + { fatal ((tok - pos) - tchar, "unrecognized configuration"); } +#line 81 "src/parse/lex_conf.cc" +yy4: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'g') goto yy12; + if (yych == 'o') goto yy14; + goto yy3; +yy5: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'e') goto yy15; + goto yy3; +yy6: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'l') goto yy16; + goto yy3; +yy7: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'n') goto yy17; + goto yy3; +yy8: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'a') goto yy18; + goto yy3; +yy9: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 't') goto yy19; + goto yy3; +yy10: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'a') goto yy20; + goto yy3; +yy11: + yyaccept = 0; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'y') goto yy21; + goto yy3; +yy12: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy22; +yy13: + cur = ptr; + if (yyaccept <= 5) { + if (yyaccept <= 2) { + if (yyaccept <= 1) { + if (yyaccept == 0) { + goto yy3; + } else { + goto yy119; + } + } else { + goto yy161; + } + } else { + if (yyaccept <= 4) { + if (yyaccept == 3) { + goto yy206; + } else { + goto yy255; + } + } else { + goto yy321; + } + } + } else { + if (yyaccept <= 8) { + if (yyaccept <= 7) { + if (yyaccept == 6) { + goto yy356; + } else { + goto yy377; + } + 
} else { + goto yy383; + } + } else { + if (yyaccept == 9) { + goto yy420; + } else { + goto yy423; + } + } + } +yy14: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy23; + goto yy13; +yy15: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy24; + goto yy13; +yy16: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy25; + goto yy13; +yy17: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy26; + goto yy13; +yy18: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy27; + goto yy13; +yy19: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy28; + goto yy13; +yy20: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy29; + goto yy13; +yy21: + yych = (unsigned char)*++cur; + if (yych <= 'c') { + if (yych <= 'a') goto yy13; + if (yych <= 'b') goto yy30; + goto yy31; + } else { + if (yych == 'f') goto yy32; + goto yy13; + } +yy22: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy33; + goto yy13; +yy23: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy34; + goto yy13; +yy24: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy35; + goto yy13; +yy25: + yych = (unsigned char)*++cur; + if (yych == 'g') goto yy36; + goto yy13; +yy26: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy37; + goto yy13; +yy27: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy38; + goto yy13; +yy28: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy39; + if (yych == 't') goto yy40; + goto yy13; +yy29: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy41; + goto yy13; +yy30: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy42; + goto yy13; +yy31: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy43; + goto yy13; +yy32: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy44; + goto yy13; +yy33: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy45; + goto yy13; +yy34: + yych = (unsigned char)*++cur; + if (yych <= 'd') { + if (yych == ':') goto yy46; + goto yy13; + } 
else { + if (yych <= 'e') goto yy47; + if (yych == 'p') goto yy48; + goto yy13; + } +yy35: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy49; + goto yy13; +yy36: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy50; + goto yy13; +yy37: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy51; + goto yy13; +yy38: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy52; + goto yy13; +yy39: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy53; + goto yy13; +yy40: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy54; + goto yy13; +yy41: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy55; + goto yy13; +yy42: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy56; + goto yy13; +yy43: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy57; + goto yy13; +yy44: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy58; + goto yy13; +yy45: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy59; + goto yy13; +yy46: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy60; + if (yych == 'g') goto yy61; + goto yy13; +yy47: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy62; + goto yy13; +yy48: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy63; + goto yy13; +yy49: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy64; + goto yy13; +yy50: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy65; + goto yy13; +yy51: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy66; + goto yy13; +yy52: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy67; + if (yych == 'p') goto yy68; + goto yy13; +yy53: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy69; + goto yy13; +yy54: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy70; + goto yy13; +yy55: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy71; + goto yy13; +yy56: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy72; + goto yy13; +yy57: + yych = (unsigned char)*++cur; + if (yych 
== 'c') goto yy73; + if (yych == 'e') goto yy74; + goto yy13; +yy58: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy75; + goto yy13; +yy59: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy76; + goto yy13; +yy60: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy77; + goto yy13; +yy61: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy78; + goto yy13; +yy62: + yych = (unsigned char)*++cur; + if (yych == 'u') goto yy79; + goto yy13; +yy63: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy80; + goto yy13; +yy64: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy81; + goto yy13; +yy65: + yych = (unsigned char)*++cur; + if (yych <= 'e') { + if (yych == '8') goto yy82; + if (yych <= 'd') goto yy13; + goto yy82; + } else { + if (yych <= 'u') { + if (yych <= 't') goto yy13; + goto yy82; + } else { + if (yych <= 'v') goto yy13; + if (yych <= 'x') goto yy82; + goto yy13; + } + } +yy66: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy84; + goto yy13; +yy67: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy85; + goto yy13; +yy68: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy86; + goto yy13; +yy69: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy87; + goto yy13; +yy70: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy88; + if (yych == 'n') goto yy89; + goto yy13; +yy71: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy90; + goto yy13; +yy72: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy91; + goto yy13; +yy73: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy92; + goto yy13; +yy74: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy93; + goto yy13; +yy75: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy94; + goto yy13; +yy76: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy95; + goto yy13; +yy77: + yych = (unsigned char)*++cur; + if (yych == 'v') goto yy96; + goto yy13; +yy78: + yych = (unsigned char)*++cur; + if (yych 
== 't') goto yy97; + goto yy13; +yy79: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy98; + goto yy13; +yy80: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy99; + goto yy13; +yy81: + yych = (unsigned char)*++cur; + if (yych == 'Y') goto yy100; + goto yy13; +yy82: + ++cur; +#line 45 "../src/parse/lex_conf.re" + { + Enc::type_t enc = Enc::ASCII; + switch (cur[-1]) + { + case 'e': enc = Enc::EBCDIC; break; + case 'w': enc = Enc::UCS2; break; + case 'x': enc = Enc::UTF16; break; + case 'u': enc = Enc::UTF32; break; + case '8': enc = Enc::UTF8; break; + } + const int32_t n = lex_conf_number (); + if (n == 0) + { + opts.unset_encoding (enc); + } + else if (!opts.set_encoding (enc)) + { + fatalf ("Cannot set %s encoding: please reset %s encoding first" + , Enc::name (enc) + , Enc::name (opts->encoding.type ())); + } + return; + } +#line 496 "src/parse/lex_conf.cc" +yy84: + yych = (unsigned char)*++cur; + if (yych <= 'r') goto yy13; + if (yych <= 's') goto yy101; + if (yych <= 't') goto yy102; + goto yy13; +yy85: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy103; + goto yy13; +yy86: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy104; + goto yy13; +yy87: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy105; + goto yy13; +yy88: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy106; + goto yy13; +yy89: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy107; + goto yy13; +yy90: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy108; + goto yy13; +yy91: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy109; + goto yy13; +yy92: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy111; + goto yy13; +yy93: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy112; + goto yy13; +yy94: + yych = (unsigned char)*++cur; + if (yych <= 'd') { + if (yych == 'c') goto yy113; + goto yy13; + } else { + if (yych <= 'e') goto yy114; + if (yych == 'p') goto yy115; + goto yy13; + } +yy95: + yych = 
(unsigned char)*++cur; + if (yych == 'r') goto yy116; + goto yy13; +yy96: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy117; + goto yy13; +yy97: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy118; + goto yy13; +yy98: + yych = (unsigned char)*++cur; + if (yych == 'p') goto yy120; + goto yy13; +yy99: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy121; + goto yy13; +yy100: + yych = (unsigned char)*++cur; + if (yych == 'Y') goto yy122; + goto yy13; +yy101: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy123; + goto yy13; +yy102: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy124; + goto yy13; +yy103: + yych = (unsigned char)*++cur; + if (yych == 'F') goto yy125; + if (yych == 'N') goto yy126; + goto yy13; +yy104: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy127; + goto yy13; +yy105: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy128; + goto yy13; +yy106: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy129; + goto yy13; +yy107: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy130; + goto yy13; +yy108: + yych = (unsigned char)*++cur; + if (yych == ':') goto yy131; + goto yy13; +yy109: + ++cur; +#line 95 "../src/parse/lex_conf.re" + { opts.set_yybmHexTable (lex_conf_number () != 0); return; } +#line 610 "src/parse/lex_conf.cc" +yy111: + yych = (unsigned char)*++cur; + if (yych == 'v') goto yy132; + goto yy13; +yy112: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy133; + goto yy13; +yy113: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy135; + goto yy13; +yy114: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy136; + goto yy13; +yy115: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy137; + goto yy13; +yy116: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy138; + goto yy13; +yy117: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy139; + goto yy13; +yy118: + yyaccept = 1; + yych = (unsigned char)*(ptr = ++cur); + if 
(yych == '@') goto yy140; +yy119: +#line 79 "../src/parse/lex_conf.re" + { opts.set_condGoto (lex_conf_string ()); return; } +#line 646 "src/parse/lex_conf.cc" +yy120: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy141; + goto yy13; +yy121: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy142; + goto yy13; +yy122: + yych = (unsigned char)*++cur; + switch (yych) { + case 'B': goto yy144; + case 'C': goto yy145; + case 'D': goto yy146; + case 'F': goto yy147; + case 'G': goto yy148; + case 'L': goto yy149; + case 'M': goto yy150; + case 'P': goto yy151; + case 'R': goto yy152; + case 'S': goto yy153; + default: goto yy13; + } +yy123: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy154; + goto yy13; +yy124: + yych = (unsigned char)*++cur; + if (yych == 'p') goto yy155; + goto yy13; +yy125: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy157; + goto yy13; +yy126: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy158; + goto yy13; +yy127: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy159; + goto yy13; +yy128: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy160; + goto yy13; +yy129: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy162; + goto yy13; +yy130: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy163; + goto yy13; +yy131: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy164; + goto yy13; +yy132: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy165; + goto yy13; +yy133: + ++cur; +#line 138 "../src/parse/lex_conf.re" + { opts.set_bEmitYYCh (lex_conf_number () != 0); return; } +#line 714 "src/parse/lex_conf.cc" +yy135: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy166; + goto yy13; +yy136: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy167; + goto yy13; +yy137: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy168; + goto yy13; +yy138: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy169; + goto yy13; +yy139: + yych = 
(unsigned char)*++cur; + if (yych == 'e') goto yy170; + goto yy13; +yy140: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy171; + goto yy13; +yy141: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy172; + goto yy13; +yy142: + ++cur; +#line 75 "../src/parse/lex_conf.re" + { opts.set_condPrefix (lex_conf_string ()); return; } +#line 747 "src/parse/lex_conf.cc" +yy144: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy173; + goto yy13; +yy145: + yych = (unsigned char)*++cur; + if (yych <= 'S') { + if (yych == 'O') goto yy174; + goto yy13; + } else { + if (yych <= 'T') goto yy175; + if (yych <= 'U') goto yy176; + goto yy13; + } +yy146: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy177; + goto yy13; +yy147: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy178; + goto yy13; +yy148: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy179; + goto yy13; +yy149: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy180; + if (yych == 'I') goto yy181; + goto yy13; +yy150: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy182; + goto yy13; +yy151: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy183; + goto yy13; +yy152: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy184; + goto yy13; +yy153: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy185; + if (yych == 'K') goto yy186; + goto yy13; +yy154: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy187; + goto yy13; +yy155: + ++cur; +#line 123 "../src/parse/lex_conf.re" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'indent:top' must be nonnegative"); + } + opts.set_topIndent (static_cast (n)); + return; + } +#line 812 "src/parse/lex_conf.cc" +yy157: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy188; + goto yy13; +yy158: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy189; + goto yy13; +yy159: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy190; + goto yy13; 
+yy160: + yyaccept = 2; + yych = (unsigned char)*(ptr = ++cur); + ctx = cur; + if (yybm[0+yych] & 64) { + goto yy192; + } + if (yych == '=') goto yy194; +yy161: +#line 151 "../src/parse/lex_conf.re" + { out.set_user_start_label (lex_conf_string ()); return; } +#line 836 "src/parse/lex_conf.cc" +yy162: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy196; + goto yy13; +yy163: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy198; + goto yy13; +yy164: + yych = (unsigned char)*++cur; + if (yych == 'y') goto yy199; + goto yy13; +yy165: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy200; + goto yy13; +yy166: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy201; + goto yy13; +yy167: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy202; + goto yy13; +yy168: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy203; + goto yy13; +yy169: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy204; + goto yy13; +yy170: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy205; + goto yy13; +yy171: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy207; + goto yy13; +yy172: + yych = (unsigned char)*++cur; + if (yych == 'f') goto yy208; + goto yy13; +yy173: + yych = (unsigned char)*++cur; + if (yych == 'C') goto yy209; + goto yy13; +yy174: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy210; + goto yy13; +yy175: + yych = (unsigned char)*++cur; + if (yych <= 'W') goto yy13; + if (yych <= 'X') goto yy211; + if (yych <= 'Y') goto yy212; + goto yy13; +yy176: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy213; + goto yy13; +yy177: + yych = (unsigned char)*++cur; + if (yych == 'B') goto yy214; + goto yy13; +yy178: + yych = (unsigned char)*++cur; + if (yych == 'L') goto yy215; + goto yy13; +yy179: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy216; + goto yy13; +yy180: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy217; + goto yy13; +yy181: + yych = (unsigned char)*++cur; + if 
(yych == 'M') goto yy218; + goto yy13; +yy182: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy219; + goto yy13; +yy183: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy220; + goto yy13; +yy184: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy221; + goto yy13; +yy185: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy222; + goto yy13; +yy186: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy223; + goto yy13; +yy187: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy224; + goto yy13; +yy188: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy225; + goto yy13; +yy189: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy226; + goto yy13; +yy190: + ++cur; +#line 147 "../src/parse/lex_conf.re" + { opts.set_labelPrefix (lex_conf_string ()); return; } +#line 955 "src/parse/lex_conf.cc" +yy192: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 64) { + goto yy192; + } + if (yych != '=') goto yy13; +yy194: + ++cur; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= ',') { + if (yych <= '\t') { + if (yych <= 0x08) goto yy13; + goto yy194; + } else { + if (yych == ' ') goto yy194; + goto yy13; + } + } else { + if (yych <= '/') { + if (yych <= '-') goto yy228; + goto yy13; + } else { + if (yych <= '0') goto yy229; + if (yych <= '9') goto yy231; + goto yy13; + } + } +yy196: + ++cur; +#line 90 "../src/parse/lex_conf.re" + { opts.set_bUseStateAbort (lex_conf_number () != 0); return; } +#line 990 "src/parse/lex_conf.cc" +yy198: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy233; + goto yy13; +yy199: + yych = (unsigned char)*++cur; + if (yych <= 'c') { + if (yych <= '`') goto yy13; + if (yych <= 'a') goto yy234; + if (yych <= 'b') goto yy235; + goto yy236; + } else { + if (yych <= 'r') goto yy13; + if (yych <= 's') goto yy237; + if (yych <= 't') goto yy238; + goto yy13; + } +yy200: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy239; 
+ goto yy13; +yy201: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy240; + goto yy13; +yy202: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy242; + goto yy13; +yy203: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy243; + goto yy13; +yy204: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy244; + goto yy13; +yy205: + yyaccept = 3; + yych = (unsigned char)*(ptr = ++cur); + if (yych == '@') goto yy245; +yy206: +#line 77 "../src/parse/lex_conf.re" + { opts.set_condDivider (lex_conf_string ()); return; } +#line 1035 "src/parse/lex_conf.cc" +yy207: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy246; + goto yy13; +yy208: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy247; + goto yy13; +yy209: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy248; + goto yy13; +yy210: + yych = (unsigned char)*++cur; + if (yych == 'D') goto yy249; + goto yy13; +yy211: + yych = (unsigned char)*++cur; + if (yych == 'M') goto yy250; + goto yy13; +yy212: + yych = (unsigned char)*++cur; + if (yych == 'P') goto yy251; + goto yy13; +yy213: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy252; + goto yy13; +yy214: + yych = (unsigned char)*++cur; + if (yych == 'U') goto yy253; + goto yy13; +yy215: + yych = (unsigned char)*++cur; + if (yych == 'L') goto yy254; + goto yy13; +yy216: + yych = (unsigned char)*++cur; + if (yych == 'C') goto yy256; + if (yych == 'S') goto yy257; + goto yy13; +yy217: + yych = (unsigned char)*++cur; + if (yych == 'S') goto yy258; + goto yy13; +yy218: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy259; + goto yy13; +yy219: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy260; + goto yy13; +yy220: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy261; + goto yy13; +yy221: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy263; + goto yy13; +yy222: + yych = (unsigned char)*++cur; + if (yych == 'C') goto yy264; + if (yych == 'S') goto yy265; + goto yy13; +yy223: + 
yych = (unsigned char)*++cur; + if (yych == 'P') goto yy266; + goto yy13; +yy224: + yych = (unsigned char)*++cur; + if (yych == 'g') goto yy268; + goto yy13; +yy225: + yych = (unsigned char)*++cur; + if (yych == 'L') goto yy270; + goto yy13; +yy226: + ++cur; +#line 89 "../src/parse/lex_conf.re" + { opts.set_yynext (lex_conf_string ()); return; } +#line 1118 "src/parse/lex_conf.cc" +yy228: + yych = (unsigned char)*++cur; + if (yych <= '0') goto yy13; + if (yych <= '9') goto yy231; + goto yy13; +yy229: + ++cur; +yy230: + cur = ctx; +#line 150 "../src/parse/lex_conf.re" + { out.set_force_start_label (lex_conf_number () != 0); return; } +#line 1130 "src/parse/lex_conf.cc" +yy231: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy231; + } + goto yy230; +yy233: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy271; + goto yy13; +yy234: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy272; + goto yy13; +yy235: + yych = (unsigned char)*++cur; + if (yych == 'm') goto yy273; + goto yy13; +yy236: + yych = (unsigned char)*++cur; + if (yych == 'h') goto yy275; + if (yych == 't') goto yy277; + goto yy13; +yy237: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy278; + goto yy13; +yy238: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy279; + goto yy13; +yy239: + yych = (unsigned char)*++cur; + if (yych == 'i') goto yy280; + goto yy13; +yy240: + ++cur; +#line 145 "../src/parse/lex_conf.re" + { opts.set_fill_check (lex_conf_number () != 0); return; } +#line 1172 "src/parse/lex_conf.cc" +yy242: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy281; + goto yy13; +yy243: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy283; + goto yy13; +yy244: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy284; + goto yy13; +yy245: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy285; + goto yy13; +yy246: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy286; + goto 
yy13; +yy247: + yych = (unsigned char)*++cur; + if (yych == 'x') goto yy288; + goto yy13; +yy248: + yych = (unsigned char)*++cur; + if (yych == 'U') goto yy290; + goto yy13; +yy249: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy291; + goto yy13; +yy250: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy292; + goto yy13; +yy251: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy293; + goto yy13; +yy252: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy295; + goto yy13; +yy253: + yych = (unsigned char)*++cur; + if (yych == 'G') goto yy296; + goto yy13; +yy254: + yyaccept = 4; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy298; + if (yych == '@') goto yy299; +yy255: +#line 140 "../src/parse/lex_conf.re" + { opts.set_fill (lex_conf_string ()); return; } +#line 1229 "src/parse/lex_conf.cc" +yy256: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy300; + goto yy13; +yy257: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy301; + goto yy13; +yy258: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy302; + goto yy13; +yy259: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy303; + goto yy13; +yy260: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy305; + goto yy13; +yy261: + ++cur; +#line 113 "../src/parse/lex_conf.re" + { opts.set_yypeek (lex_conf_string ()); return; } +#line 1254 "src/parse/lex_conf.cc" +yy263: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy306; + goto yy13; +yy264: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy307; + goto yy13; +yy265: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy308; + goto yy13; +yy266: + ++cur; +#line 114 "../src/parse/lex_conf.re" + { opts.set_yyskip (lex_conf_string ()); return; } +#line 1271 "src/parse/lex_conf.cc" +yy268: + ++cur; +#line 121 "../src/parse/lex_conf.re" + { opts.set_indString (lex_conf_string ()); return; } +#line 1276 "src/parse/lex_conf.cc" +yy270: + yych = (unsigned char)*++cur; + 
if (yych == 'a') goto yy309; + goto yy13; +yy271: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy310; + goto yy13; +yy272: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy311; + goto yy13; +yy273: + ++cur; +#line 94 "../src/parse/lex_conf.re" + { opts.set_yybm (lex_conf_string ()); return; } +#line 1293 "src/parse/lex_conf.cc" +yy275: + ++cur; +#line 136 "../src/parse/lex_conf.re" + { opts.set_yych (lex_conf_string ()); return; } +#line 1298 "src/parse/lex_conf.cc" +yy277: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy312; + goto yy13; +yy278: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy313; + goto yy13; +yy279: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy314; + goto yy13; +yy280: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy315; + goto yy13; +yy281: + ++cur; +#line 141 "../src/parse/lex_conf.re" + { opts.set_fill_use (lex_conf_number () != 0); return; } +#line 1319 "src/parse/lex_conf.cc" +yy283: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy316; + goto yy13; +yy284: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy317; + goto yy13; +yy285: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy319; + goto yy13; +yy286: + ++cur; +#line 80 "../src/parse/lex_conf.re" + { opts.set_condGotoParam (lex_conf_string ()); return; } +#line 1336 "src/parse/lex_conf.cc" +yy288: + ++cur; +#line 76 "../src/parse/lex_conf.re" + { opts.set_condEnumPrefix (lex_conf_string ()); return; } +#line 1341 "src/parse/lex_conf.cc" +yy290: + yych = (unsigned char)*++cur; + if (yych == 'P') goto yy320; + goto yy13; +yy291: + yych = (unsigned char)*++cur; + if (yych == 'Y') goto yy322; + goto yy13; +yy292: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy323; + goto yy13; +yy293: + ++cur; +#line 135 "../src/parse/lex_conf.re" + { opts.set_yyctype (lex_conf_string ()); return; } +#line 1358 "src/parse/lex_conf.cc" +yy295: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy324; + 
goto yy13; +yy296: + ++cur; +#line 133 "../src/parse/lex_conf.re" + { opts.set_yydebug (lex_conf_string ()); return; } +#line 1367 "src/parse/lex_conf.cc" +yy298: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy326; + goto yy13; +yy299: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy327; + goto yy13; +yy300: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy328; + goto yy13; +yy301: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy329; + goto yy13; +yy302: + yych = (unsigned char)*++cur; + if (yych == 'H') goto yy330; + goto yy13; +yy303: + ++cur; +#line 111 "../src/parse/lex_conf.re" + { opts.set_yylimit (lex_conf_string ()); return; } +#line 1392 "src/parse/lex_conf.cc" +yy305: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy331; + goto yy13; +yy306: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy333; + goto yy13; +yy307: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy334; + goto yy13; +yy308: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy335; + goto yy13; +yy309: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy336; + goto yy13; +yy310: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy337; + goto yy13; +yy311: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy339; + goto yy13; +yy312: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy340; + goto yy13; +yy313: + yych = (unsigned char)*++cur; + if (yych == 'b') goto yy341; + goto yy13; +yy314: + yych = (unsigned char)*++cur; + if (yych == 'g') goto yy342; + goto yy13; +yy315: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy343; + goto yy13; +yy316: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy345; + goto yy13; +yy317: + ++cur; +#line 97 "../src/parse/lex_conf.re" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'cgoto:threshold' must be nonnegative"); + } + opts.set_cGotoThreshold (static_cast (n)); + return; + } +#line 1453 
"src/parse/lex_conf.cc" +yy319: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy346; + goto yy13; +yy320: + yyaccept = 5; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'C') goto yy347; +yy321: +#line 115 "../src/parse/lex_conf.re" + { opts.set_yybackup (lex_conf_string ()); return; } +#line 1465 "src/parse/lex_conf.cc" +yy322: + yych = (unsigned char)*++cur; + if (yych == 'P') goto yy348; + goto yy13; +yy323: + yych = (unsigned char)*++cur; + if (yych == 'K') goto yy349; + goto yy13; +yy324: + ++cur; +#line 108 "../src/parse/lex_conf.re" + { opts.set_yycursor (lex_conf_string ()); return; } +#line 1478 "src/parse/lex_conf.cc" +yy326: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy350; + goto yy13; +yy327: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy351; + goto yy13; +yy328: + yych = (unsigned char)*++cur; + if (yych == 'D') goto yy352; + goto yy13; +yy329: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy353; + goto yy13; +yy330: + yych = (unsigned char)*++cur; + if (yych == 'A') goto yy354; + goto yy13; +yy331: + ++cur; +#line 109 "../src/parse/lex_conf.re" + { opts.set_yymarker (lex_conf_string ()); return; } +#line 1503 "src/parse/lex_conf.cc" +yy333: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy355; + goto yy13; +yy334: + yych = (unsigned char)*++cur; + if (yych == 'D') goto yy357; + goto yy13; +yy335: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy358; + goto yy13; +yy336: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy359; + goto yy13; +yy337: + ++cur; +#line 91 "../src/parse/lex_conf.re" + { opts.set_bUseStateNext (lex_conf_number () != 0); return; } +#line 1524 "src/parse/lex_conf.cc" +yy339: + yych = (unsigned char)*++cur; + if (yych == 'p') goto yy360; + goto yy13; +yy340: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy361; + goto yy13; +yy341: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy362; + goto yy13; +yy342: + yych = (unsigned char)*++cur; 
+ if (yych == 'e') goto yy363; + goto yy13; +yy343: + ++cur; +#line 137 "../src/parse/lex_conf.re" + { opts.set_yychConversion (lex_conf_number () != 0); return; } +#line 1545 "src/parse/lex_conf.cc" +yy345: + yych = (unsigned char)*++cur; + if (yych == 'r') goto yy364; + goto yy13; +yy346: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy366; + goto yy13; +yy347: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy368; + goto yy13; +yy348: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy369; + goto yy13; +yy349: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy371; + goto yy13; +yy350: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy372; + goto yy13; +yy351: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy373; + goto yy13; +yy352: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy375; + goto yy13; +yy353: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy376; + goto yy13; +yy354: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy378; + goto yy13; +yy355: + yyaccept = 6; + yych = (unsigned char)*(ptr = ++cur); + if (yych == 'C') goto yy380; +yy356: +#line 117 "../src/parse/lex_conf.re" + { opts.set_yyrestore (lex_conf_string ()); return; } +#line 1593 "src/parse/lex_conf.cc" +yy357: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy381; + goto yy13; +yy358: + yych = (unsigned char)*++cur; + if (yych == 'E') goto yy382; + goto yy13; +yy359: + yych = (unsigned char)*++cur; + if (yych == 'l') goto yy384; + goto yy13; +yy360: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy386; + goto yy13; +yy361: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy388; + goto yy13; +yy362: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy390; + goto yy13; +yy363: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy392; + goto yy13; +yy364: + ++cur; +#line 143 "../src/parse/lex_conf.re" + { opts.set_fill_arg_use (lex_conf_number () != 0); return; } +#line 1626 
"src/parse/lex_conf.cc" +yy366: + ++cur; +#line 78 "../src/parse/lex_conf.re" + { opts.set_condDividerParam (lex_conf_string ()); return; } +#line 1631 "src/parse/lex_conf.cc" +yy368: + yych = (unsigned char)*++cur; + if (yych == 'X') goto yy394; + goto yy13; +yy369: + ++cur; +#line 69 "../src/parse/lex_conf.re" + { opts.set_yycondtype (lex_conf_string ()); return; } +#line 1640 "src/parse/lex_conf.cc" +yy371: + yych = (unsigned char)*++cur; + if (yych == 'R') goto yy396; + goto yy13; +yy372: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy398; + goto yy13; +yy373: + ++cur; +#line 142 "../src/parse/lex_conf.re" + { opts.set_fill_arg (lex_conf_string ()); return; } +#line 1653 "src/parse/lex_conf.cc" +yy375: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy399; + goto yy13; +yy376: + yyaccept = 7; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy400; +yy377: +#line 83 "../src/parse/lex_conf.re" + { opts.set_state_get (lex_conf_string ()); return; } +#line 1665 "src/parse/lex_conf.cc" +yy378: + ++cur; +#line 119 "../src/parse/lex_conf.re" + { opts.set_yylessthan (lex_conf_string ()); return; } +#line 1670 "src/parse/lex_conf.cc" +yy380: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy401; + goto yy13; +yy381: + yych = (unsigned char)*++cur; + if (yych == 'T') goto yy402; + goto yy13; +yy382: + yyaccept = 8; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy403; + if (yych == '@') goto yy404; +yy383: +#line 85 "../src/parse/lex_conf.re" + { opts.set_state_set (lex_conf_string ()); return; } +#line 1687 "src/parse/lex_conf.cc" +yy384: + ++cur; +#line 88 "../src/parse/lex_conf.re" + { opts.set_yyfilllabel (lex_conf_string ()); return; } +#line 1692 "src/parse/lex_conf.cc" +yy386: + ++cur; +#line 92 "../src/parse/lex_conf.re" + { opts.set_yyaccept (lex_conf_string ()); return; } +#line 1697 "src/parse/lex_conf.cc" +yy388: + ++cur; +#line 81 "../src/parse/lex_conf.re" + { opts.set_yyctable (lex_conf_string ()); 
return; } +#line 1702 "src/parse/lex_conf.cc" +yy390: + ++cur; +#line 154 "../src/parse/lex_conf.re" + { lex_conf_string (); return; } +#line 1707 "src/parse/lex_conf.cc" +yy392: + ++cur; +#line 106 "../src/parse/lex_conf.re" + { opts.set_yytarget (lex_conf_string ()); return; } +#line 1712 "src/parse/lex_conf.cc" +yy394: + ++cur; +#line 116 "../src/parse/lex_conf.re" + { opts.set_yybackupctx (lex_conf_string ()); return; } +#line 1717 "src/parse/lex_conf.cc" +yy396: + ++cur; +#line 110 "../src/parse/lex_conf.re" + { opts.set_yyctxmarker (lex_conf_string ()); return; } +#line 1722 "src/parse/lex_conf.cc" +yy398: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy405; + goto yy13; +yy399: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy407; + goto yy13; +yy400: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy408; + goto yy13; +yy401: + yych = (unsigned char)*++cur; + if (yych == 'X') goto yy409; + goto yy13; +yy402: + yych = (unsigned char)*++cur; + if (yych == 'I') goto yy411; + goto yy13; +yy403: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy412; + goto yy13; +yy404: + yych = (unsigned char)*++cur; + if (yych == 's') goto yy413; + goto yy13; +yy405: + ++cur; +#line 144 "../src/parse/lex_conf.re" + { opts.set_fill_naked (lex_conf_number () != 0); return; } +#line 1755 "src/parse/lex_conf.cc" +yy407: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy414; + goto yy13; +yy408: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy415; + goto yy13; +yy409: + ++cur; +#line 118 "../src/parse/lex_conf.re" + { opts.set_yyrestorectx (lex_conf_string ()); return; } +#line 1768 "src/parse/lex_conf.cc" +yy411: + yych = (unsigned char)*++cur; + if (yych == 'O') goto yy416; + goto yy13; +yy412: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy417; + goto yy13; +yy413: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy418; + goto yy13; +yy414: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy419; + goto 
yy13; +yy415: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy421; + goto yy13; +yy416: + yych = (unsigned char)*++cur; + if (yych == 'N') goto yy422; + goto yy13; +yy417: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy424; + goto yy13; +yy418: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy425; + goto yy13; +yy419: + yyaccept = 9; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy426; +yy420: +#line 70 "../src/parse/lex_conf.re" + { opts.set_cond_get (lex_conf_string ()); return; } +#line 1808 "src/parse/lex_conf.cc" +yy421: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy427; + goto yy13; +yy422: + yyaccept = 10; + yych = (unsigned char)*(ptr = ++cur); + if (yych == ':') goto yy428; + if (yych == '@') goto yy429; +yy423: +#line 72 "../src/parse/lex_conf.re" + { opts.set_cond_set (lex_conf_string ()); return; } +#line 1821 "src/parse/lex_conf.cc" +yy424: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy430; + goto yy13; +yy425: + yych = (unsigned char)*++cur; + if (yych == 't') goto yy431; + goto yy13; +yy426: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy432; + goto yy13; +yy427: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy433; + goto yy13; +yy428: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy435; + goto yy13; +yy429: + yych = (unsigned char)*++cur; + if (yych == 'c') goto yy436; + goto yy13; +yy430: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy437; + goto yy13; +yy431: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy439; + goto yy13; +yy432: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy441; + goto yy13; +yy433: + ++cur; +#line 84 "../src/parse/lex_conf.re" + { opts.set_state_get_naked (lex_conf_number () != 0); return; } +#line 1862 "src/parse/lex_conf.cc" +yy435: + yych = (unsigned char)*++cur; + if (yych == 'a') goto yy442; + goto yy13; +yy436: + yych = (unsigned char)*++cur; + if (yych == 'o') goto yy443; + goto yy13; 
+yy437: + ++cur; +#line 86 "../src/parse/lex_conf.re" + { opts.set_state_set_naked (lex_conf_number () != 0); return; } +#line 1875 "src/parse/lex_conf.cc" +yy439: + ++cur; +#line 87 "../src/parse/lex_conf.re" + { opts.set_state_set_arg (lex_conf_string ()); return; } +#line 1880 "src/parse/lex_conf.cc" +yy441: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy444; + goto yy13; +yy442: + yych = (unsigned char)*++cur; + if (yych == 'k') goto yy445; + goto yy13; +yy443: + yych = (unsigned char)*++cur; + if (yych == 'n') goto yy446; + goto yy13; +yy444: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy447; + goto yy13; +yy445: + yych = (unsigned char)*++cur; + if (yych == 'e') goto yy448; + goto yy13; +yy446: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy449; + goto yy13; +yy447: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy451; + goto yy13; +yy448: + yych = (unsigned char)*++cur; + if (yych == 'd') goto yy453; + goto yy13; +yy449: + ++cur; +#line 73 "../src/parse/lex_conf.re" + { opts.set_cond_set_arg (lex_conf_string ()); return; } +#line 1917 "src/parse/lex_conf.cc" +yy451: + ++cur; +#line 71 "../src/parse/lex_conf.re" + { opts.set_cond_get_naked (lex_conf_number () != 0); return; } +#line 1922 "src/parse/lex_conf.cc" +yy453: + ++cur; +#line 74 "../src/parse/lex_conf.re" + { opts.set_cond_set_naked (lex_conf_number () != 0); return; } +#line 1927 "src/parse/lex_conf.cc" +} +#line 155 "../src/parse/lex_conf.re" + +} + +void Scanner::lex_conf_assign () +{ + +#line 1936 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= 0x1F) { + if (yych == '\t') goto yy459; + } else { + if (yych <= ' ') goto yy459; + if (yych == '=') goto yy460; + } + ++cur; +yy458: +#line 161 "../src/parse/lex_conf.re" + { fatal ("missing '=' in configuration"); } +#line 1985 "src/parse/lex_conf.cc" +yy459: + yych = (unsigned char)*(ptr = ++cur); + if (yych <= 0x1F) { + if (yych == '\t') goto yy463; + goto yy458; + } else { + if (yych <= ' ') goto yy463; + if (yych != '=') goto yy458; + } +yy460: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy460; + } +#line 162 "../src/parse/lex_conf.re" + { return; } +#line 2004 "src/parse/lex_conf.cc" +yy463: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yych <= 0x1F) { + if (yych == '\t') goto yy463; + } else { + if (yych <= ' ') goto yy463; + if (yych == '=') goto yy460; + } + cur = ptr; + goto yy458; +} +#line 163 "../src/parse/lex_conf.re" + +} + +void Scanner::lex_conf_semicolon () +{ + +#line 2025 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= 0x1F) { + if (yych == '\t') goto yy470; + } else { + if (yych <= ' ') goto yy470; + if (yych == ';') goto yy471; + } + ++cur; +yy469: +#line 169 "../src/parse/lex_conf.re" + { fatal ("missing ending ';' in configuration"); } +#line 2074 "src/parse/lex_conf.cc" +yy470: + yych = (unsigned char)*(ptr = ++cur); + if (yybm[0+yych] & 128) { + goto yy473; + } + if (yych != ';') goto yy469; +yy471: + ++cur; +#line 170 "../src/parse/lex_conf.re" + { return; } +#line 2085 "src/parse/lex_conf.cc" +yy473: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy473; + } + if (yych == ';') goto yy471; + cur = ptr; + goto yy469; +} +#line 171 "../src/parse/lex_conf.re" + +} + +int32_t Scanner::lex_conf_number () +{ + lex_conf_assign (); + tok = cur; + +#line 2106 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + if ((lim - cur) < 2) fill(2); + yych = (unsigned char)*cur; + if (yych <= '/') { + if (yych == '-') goto yy479; + } else { + if (yych <= '0') goto yy480; + if (yych <= '9') goto yy482; + } +yy478: +yy479: + yych = (unsigned char)*++cur; + if (yych <= '0') goto yy478; + if (yych <= '9') goto yy482; + goto yy478; +yy480: + ++cur; +yy481: +#line 180 "../src/parse/lex_conf.re" + { + int32_t n = 0; + if (!s_to_i32_unsafe (tok, cur, n)) + { + fatal ("configuration value overflow"); + } + lex_conf_semicolon (); + return n; + } +#line 2170 "src/parse/lex_conf.cc" +yy482: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy482; + } + goto yy481; +} +#line 189 "../src/parse/lex_conf.re" + +} + +std::string Scanner::lex_conf_string () +{ + lex_conf_assign (); + std::string s; + tok = cur; + +#line 2190 "src/parse/lex_conf.cc" +{ + unsigned char yych; + static const unsigned char yybm[] = { + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 0, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 
128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yych <= '!') { + if (yych <= '\n') { + if (yych <= 0x08) goto yy487; + } else { + if (yych != ' ') goto yy487; + } + } else { + if (yych <= '\'') { + if (yych <= '"') goto yy489; + if (yych <= '&') goto yy487; + goto yy489; + } else { + if (yych != ';') goto yy487; + } + } +yy486: +#line 212 "../src/parse/lex_conf.re" + { + s = std::string(tok, tok_len()); + goto end; + } +#line 2250 "src/parse/lex_conf.cc" +yy487: + ++cur; + if (lim <= cur) fill(1); + yych = (unsigned char)*cur; + if (yybm[0+yych] & 128) { + goto yy487; + } + goto yy486; +yy489: + ++cur; +#line 198 "../src/parse/lex_conf.re" + { + const char quote = tok[0]; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, end); + if (end) { + goto end; + } + if (c > 0xFF) { + fatalf ("multibyte character in configuration string: 0x%X", c); + } else { + s += static_cast(c); + } + } + } +#line 2276 "src/parse/lex_conf.cc" +} +#line 216 "../src/parse/lex_conf.re" + +end: + lex_conf_semicolon (); + return s; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/lex_conf.re b/tools/re2c/src/parse/lex_conf.re new file mode 100644 index 000000000..550fe1ab1 --- /dev/null +++ b/tools/re2c/src/parse/lex_conf.re @@ -0,0 +1,222 @@ +#include "src/util/c99_stdint.h" +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include 
"src/ir/regexp/encoding/enc.h" +#include "src/parse/scanner.h" +#include "src/util/s_to_n32_unsafe.h" + +namespace re2c +{ + +// global re2c config (affects the whole file) +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = cur; + re2c:define:YYLIMIT = lim; + re2c:define:YYMARKER = ptr; + re2c:define:YYCTXMARKER = ctx; + re2c:define:YYFILL = fill; + + // source code is in ASCII: pointers have type 'char *' + // but re2c makes an implicit assumption that YYCTYPE is unsigned + // when it generates comparisons + re2c:yych:conversion = 1; + + space = [ \t]; + + conf_assign = space* "=" space*; + + naked_char = . \ (space | [;]); + naked = "" | (naked_char \ ['"]) naked_char*; + + number = "0" | ("-"? [1-9] [0-9]*); +*/ + +void Scanner::lex_conf () +{ + tok = cur; +/*!re2c + * { fatal ((tok - pos) - tchar, "unrecognized configuration"); } + + "flags:" [ewxu8] + { + Enc::type_t enc = Enc::ASCII; + switch (cur[-1]) + { + case 'e': enc = Enc::EBCDIC; break; + case 'w': enc = Enc::UCS2; break; + case 'x': enc = Enc::UTF16; break; + case 'u': enc = Enc::UTF32; break; + case '8': enc = Enc::UTF8; break; + } + const int32_t n = lex_conf_number (); + if (n == 0) + { + opts.unset_encoding (enc); + } + else if (!opts.set_encoding (enc)) + { + fatalf ("Cannot set %s encoding: please reset %s encoding first" + , Enc::name (enc) + , Enc::name (opts->encoding.type ())); + } + return; + } + + "define:YYCONDTYPE" { opts.set_yycondtype (lex_conf_string ()); return; } + "define:YYGETCONDITION" { opts.set_cond_get (lex_conf_string ()); return; } + "define:YYGETCONDITION:naked" { opts.set_cond_get_naked (lex_conf_number () != 0); return; } + "define:YYSETCONDITION" { opts.set_cond_set (lex_conf_string ()); return; } + "define:YYSETCONDITION@cond" { opts.set_cond_set_arg (lex_conf_string ()); return; } + "define:YYSETCONDITION:naked" { opts.set_cond_set_naked (lex_conf_number () != 0); return; } + "condprefix" { opts.set_condPrefix (lex_conf_string ()); return; } + 
"condenumprefix" { opts.set_condEnumPrefix (lex_conf_string ()); return; } + "cond:divider" { opts.set_condDivider (lex_conf_string ()); return; } + "cond:divider@cond" { opts.set_condDividerParam (lex_conf_string ()); return; } + "cond:goto" { opts.set_condGoto (lex_conf_string ()); return; } + "cond:goto@cond" { opts.set_condGotoParam (lex_conf_string ()); return; } + "variable:yyctable" { opts.set_yyctable (lex_conf_string ()); return; } + + "define:YYGETSTATE" { opts.set_state_get (lex_conf_string ()); return; } + "define:YYGETSTATE:naked" { opts.set_state_get_naked (lex_conf_number () != 0); return; } + "define:YYSETSTATE" { opts.set_state_set (lex_conf_string ()); return; } + "define:YYSETSTATE:naked" { opts.set_state_set_naked (lex_conf_number () != 0); return; } + "define:YYSETSTATE@state" { opts.set_state_set_arg (lex_conf_string ()); return; } + "label:yyFillLabel" { opts.set_yyfilllabel (lex_conf_string ()); return; } + "label:yyNext" { opts.set_yynext (lex_conf_string ()); return; } + "state:abort" { opts.set_bUseStateAbort (lex_conf_number () != 0); return; } + "state:nextlabel" { opts.set_bUseStateNext (lex_conf_number () != 0); return; } + "variable:yyaccept" { opts.set_yyaccept (lex_conf_string ()); return; } + + "variable:yybm" { opts.set_yybm (lex_conf_string ()); return; } + "yybm:hex" { opts.set_yybmHexTable (lex_conf_number () != 0); return; } + "cgoto:threshold" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'cgoto:threshold' must be nonnegative"); + } + opts.set_cGotoThreshold (static_cast (n)); + return; + } + "variable:yytarget" { opts.set_yytarget (lex_conf_string ()); return; } + + "define:YYCURSOR" { opts.set_yycursor (lex_conf_string ()); return; } + "define:YYMARKER" { opts.set_yymarker (lex_conf_string ()); return; } + "define:YYCTXMARKER" { opts.set_yyctxmarker (lex_conf_string ()); return; } + "define:YYLIMIT" { opts.set_yylimit (lex_conf_string ()); return; } + + "define:YYPEEK" { 
opts.set_yypeek (lex_conf_string ()); return; } + "define:YYSKIP" { opts.set_yyskip (lex_conf_string ()); return; } + "define:YYBACKUP" { opts.set_yybackup (lex_conf_string ()); return; } + "define:YYBACKUPCTX" { opts.set_yybackupctx (lex_conf_string ()); return; } + "define:YYRESTORE" { opts.set_yyrestore (lex_conf_string ()); return; } + "define:YYRESTORECTX" { opts.set_yyrestorectx (lex_conf_string ()); return; } + "define:YYLESSTHAN" { opts.set_yylessthan (lex_conf_string ()); return; } + + "indent:string" { opts.set_indString (lex_conf_string ()); return; } + "indent:top" + { + const int32_t n = lex_conf_number (); + if (n < 0) + { + fatal ("configuration 'indent:top' must be nonnegative"); + } + opts.set_topIndent (static_cast (n)); + return; + } + + "define:YYDEBUG" { opts.set_yydebug (lex_conf_string ()); return; } + + "define:YYCTYPE" { opts.set_yyctype (lex_conf_string ()); return; } + "variable:yych" { opts.set_yych (lex_conf_string ()); return; } + "yych:conversion" { opts.set_yychConversion (lex_conf_number () != 0); return; } + "yych:emit" { opts.set_bEmitYYCh (lex_conf_number () != 0); return; } + + "define:YYFILL" { opts.set_fill (lex_conf_string ()); return; } + "yyfill:enable" { opts.set_fill_use (lex_conf_number () != 0); return; } + "define:YYFILL@len" { opts.set_fill_arg (lex_conf_string ()); return; } + "yyfill:parameter" { opts.set_fill_arg_use (lex_conf_number () != 0); return; } + "define:YYFILL:naked" { opts.set_fill_naked (lex_conf_number () != 0); return; } + "yyfill:check" { opts.set_fill_check (lex_conf_number () != 0); return; } + + "labelprefix" { opts.set_labelPrefix (lex_conf_string ()); return; } + + // try to lex number first, otherwize it would be lexed as a naked string + "startlabel" / conf_assign number { out.set_force_start_label (lex_conf_number () != 0); return; } + "startlabel" { out.set_user_start_label (lex_conf_string ()); return; } + + // deprecated + "variable:yystable" { lex_conf_string (); return; } +*/ +} + +void 
Scanner::lex_conf_assign () +{ +/*!re2c + * { fatal ("missing '=' in configuration"); } + conf_assign { return; } +*/ +} + +void Scanner::lex_conf_semicolon () +{ +/*!re2c + * { fatal ("missing ending ';' in configuration"); } + space* ";" { return; } +*/ +} + +int32_t Scanner::lex_conf_number () +{ + lex_conf_assign (); + tok = cur; +/*!re2c + number + { + int32_t n = 0; + if (!s_to_i32_unsafe (tok, cur, n)) + { + fatal ("configuration value overflow"); + } + lex_conf_semicolon (); + return n; + } +*/ +} + +std::string Scanner::lex_conf_string () +{ + lex_conf_assign (); + std::string s; + tok = cur; +/*!re2c + ['"] { + const char quote = tok[0]; + for (bool end;;) { + const uint32_t c = lex_str_chr(quote, end); + if (end) { + goto end; + } + if (c > 0xFF) { + fatalf ("multibyte character in configuration string: 0x%X", c); + } else { + s += static_cast(c); + } + } + } + naked { + s = std::string(tok, tok_len()); + goto end; + } +*/ +end: + lex_conf_semicolon (); + return s; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/loc.h b/tools/re2c/src/parse/loc.h new file mode 100644 index 000000000..b3d4277a2 --- /dev/null +++ b/tools/re2c/src/parse/loc.h @@ -0,0 +1,24 @@ +#ifndef _RE2C_PARSE_LOC_ +#define _RE2C_PARSE_LOC_ + +#include + +#include "src/util/c99_stdint.h" + +namespace re2c +{ + +struct Loc +{ + std::string filename; + uint32_t line; + + inline Loc (const std::string & f, uint32_t l) + : filename (f) + , line (l) + {} +}; + +} // namespace re2c + +#endif // _RE2C_PARSE_LOC_ diff --git a/tools/re2c/src/parse/parser.cc b/tools/re2c/src/parse/parser.cc new file mode 100644 index 000000000..a328bc2c9 --- /dev/null +++ b/tools/re2c/src/parse/parser.cc @@ -0,0 +1,2396 @@ +/* A Bison parser, made by GNU Bison 3.0.4. */ + +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "3.0.4" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. 
*/ +#define YYPURE 0 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + + + + +/* Copy the first part of user declarations. */ +#line 1 "../src/parse/parser.ypp" /* yacc.c:339 */ + + +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/compile.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/loc.h" +#include "src/parse/parser.h" +#include "src/parse/scanner.h" +#include "src/parse/spec.h" +#include "src/util/counter.h" +#include "src/util/free_list.h" +#include "src/util/range.h" +#include "src/util/smart_ptr.h" + +#define YYMALLOC malloc +#define YYFREE free + +using namespace re2c; + +extern "C" +{ +int yylex(); +void yyerror(const char*); +} + +static counter_t rank_counter; +static std::vector condnames; +static re2c::SpecMap specMap; +static Spec spec; +static RuleOp *specNone = NULL; +static RuleOpList specStar; +static RuleOp * star_default = NULL; +static Scanner *in = NULL; +static Scanner::ParseMode parseMode; +static SetupMap ruleSetupMap; +static bool foundRules; +static symbol_table_t symbol_table; + +/* Bison version 1.875 emits a definition that is not working + * with several g++ version. Hence we disable it here. 
+ */ +#if defined(__GNUC__) +#define __attribute__(x) +#endif + +void context_check(CondList *clist) +{ + if (!opts->cFlag) + { + delete clist; + in->fatal("conditions are only allowed when using -c switch"); + } +} + +void context_none(CondList *clist) +{ + delete clist; + context_check(NULL); + in->fatal("no expression specified"); +} + +void context_rule + ( CondList * clist + , const Loc & loc + , RegExp * expr + , RegExp * look + , const Code * code + , const std::string * newcond + ) +{ + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (specMap.find(*it) == specMap.end()) + { + condnames.push_back (*it); + } + + RuleOp * rule = new RuleOp + ( loc + , expr + , look + , rank_counter.next () + , code + , newcond + ); + specMap[*it].add (rule); + } + delete clist; + delete newcond; +} + +void setup_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (ruleSetupMap.find(*it) != ruleSetupMap.end()) + { + in->fatalf_at(code->loc.line, "code to setup rule '%s' is already defined", it->c_str()); + } + ruleSetupMap[*it] = std::make_pair(code->loc.line, code->text); + } + delete clist; +} + +void default_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + RuleOp * def = new RuleOp + ( code->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , code + , NULL + ); + if (!specMap[*it].add_def (def)) + { + in->fatalf_at(code->loc.line, "code to default rule '%s' is already defined", it->c_str()); + } + } + delete clist; +} + + +#line 224 "src/parse/parser.cc" /* yacc.c:339 */ + +# ifndef YY_NULLPTR +# if defined __cplusplus && 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# endif + +/* 
Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +/* In a future release of Bison, this section will be replaced + by #include "y.tab.h". */ +#ifndef YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +# define YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + TOKEN_CLOSE = 258, + TOKEN_CLOSESIZE = 259, + TOKEN_CODE = 260, + TOKEN_CONF = 261, + TOKEN_ID = 262, + TOKEN_FID = 263, + TOKEN_FID_END = 264, + TOKEN_NOCOND = 265, + TOKEN_REGEXP = 266, + TOKEN_SETUP = 267, + TOKEN_STAR = 268 + }; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED + +union YYSTYPE +{ +#line 161 "../src/parse/parser.ypp" /* yacc.c:355 */ + + re2c::RegExp * regexp; + const re2c::Code * code; + char op; + re2c::ExtOp extop; + std::string * str; + re2c::CondList * clist; + +#line 287 "src/parse/parser.cc" /* yacc.c:355 */ +}; + +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + +extern YYSTYPE yylval; + +int yyparse (void); + +#endif /* !YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED */ + +/* Copy the second part of user declarations. 
*/ + +#line 304 "src/parse/parser.cc" /* yacc.c:358 */ + +#ifdef short +# undef short +#endif + +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; +#endif + +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif + +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! defined YYSIZE_T +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + +#ifndef YY_ATTRIBUTE +# if (defined __GNUC__ \ + && (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \ + || defined __SUNPRO_C && 0x5110 <= __SUNPRO_C +# define YY_ATTRIBUTE(Spec) __attribute__(Spec) +# else +# define YY_ATTRIBUTE(Spec) /* empty */ +# endif +#endif + +#ifndef YY_ATTRIBUTE_PURE +# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__)) +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__)) +#endif + +#if !defined _Noreturn \ + && (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112) +# if defined _MSC_VER && 1200 <= _MSC_VER +# define _Noreturn __declspec (noreturn) +# else +# define _Noreturn YY_ATTRIBUTE ((__noreturn__)) +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! 
defined lint || defined __GNUC__ +# define YYUSE(E) ((void) (E)) +#else +# define YYUSE(E) /* empty */ +#endif + +#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ +/* Suppress an incorrect diagnostic about yylval being uninitialized. */ +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. 
Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. 
*/ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 2 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 104 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 25 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 14 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 49 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 92 + +/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned + by yylex, with out-of-bounds checking. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 268 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, without out-of-bounds checking. 
*/ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 23, 24, 2, 2, 20, 2, 2, 16, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 19, 15, + 17, 14, 18, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 22, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 21, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13 +}; + +#if YYDEBUG + /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +static const yytype_uint16 yyrline[] = +{ + 0, 193, 193, 195, 199, 203, 211, 219, 223, 227, + 231, 247, 264, 268, 274, 279, 285, 289, 303, 319, + 324, 330, 345, 362, 381, 387, 395, 398, 405, 411, + 421, 424, 432, 435, 442, 446, 453, 457, 464, 468, + 475, 479, 494, 513, 517, 521, 525, 532, 542, 546 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE || 0 +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "TOKEN_CLOSE", "TOKEN_CLOSESIZE", + "TOKEN_CODE", "TOKEN_CONF", "TOKEN_ID", "TOKEN_FID", "TOKEN_FID_END", + "TOKEN_NOCOND", "TOKEN_REGEXP", "TOKEN_SETUP", "TOKEN_STAR", "'='", + "';'", "'/'", "'<'", "'>'", "':'", "','", "'|'", "'\\\\'", "'('", "')'", + "$accept", "spec", "decl", "rule", "cond", "clist", "newcond", "look", + "expr", "diff", "term", "factor", "close", "primary", YY_NULLPTR +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[NUM] -- (External) token number corresponding to the + (internal) symbol number NUM (which must be that of a token). */ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 266, 267, 268, 61, 59, 47, 60, 62, 58, + 44, 124, 92, 40, 41 +}; +# endif + +#define YYPACT_NINF -43 + +#define yypact_value_is_default(Yystate) \ + (!!((Yystate) == (-43))) + +#define YYTABLE_NINF -1 + +#define yytable_value_is_error(Yytable_value) \ + 0 + + /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const yytype_int8 yypact[] = +{ + -43, 11, -43, -43, -11, 30, 47, -43, 25, 10, + 33, 30, -43, -43, 48, 17, 30, -43, 1, 30, + -43, 4, 40, 60, 70, -43, 61, 63, 42, -43, + 64, 66, 59, 30, 30, 73, 30, -43, -43, -43, + -43, 32, -9, -43, -43, 78, -43, -43, 81, 82, + 83, 20, 44, -43, 67, 17, -43, 30, -43, -43, + -43, -43, -43, -43, -43, -43, 84, 51, 48, 86, + 54, 48, -43, 60, 87, 57, -43, 60, 88, 58, + -43, -43, 60, 89, -43, -43, 60, 90, -43, -43, + -43, -43 +}; + + /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. 
*/ +static const yytype_uint8 yydefact[] = +{ + 2, 0, 1, 9, 47, 0, 30, 48, 26, 0, + 26, 0, 4, 3, 32, 34, 36, 38, 40, 0, + 47, 0, 0, 30, 0, 28, 0, 0, 27, 11, + 0, 0, 0, 0, 0, 0, 0, 39, 43, 42, + 44, 41, 0, 6, 8, 0, 23, 22, 0, 0, + 0, 32, 32, 49, 33, 35, 10, 37, 45, 46, + 5, 7, 31, 24, 25, 29, 0, 30, 32, 0, + 30, 32, 21, 30, 0, 30, 16, 30, 0, 30, + 20, 19, 30, 0, 15, 14, 30, 0, 18, 17, + 13, 12 +}; + + /* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -43, -43, -43, -43, 91, -43, -23, -42, -3, 62, + 68, -15, -43, -43 +}; + + /* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + -1, 1, 12, 13, 27, 28, 24, 35, 14, 15, + 16, 17, 41, 18 +}; + + /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. */ +static const yytype_uint8 yytable[] = +{ + 46, 37, 21, 19, 38, 39, 60, 61, 32, 67, + 70, 2, 34, 43, 40, 29, 42, 3, 4, 5, + 44, 6, 7, 8, 9, 34, 75, 20, 10, 79, + 54, 7, 25, 66, 11, 58, 33, 20, 26, 36, + 25, 7, 37, 11, 74, 59, 30, 78, 68, 71, + 80, 20, 83, 11, 84, 7, 87, 69, 45, 88, + 33, 22, 50, 90, 33, 22, 23, 11, 22, 34, + 73, 22, 22, 77, 22, 47, 82, 86, 56, 48, + 34, 49, 51, 53, 52, 62, 63, 64, 34, 72, + 65, 76, 81, 85, 89, 91, 55, 0, 0, 0, + 0, 31, 0, 0, 57 +}; + +static const yytype_int8 yycheck[] = +{ + 23, 16, 5, 14, 3, 4, 15, 16, 11, 51, + 52, 0, 21, 9, 13, 5, 19, 6, 7, 8, + 16, 10, 11, 12, 13, 21, 68, 7, 17, 71, + 33, 11, 7, 13, 23, 3, 16, 7, 13, 22, + 7, 11, 57, 23, 67, 13, 13, 70, 51, 52, + 73, 7, 75, 23, 77, 11, 79, 13, 18, 82, + 16, 14, 20, 86, 16, 14, 19, 23, 14, 21, + 19, 14, 14, 19, 14, 5, 19, 19, 5, 18, + 21, 18, 18, 24, 18, 7, 5, 5, 21, 5, + 7, 5, 5, 5, 5, 5, 34, -1, -1, -1, + -1, 10, -1, -1, 36 +}; + + /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. 
*/ +static const yytype_uint8 yystos[] = +{ + 0, 26, 0, 6, 7, 8, 10, 11, 12, 13, + 17, 23, 27, 28, 33, 34, 35, 36, 38, 14, + 7, 33, 14, 19, 31, 7, 13, 29, 30, 5, + 13, 29, 33, 16, 21, 32, 22, 36, 3, 4, + 13, 37, 33, 9, 16, 18, 31, 5, 18, 18, + 20, 18, 18, 24, 33, 34, 5, 35, 3, 13, + 15, 16, 7, 5, 5, 7, 13, 32, 33, 13, + 32, 33, 5, 19, 31, 32, 5, 19, 31, 32, + 31, 5, 19, 31, 31, 5, 19, 31, 31, 5, + 31, 5 +}; + + /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +static const yytype_uint8 yyr1[] = +{ + 0, 25, 26, 26, 26, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 29, 29, 30, 30, + 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, + 36, 36, 36, 37, 37, 37, 37, 38, 38, 38 +}; + + /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 0, 2, 2, 4, 3, 4, 3, 1, + 3, 2, 7, 7, 6, 6, 5, 7, 7, 6, + 6, 5, 3, 3, 4, 4, 0, 1, 1, 3, + 0, 3, 0, 2, 1, 3, 1, 3, 1, 2, + 1, 2, 2, 1, 1, 2, 2, 1, 1, 3 +}; + + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (0) + +/* Error token number */ +#define YYTERROR 1 +#define YYERRCODE 256 + + + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + +/* This macro is provided for backward compatibility. 
*/ +#ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +#endif + + +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*----------------------------------------. +| Print this symbol's value on YYOUTPUT. | +`----------------------------------------*/ + +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +{ + FILE *yyo = yyoutput; + YYUSE (yyo); + if (!yyvaluep) + return; +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# endif + YYUSE (yytype); +} + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +{ + YYFPRINTF (yyoutput, "%s %s (", + yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep); + YYFPRINTF (yyoutput, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +static void +yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. 
| +`------------------------------------------------*/ + +static void +yy_reduce_print (yytype_int16 *yyssp, YYSTYPE *yyvsp, int yyrule) +{ + unsigned long int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + yystos[yyssp[yyi + 1 - yynrhs]], + &(yyvsp[(yyi + 1) - (yynrhs)]) + ); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. 
*/ +static YYSIZE_T +yystrlen (const char *yystr) +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +static char * +yystpcpy (char *yydest, const char *yysrc) +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. + + Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return 2 if the + required number of bytes is too large to store. 
*/ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, + yytype_int16 *yyssp, int yytoken) +{ + YYSIZE_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]); + YYSIZE_T yysize = yysize0; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + /* Internationalized format string. */ + const char *yyformat = YY_NULLPTR; + /* Arguments of yyformat. */ + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + /* Number of reported tokens (one for the "unexpected", one per + "expected"). */ + int yycount = 0; + + /* There are many possibilities here to consider: + - If this state is a consistent state with a default action, then + the only way this function was invoked is if the default action + is an error action. In that case, don't check for expected + tokens because there are none. + - The only way there can be no lookahead present (in yychar) is if + this state is a consistent state with a default action. Thus, + detecting the absence of a lookahead is sufficient to determine + that there is no unexpected or expected token to report. In that + case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this state is a + consistent state with a default action. There might have been a + previous inconsistent state, consistent state with a non-default + action, or user semantic action that manipulated yychar. + - Of course, the expected token list depends on states to have + correct lookahead information, and it depends on the parser not + to perform extra reductions after fetching a lookahead from the + scanner and before detecting a syntax error. Thus, state merging + (from LALR or IELR) and default reductions corrupt the expected + token list. However, the list is correct for canonical LR with + one exception: it will still contain any token that will not be + accepted due to an error action in a later state. 
+ */ + if (yytoken != YYEMPTY) + { + int yyn = yypact[*yyssp]; + yyarg[yycount++] = yytname[yytoken]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + break; + } + yyarg[yycount++] = yytname[yyx]; + { + YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]); + if (! (yysize <= yysize1 + && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + } + } + } + + switch (yycount) + { +# define YYCASE_(N, S) \ + case N: \ + yyformat = S; \ + break + YYCASE_(0, YY_("syntax error")); + YYCASE_(1, YY_("syntax error, unexpected %s")); + YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); + YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); + YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); + YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); +# undef YYCASE_ + } + + { + YYSIZE_T yysize1 = yysize + yystrlen (yyformat); + if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return 1; + } + + /* Avoid sprintf, as that infringes on the user's name space. 
+ Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + } + return 0; +} +#endif /* YYERROR_VERBOSE */ + +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) +{ + YYUSE (yyvaluep); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YYUSE (yytype); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + +/* The lookahead symbol. */ +int yychar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; +/* Number of syntax errors so far. */ +int yynerrs; + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (void) +{ + int yystate; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + + /* The stacks and their tools: + 'yyss': related to states. + 'yyvs': related to semantic values. + + Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss; + yytype_int16 *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs; + YYSTYPE *yyvsp; + + YYSIZE_T yystacksize; + + int yyn; + int yyresult; + /* Lookahead token as an internal (translated) token number. */ + int yytoken = 0; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. 
*/ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; +#endif + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + yyssp = yyss = yyssa; + yyvsp = yyvs = yyvsa; + yystacksize = YYINITDEPTH; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else + /* Extend the stack our own way. 
*/ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = yylex (); + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. 
*/ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + + /* Discard the shifted token. */ + yychar = YYEMPTY; + + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 2: +#line 193 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + } +#line 1438 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 3: +#line 196 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + foundRules = true; + } +#line 1446 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 5: +#line 204 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (!symbol_table.insert (std::make_pair (* (yyvsp[-3].str), (yyvsp[-1].regexp))).second) + { + in->fatal("sym already defined"); + } + delete (yyvsp[-3].str); + } +#line 1458 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 6: +#line 212 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (!symbol_table.insert (std::make_pair (* (yyvsp[-2].str), (yyvsp[-1].regexp))).second) + { + in->fatal("sym already defined"); + } + delete (yyvsp[-2].str); + } +#line 1470 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 7: +#line 220 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + in->fatal("trailing contexts are not allowed in named definitions"); + } +#line 1478 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 8: +#line 224 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + in->fatal("trailing contexts are not allowed in named definitions"); + } +#line 1486 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 9: +#line 227 "../src/parse/parser.ypp" /* yacc.c:1646 */ + {} +#line 1492 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 10: +#line 232 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (opts->cFlag) + { + in->fatal("condition or '<*>' required when using -c switch"); + } + RuleOp * rule = new RuleOp + ( (yyvsp[0].code)->loc + , (yyvsp[-2].regexp) + , (yyvsp[-1].regexp) + , rank_counter.next () + , (yyvsp[0].code) + , NULL + ); + spec.add (rule); + } +#line 1512 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 11: +#line 248 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if 
(opts->cFlag) + in->fatal("condition or '<*>' required when using -c switch"); + RuleOp * def = new RuleOp + ( (yyvsp[0].code)->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , (yyvsp[0].code) + , NULL + ); + if (!spec.add_def (def)) + { + in->fatal("code to default rule is already defined"); + } + } +#line 1533 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 12: +#line 265 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_rule ((yyvsp[-5].clist), (yyvsp[0].code)->loc, (yyvsp[-3].regexp), (yyvsp[-2].regexp), (yyvsp[0].code), (yyvsp[-1].str)); + } +#line 1541 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 13: +#line 269 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + Loc loc (in->get_fname (), in->get_cline ()); + context_rule ((yyvsp[-5].clist), loc, (yyvsp[-3].regexp), (yyvsp[-2].regexp), NULL, (yyvsp[0].str)); + } +#line 1551 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 14: +#line 275 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_none((yyvsp[-4].clist)); + delete (yyvsp[-1].str); + } +#line 1560 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 15: +#line 280 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_none((yyvsp[-4].clist)); + delete (yyvsp[0].str); + } +#line 1570 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 16: +#line 286 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + default_rule((yyvsp[-3].clist), (yyvsp[0].code)); + } +#line 1578 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 17: +#line 290 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_check(NULL); + RuleOp * rule = new RuleOp + ( (yyvsp[0].code)->loc + , (yyvsp[-3].regexp) + , (yyvsp[-2].regexp) + , rank_counter.next () + , (yyvsp[0].code) + , (yyvsp[-1].str) + ); + specStar.push_back (rule); + delete (yyvsp[-1].str); + } +#line 1596 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 18: +#line 304 
"../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_check(NULL); + Loc loc (in->get_fname (), in->get_cline ()); + RuleOp * rule = new RuleOp + ( loc + , (yyvsp[-3].regexp) + , (yyvsp[-2].regexp) + , rank_counter.next () + , NULL + , (yyvsp[0].str) + ); + specStar.push_back (rule); + delete (yyvsp[0].str); + } +#line 1616 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 19: +#line 320 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_none(NULL); + delete (yyvsp[-1].str); + } +#line 1625 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 20: +#line 325 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_none(NULL); + delete (yyvsp[0].str); + } +#line 1635 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 21: +#line 331 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if (star_default) + { + in->fatal ("code to default rule '*' is already defined"); + } + star_default = new RuleOp + ( (yyvsp[0].code)->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , (yyvsp[0].code) + , NULL + ); + } +#line 1654 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 22: +#line 346 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + (yyval.regexp) = specNone = new RuleOp + ( (yyvsp[0].code)->loc + , new NullOp + , new NullOp + , rank_counter.next () + , (yyvsp[0].code) + , (yyvsp[-1].str) + ); + delete (yyvsp[-1].str); + } +#line 1675 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 23: +#line 363 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + assert((yyvsp[0].str)); + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + Loc loc (in->get_fname (), in->get_cline ()); + (yyval.regexp) = specNone = new RuleOp + ( loc + , new NullOp + , new NullOp + , rank_counter.next () + , NULL + , 
(yyvsp[0].str) + ); + delete (yyvsp[0].str); + } +#line 1698 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 24: +#line 382 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + CondList *clist = new CondList(); + clist->insert("*"); + setup_rule(clist, (yyvsp[0].code)); + } +#line 1708 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 25: +#line 388 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + setup_rule((yyvsp[-2].clist), (yyvsp[0].code)); + } +#line 1716 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 26: +#line 395 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + in->fatal("unnamed condition not supported"); + } +#line 1724 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 27: +#line 399 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.clist) = (yyvsp[0].clist); + } +#line 1732 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 28: +#line 406 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.clist) = new CondList(); + (yyval.clist)->insert(* (yyvsp[0].str)); + delete (yyvsp[0].str); + } +#line 1742 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 29: +#line 412 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyvsp[-2].clist)->insert(* (yyvsp[0].str)); + delete (yyvsp[0].str); + (yyval.clist) = (yyvsp[-2].clist); + } +#line 1752 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 30: +#line 421 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.str) = NULL; + } +#line 1760 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 31: +#line 425 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.str) = (yyvsp[0].str); + } +#line 1768 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 32: +#line 432 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = new NullOp; + } +#line 1776 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 33: +#line 436 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1784 
"src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 34: +#line 443 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1792 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 35: +#line 447 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = mkAlt((yyvsp[-2].regexp), (yyvsp[0].regexp)); + } +#line 1800 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 36: +#line 454 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1808 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 37: +#line 458 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = in->mkDiff((yyvsp[-2].regexp), (yyvsp[0].regexp)); + } +#line 1816 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 38: +#line 465 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1824 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 39: +#line 469 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = new CatOp((yyvsp[-1].regexp), (yyvsp[0].regexp)); + } +#line 1832 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 40: +#line 476 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1840 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 41: +#line 480 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + switch((yyvsp[0].op)) + { + case '*': + (yyval.regexp) = new CloseOp((yyvsp[-1].regexp)); + break; + case '+': + (yyval.regexp) = new CatOp (new CloseOp((yyvsp[-1].regexp)), (yyvsp[-1].regexp)); + break; + case '?': + (yyval.regexp) = mkAlt((yyvsp[-1].regexp), new NullOp()); + break; + } + } +#line 1859 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 42: +#line 495 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + if ((yyvsp[0].extop).max == std::numeric_limits::max()) + { + (yyval.regexp) = repeat_from ((yyvsp[-1].regexp), 
(yyvsp[0].extop).min); + } + else if ((yyvsp[0].extop).min == (yyvsp[0].extop).max) + { + (yyval.regexp) = repeat ((yyvsp[-1].regexp), (yyvsp[0].extop).min); + } + else + { + (yyval.regexp) = repeat_from_to ((yyvsp[-1].regexp), (yyvsp[0].extop).min, (yyvsp[0].extop).max); + } + (yyval.regexp) = (yyval.regexp) ? (yyval.regexp) : new NullOp; + } +#line 1879 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 43: +#line 514 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = (yyvsp[0].op); + } +#line 1887 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 44: +#line 518 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = (yyvsp[0].op); + } +#line 1895 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 45: +#line 522 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = ((yyvsp[-1].op) == (yyvsp[0].op)) ? (yyvsp[-1].op) : '*'; + } +#line 1903 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 46: +#line 526 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.op) = ((yyvsp[-1].op) == (yyvsp[0].op)) ? 
(yyvsp[-1].op) : '*'; + } +#line 1911 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 47: +#line 533 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + symbol_table_t::iterator i = symbol_table.find (* (yyvsp[0].str)); + delete (yyvsp[0].str); + if (i == symbol_table.end ()) + { + in->fatal("can't find symbol"); + } + (yyval.regexp) = i->second; + } +#line 1925 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 48: +#line 543 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[0].regexp); + } +#line 1933 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + case 49: +#line 547 "../src/parse/parser.ypp" /* yacc.c:1646 */ + { + (yyval.regexp) = (yyvsp[-1].regexp); + } +#line 1941 "src/parse/parser.cc" /* yacc.c:1646 */ + break; + + +#line 1945 "src/parse/parser.cc" /* yacc.c:1646 */ + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. 
*/ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); + + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if ! YYERROR_VERBOSE + yyerror (YY_("syntax error")); +#else +# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ + yyssp, yytoken) + { + char const *yymsgp = YY_("syntax error"); + int yysyntax_error_status; + yysyntax_error_status = YYSYNTAX_ERROR; + if (yysyntax_error_status == 0) + yymsgp = yymsg; + else if (yysyntax_error_status == 1) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); + if (!yymsg) + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = 2; + } + else + { + yysyntax_error_status = YYSYNTAX_ERROR; + yymsgp = yymsg; + } + } + yyerror (yymsgp); + if (yysyntax_error_status == 2) + goto yyexhaustedlab; + } +# undef YYSYNTAX_ERROR +#endif + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. 
| +`---------------------------------------------------*/ +yyerrorlab: + + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. */ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + yystos[yystate], yyvsp); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#if !defined yyoverflow || YYERROR_VERBOSE +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. 
| +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + return yyresult; +} +#line 552 "../src/parse/parser.ypp" /* yacc.c:1906 */ + + +extern "C" { +void yyerror(const char* s) +{ + in->fatal(s); +} + +int yylex(){ + return in ? 
in->scan() : 0; +} +} // end extern "C" + +namespace re2c +{ + +void parse(Scanner& i, Output & o) +{ + std::map > dfa_map; + ScannerState rules_state; + + in = &i; + + o.source.wversion_time () + .wline_info (in->get_cline (), in->get_fname ().c_str ()); + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_prolog (o.source); + } + + Enc encodingOld = opts->encoding; + + while ((parseMode = i.echo()) != Scanner::Stop) + { + o.source.new_block (); + bool bPrologBrace = false; + ScannerState curr_state; + + i.save_state(curr_state); + foundRules = false; + + if (opts->rFlag && parseMode == Scanner::Rules && dfa_map.size()) + { + in->fatal("cannot have a second 'rules:re2c' block"); + } + if (parseMode == Scanner::Reuse) + { + if (dfa_map.empty()) + { + in->fatal("got 'use:re2c' without 'rules:re2c'"); + } + } + else if (parseMode == Scanner::Rules) + { + i.save_state(rules_state); + } + else + { + dfa_map.clear(); + } + rank_counter.reset (); + spec.clear (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + if (opts->rFlag && parseMode == Scanner::Reuse) + { + if (foundRules || opts->encoding != encodingOld) + { + // Re-parse rules + parseMode = Scanner::Parse; + i.restore_state(rules_state); + i.reuse(); + dfa_map.clear(); + parse_cleanup(); + spec.clear (); + rank_counter.reset (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + + // Now append potential new rules + i.restore_state(curr_state); + parseMode = Scanner::Parse; + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + } + encodingOld = opts->encoding; + } + o.source.set_block_line (in->get_cline ()); + uint32_t ind = opts->topIndent; + if (opts->cFlag) + { + SpecMap::iterator it; + SetupMap::const_iterator itRuleSetup; + + if (parseMode != Scanner::Reuse) + { + // <*> rules must have the lowest priority + // now that all rules have been parsed, we can fix it + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) 
+ { + (*itOp)->rank = rank_counter.next (); + } + // merge <*> rules to all conditions + for (it = specMap.begin(); it != specMap.end(); ++it) + { + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) + { + it->second.add (*itOp); + } + if (star_default) + { + it->second.add_def (star_default); + } + } + + if (specNone) + { + specMap["0"].add (specNone); + // Note that "0" inserts first, which is important. + condnames.insert (condnames.begin (), "0"); + } + o.types = condnames; + } + + size_t nCount = specMap.size(); + + for (it = specMap.begin(); it != specMap.end(); ++it) + { + if (parseMode != Scanner::Reuse) + { + itRuleSetup = ruleSetupMap.find(it->first); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + yySetupRule = ""; + } + } + + dfa_map[it->first] = compile(it->second, o, it->first, opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) + { + dfa_map[it->first]->emit(o, ind, !--nCount, bPrologBrace); + } + } + } + else + { + if (spec.re || !dfa_map.empty()) + { + if (parseMode != Scanner::Reuse) + { + dfa_map[""] = compile(spec, o, "", opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end()) + { + dfa_map[""]->emit(o, ind, 0, bPrologBrace); + } + } + } + o.source.wline_info (in->get_cline (), in->get_fname ().c_str ()); + /* restore original char handling mode*/ + opts.reset_encoding (encodingOld); + } + + if (opts->cFlag) + { + SetupMap::const_iterator itRuleSetup; + for (itRuleSetup = ruleSetupMap.begin(); itRuleSetup != ruleSetupMap.end(); ++itRuleSetup) + { + if (itRuleSetup->first != "*" && specMap.find(itRuleSetup->first) == specMap.end()) + { + in->fatalf_at(itRuleSetup->second.first, "setup for non 
existing rule '%s' found", itRuleSetup->first.c_str()); + } + } + if (specMap.size() < ruleSetupMap.size()) + { + uint32_t line = in->get_cline(); + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + line = itRuleSetup->second.first; + } + in->fatalf_at(line, "setup for all rules with '*' not possible when all rules are setup explicitly"); + } + } + + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_epilog (o.source, o.skeletons); + } + + parse_cleanup(); + in = NULL; +} + +void parse_cleanup() +{ + RegExp::vFreeList.clear(); + Range::vFreeList.clear(); + RangeSuffix::freeList.clear(); + Code::freelist.clear(); + symbol_table.clear (); + condnames.clear (); + specMap.clear(); + specStar.clear(); + star_default = NULL; + specNone = NULL; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/parser.h b/tools/re2c/src/parse/parser.h new file mode 100644 index 000000000..8901d7924 --- /dev/null +++ b/tools/re2c/src/parse/parser.h @@ -0,0 +1,28 @@ +#ifndef _RE2C_PARSE_PARSER_ +#define _RE2C_PARSE_PARSER_ + +#include +#include + +#include "src/codegen/output.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/parse/scanner.h" +#include "src/parse/spec.h" + +namespace re2c +{ + +extern void parse(Scanner &, Output &); +extern void parse_cleanup(); + +typedef std::set CondList; +typedef std::list RuleOpList; +typedef std::map SpecMap; +typedef std::map > SetupMap; +typedef std::map DefaultMap; +typedef std::map symbol_table_t; + +} // namespace re2c + +#endif // _RE2C_PARSE_PARSER_ diff --git a/tools/re2c/src/parse/parser.ypp b/tools/re2c/src/parse/parser.ypp new file mode 100644 index 000000000..76540ecc0 --- /dev/null +++ b/tools/re2c/src/parse/parser.ypp @@ -0,0 +1,775 @@ +%{ + +#include "src/util/c99_stdint.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/codegen/output.h" +#include "src/conf/opt.h" 
+#include "src/globals.h" +#include "src/ir/compile.h" +#include "src/ir/adfa/adfa.h" +#include "src/ir/regexp/encoding/enc.h" +#include "src/ir/regexp/encoding/range_suffix.h" +#include "src/ir/regexp/regexp.h" +#include "src/ir/regexp/regexp_cat.h" +#include "src/ir/regexp/regexp_close.h" +#include "src/ir/regexp/regexp_null.h" +#include "src/ir/regexp/regexp_rule.h" +#include "src/ir/rule_rank.h" +#include "src/ir/skeleton/skeleton.h" +#include "src/parse/code.h" +#include "src/parse/extop.h" +#include "src/parse/loc.h" +#include "src/parse/parser.h" +#include "src/parse/scanner.h" +#include "src/parse/spec.h" +#include "src/util/counter.h" +#include "src/util/free_list.h" +#include "src/util/range.h" +#include "src/util/smart_ptr.h" + +#define YYMALLOC malloc +#define YYFREE free + +using namespace re2c; + +extern "C" +{ +int yylex(); +void yyerror(const char*); +} + +static counter_t rank_counter; +static std::vector condnames; +static re2c::SpecMap specMap; +static Spec spec; +static RuleOp *specNone = NULL; +static RuleOpList specStar; +static RuleOp * star_default = NULL; +static Scanner *in = NULL; +static Scanner::ParseMode parseMode; +static SetupMap ruleSetupMap; +static bool foundRules; +static symbol_table_t symbol_table; + +/* Bison version 1.875 emits a definition that is not working + * with several g++ version. Hence we disable it here. 
+ */ +#if defined(__GNUC__) +#define __attribute__(x) +#endif + +void context_check(CondList *clist) +{ + if (!opts->cFlag) + { + delete clist; + in->fatal("conditions are only allowed when using -c switch"); + } +} + +void context_none(CondList *clist) +{ + delete clist; + context_check(NULL); + in->fatal("no expression specified"); +} + +void context_rule + ( CondList * clist + , const Loc & loc + , RegExp * expr + , RegExp * look + , const Code * code + , const std::string * newcond + ) +{ + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (specMap.find(*it) == specMap.end()) + { + condnames.push_back (*it); + } + + RuleOp * rule = new RuleOp + ( loc + , expr + , look + , rank_counter.next () + , code + , newcond + ); + specMap[*it].add (rule); + } + delete clist; + delete newcond; +} + +void setup_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + if (ruleSetupMap.find(*it) != ruleSetupMap.end()) + { + in->fatalf_at(code->loc.line, "code to setup rule '%s' is already defined", it->c_str()); + } + ruleSetupMap[*it] = std::make_pair(code->loc.line, code->text); + } + delete clist; +} + +void default_rule(CondList *clist, const Code * code) +{ + assert(clist); + assert(code); + context_check(clist); + for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) + { + RuleOp * def = new RuleOp + ( code->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , code + , NULL + ); + if (!specMap[*it].add_def (def)) + { + in->fatalf_at(code->loc.line, "code to default rule '%s' is already defined", it->c_str()); + } + } + delete clist; +} + +%} + +%start spec + +%union { + re2c::RegExp * regexp; + const re2c::Code * code; + char op; + re2c::ExtOp extop; + std::string * str; + re2c::CondList * clist; +}; + +%token TOKEN_CLOSE +%token TOKEN_CLOSESIZE 
+%token TOKEN_CODE +%token TOKEN_CONF +%token TOKEN_ID +%token TOKEN_FID +%token TOKEN_FID_END +%token TOKEN_NOCOND +%token TOKEN_REGEXP +%token TOKEN_SETUP +%token TOKEN_STAR + +%type TOKEN_CLOSE TOKEN_STAR TOKEN_SETUP close +%type TOKEN_CLOSESIZE +%type TOKEN_CODE +%type TOKEN_REGEXP rule look expr diff term factor primary +%type TOKEN_ID TOKEN_FID newcond +%type cond clist + +%% + +spec: + /* empty */ + { + } + | spec rule + { + foundRules = true; + } + | spec decl +; + +decl: + TOKEN_ID '=' expr ';' + { + if (!symbol_table.insert (std::make_pair (* $1, $3)).second) + { + in->fatal("sym already defined"); + } + delete $1; + } + | TOKEN_FID expr TOKEN_FID_END + { + if (!symbol_table.insert (std::make_pair (* $1, $2)).second) + { + in->fatal("sym already defined"); + } + delete $1; + } + | TOKEN_ID '=' expr '/' + { + in->fatal("trailing contexts are not allowed in named definitions"); + } + | TOKEN_FID expr '/' + { + in->fatal("trailing contexts are not allowed in named definitions"); + } + | TOKEN_CONF {} +; + +rule: + expr look TOKEN_CODE + { + if (opts->cFlag) + { + in->fatal("condition or '<*>' required when using -c switch"); + } + RuleOp * rule = new RuleOp + ( $3->loc + , $1 + , $2 + , rank_counter.next () + , $3 + , NULL + ); + spec.add (rule); + } + | TOKEN_STAR TOKEN_CODE /* default rule */ + { + if (opts->cFlag) + in->fatal("condition or '<*>' required when using -c switch"); + RuleOp * def = new RuleOp + ( $2->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , $2 + , NULL + ); + if (!spec.add_def (def)) + { + in->fatal("code to default rule is already defined"); + } + } + | '<' cond '>' expr look newcond TOKEN_CODE + { + context_rule ($2, $7->loc, $4, $5, $7, $6); + } + | '<' cond '>' expr look ':' newcond + { + assert($7); + Loc loc (in->get_fname (), in->get_cline ()); + context_rule ($2, loc, $4, $5, NULL, $7); + } + | '<' cond '>' look newcond TOKEN_CODE + { + context_none($2); + delete $5; + } + | '<' cond '>' look ':' newcond + { 
+ assert($6); + context_none($2); + delete $6; + } + | '<' cond '>' TOKEN_STAR TOKEN_CODE /* default rule for conditions */ + { + default_rule($2, $5); + } + | '<' TOKEN_STAR '>' expr look newcond TOKEN_CODE + { + context_check(NULL); + RuleOp * rule = new RuleOp + ( $7->loc + , $4 + , $5 + , rank_counter.next () + , $7 + , $6 + ); + specStar.push_back (rule); + delete $6; + } + | '<' TOKEN_STAR '>' expr look ':' newcond + { + assert($7); + context_check(NULL); + Loc loc (in->get_fname (), in->get_cline ()); + RuleOp * rule = new RuleOp + ( loc + , $4 + , $5 + , rank_counter.next () + , NULL + , $7 + ); + specStar.push_back (rule); + delete $7; + } + | '<' TOKEN_STAR '>' look newcond TOKEN_CODE + { + context_none(NULL); + delete $5; + } + | '<' TOKEN_STAR '>' look ':' newcond + { + assert($6); + context_none(NULL); + delete $6; + } + | '<' TOKEN_STAR '>' TOKEN_STAR TOKEN_CODE /* default rule for all conditions */ + { + if (star_default) + { + in->fatal ("code to default rule '*' is already defined"); + } + star_default = new RuleOp + ( $5->loc + , in->mkDefault () + , new NullOp + , rule_rank_t::def () + , $5 + , NULL + ); + } + | TOKEN_NOCOND newcond TOKEN_CODE + { + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + $$ = specNone = new RuleOp + ( $3->loc + , new NullOp + , new NullOp + , rank_counter.next () + , $3 + , $2 + ); + delete $2; + } + | TOKEN_NOCOND ':' newcond + { + assert($3); + context_check(NULL); + if (specNone) + { + in->fatal("code to handle illegal condition already defined"); + } + Loc loc (in->get_fname (), in->get_cline ()); + $$ = specNone = new RuleOp + ( loc + , new NullOp + , new NullOp + , rank_counter.next () + , NULL + , $3 + ); + delete $3; + } + | TOKEN_SETUP TOKEN_STAR '>' TOKEN_CODE + { + CondList *clist = new CondList(); + clist->insert("*"); + setup_rule(clist, $4); + } + | TOKEN_SETUP cond '>' TOKEN_CODE + { + setup_rule($2, $4); + } +; + +cond: + /* empty */ + { + 
in->fatal("unnamed condition not supported"); + } + | clist + { + $$ = $1; + } + ; + +clist: + TOKEN_ID + { + $$ = new CondList(); + $$->insert(* $1); + delete $1; + } + | clist ',' TOKEN_ID + { + $1->insert(* $3); + delete $3; + $$ = $1; + } +; + +newcond: + /* empty */ + { + $$ = NULL; + } + | '=' '>' TOKEN_ID + { + $$ = $3; + } +; + +look: + /* empty */ + { + $$ = new NullOp; + } + | '/' expr + { + $$ = $2; + } +; + +expr: + diff + { + $$ = $1; + } + | expr '|' diff + { + $$ = mkAlt($1, $3); + } +; + +diff: + term + { + $$ = $1; + } + | diff '\\' term + { + $$ = in->mkDiff($1, $3); + } +; + +term: + factor + { + $$ = $1; + } + | term factor + { + $$ = new CatOp($1, $2); + } +; + +factor: + primary + { + $$ = $1; + } + | primary close + { + switch($2) + { + case '*': + $$ = new CloseOp($1); + break; + case '+': + $$ = new CatOp (new CloseOp($1), $1); + break; + case '?': + $$ = mkAlt($1, new NullOp()); + break; + } + } + | primary TOKEN_CLOSESIZE + { + if ($2.max == std::numeric_limits::max()) + { + $$ = repeat_from ($1, $2.min); + } + else if ($2.min == $2.max) + { + $$ = repeat ($1, $2.min); + } + else + { + $$ = repeat_from_to ($1, $2.min, $2.max); + } + $$ = $$ ? $$ : new NullOp; + } +; + +close: + TOKEN_CLOSE + { + $$ = $1; + } + | TOKEN_STAR + { + $$ = $1; + } + | close TOKEN_CLOSE + { + $$ = ($1 == $2) ? $1 : '*'; + } + | close TOKEN_STAR + { + $$ = ($1 == $2) ? $1 : '*'; + } +; + +primary: + TOKEN_ID + { + symbol_table_t::iterator i = symbol_table.find (* $1); + delete $1; + if (i == symbol_table.end ()) + { + in->fatal("can't find symbol"); + } + $$ = i->second; + } + | TOKEN_REGEXP + { + $$ = $1; + } + | '(' expr ')' + { + $$ = $2; + } +; + +%% + +extern "C" { +void yyerror(const char* s) +{ + in->fatal(s); +} + +int yylex(){ + return in ? 
in->scan() : 0; +} +} // end extern "C" + +namespace re2c +{ + +void parse(Scanner& i, Output & o) +{ + std::map > dfa_map; + ScannerState rules_state; + + in = &i; + + o.source.wversion_time () + .wline_info (in->get_cline (), in->get_fname ().c_str ()); + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_prolog (o.source); + } + + Enc encodingOld = opts->encoding; + + while ((parseMode = i.echo()) != Scanner::Stop) + { + o.source.new_block (); + bool bPrologBrace = false; + ScannerState curr_state; + + i.save_state(curr_state); + foundRules = false; + + if (opts->rFlag && parseMode == Scanner::Rules && dfa_map.size()) + { + in->fatal("cannot have a second 'rules:re2c' block"); + } + if (parseMode == Scanner::Reuse) + { + if (dfa_map.empty()) + { + in->fatal("got 'use:re2c' without 'rules:re2c'"); + } + } + else if (parseMode == Scanner::Rules) + { + i.save_state(rules_state); + } + else + { + dfa_map.clear(); + } + rank_counter.reset (); + spec.clear (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + if (opts->rFlag && parseMode == Scanner::Reuse) + { + if (foundRules || opts->encoding != encodingOld) + { + // Re-parse rules + parseMode = Scanner::Parse; + i.restore_state(rules_state); + i.reuse(); + dfa_map.clear(); + parse_cleanup(); + spec.clear (); + rank_counter.reset (); + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + + // Now append potential new rules + i.restore_state(curr_state); + parseMode = Scanner::Parse; + in->set_in_parse(true); + yyparse(); + in->set_in_parse(false); + } + encodingOld = opts->encoding; + } + o.source.set_block_line (in->get_cline ()); + uint32_t ind = opts->topIndent; + if (opts->cFlag) + { + SpecMap::iterator it; + SetupMap::const_iterator itRuleSetup; + + if (parseMode != Scanner::Reuse) + { + // <*> rules must have the lowest priority + // now that all rules have been parsed, we can fix it + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) 
+ { + (*itOp)->rank = rank_counter.next (); + } + // merge <*> rules to all conditions + for (it = specMap.begin(); it != specMap.end(); ++it) + { + for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) + { + it->second.add (*itOp); + } + if (star_default) + { + it->second.add_def (star_default); + } + } + + if (specNone) + { + specMap["0"].add (specNone); + // Note that "0" inserts first, which is important. + condnames.insert (condnames.begin (), "0"); + } + o.types = condnames; + } + + size_t nCount = specMap.size(); + + for (it = specMap.begin(); it != specMap.end(); ++it) + { + if (parseMode != Scanner::Reuse) + { + itRuleSetup = ruleSetupMap.find(it->first); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + yySetupRule = itRuleSetup->second.second; + } + else + { + yySetupRule = ""; + } + } + + dfa_map[it->first] = compile(it->second, o, it->first, opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) + { + dfa_map[it->first]->emit(o, ind, !--nCount, bPrologBrace); + } + } + } + else + { + if (spec.re || !dfa_map.empty()) + { + if (parseMode != Scanner::Reuse) + { + dfa_map[""] = compile(spec, o, "", opts->encoding.nCodeUnits ()); + } + if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end()) + { + dfa_map[""]->emit(o, ind, 0, bPrologBrace); + } + } + } + o.source.wline_info (in->get_cline (), in->get_fname ().c_str ()); + /* restore original char handling mode*/ + opts.reset_encoding (encodingOld); + } + + if (opts->cFlag) + { + SetupMap::const_iterator itRuleSetup; + for (itRuleSetup = ruleSetupMap.begin(); itRuleSetup != ruleSetupMap.end(); ++itRuleSetup) + { + if (itRuleSetup->first != "*" && specMap.find(itRuleSetup->first) == specMap.end()) + { + in->fatalf_at(itRuleSetup->second.first, "setup for non 
existing rule '%s' found", itRuleSetup->first.c_str()); + } + } + if (specMap.size() < ruleSetupMap.size()) + { + uint32_t line = in->get_cline(); + itRuleSetup = ruleSetupMap.find("*"); + if (itRuleSetup != ruleSetupMap.end()) + { + line = itRuleSetup->second.first; + } + in->fatalf_at(line, "setup for all rules with '*' not possible when all rules are setup explicitly"); + } + } + + if (opts->target == opt_t::SKELETON) + { + Skeleton::emit_epilog (o.source, o.skeletons); + } + + parse_cleanup(); + in = NULL; +} + +void parse_cleanup() +{ + RegExp::vFreeList.clear(); + Range::vFreeList.clear(); + RangeSuffix::freeList.clear(); + Code::freelist.clear(); + symbol_table.clear (); + condnames.clear (); + specMap.clear(); + specStar.clear(); + star_default = NULL; + specNone = NULL; +} + +} // end namespace re2c diff --git a/tools/re2c/src/parse/rules.h b/tools/re2c/src/parse/rules.h new file mode 100644 index 000000000..7815372e1 --- /dev/null +++ b/tools/re2c/src/parse/rules.h @@ -0,0 +1,29 @@ +#ifndef _RE2C_PARSE_RULES_ +#define _RE2C_PARSE_RULES_ + +#include +#include + +#include "src/ir/rule_rank.h" + +namespace re2c +{ + +struct rule_info_t +{ + uint32_t line; + std::set shadow; + bool reachable; + + rule_info_t () + : line (0) + , shadow () + , reachable (false) + {} +}; + +typedef std::map rules_t; + +} // namespace re2c + +#endif // _RE2C_PARSE_RULES_ diff --git a/tools/re2c/src/parse/scanner.cc b/tools/re2c/src/parse/scanner.cc new file mode 100644 index 000000000..09dd2bda4 --- /dev/null +++ b/tools/re2c/src/parse/scanner.cc @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include +#include + +#include "src/codegen/label.h" +#include "src/codegen/output.h" +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/parse/scanner.h" +#include "src/util/counter.h" + +// used by Scanner::fatal_at and Scanner::fatalf +#if defined(_MSC_VER) && !defined(vsnprintf) +# define vsnprintf _vsnprintf +#endif + +namespace re2c { + +const uint32_t 
Scanner::BSIZE = 8192; + +ScannerState::ScannerState () + : tok (NULL) + , ptr (NULL) + , cur (NULL) + , pos (NULL) + , ctx (NULL) + , bot (NULL) + , lim (NULL) + , top (NULL) + , eof (NULL) + , tchar (0) + , tline (0) + , cline (1) + , in_parse (false) + , lexer_state (LEX_NORMAL) +{} + +ScannerState::ScannerState (const ScannerState & s) + : tok (s.tok) + , ptr (s.ptr) + , cur (s.cur) + , pos (s.pos) + , ctx (s.ctx) + , bot (s.bot) + , lim (s.lim) + , top (s.top) + , eof (s.eof) + , tchar (s.tchar) + , tline (s.tline) + , cline (s.cline) + , in_parse (s.in_parse) + , lexer_state (s.lexer_state) +{} + +ScannerState & ScannerState::operator = (const ScannerState & s) +{ + this->~ScannerState (); + new (this) ScannerState (s); + return * this; +} + +Scanner::Scanner (Input & i, OutputFile & o) + : ScannerState () + , in (i) + , out (o) +{} + +void Scanner::fill (uint32_t need) +{ + if(!eof) + { + /* Do not get rid of anything when rFlag is active. Otherwise + * get rid of everything that was already handedout. */ + if (!opts->rFlag) + { + const ptrdiff_t diff = tok - bot; + if (diff > 0) + { + const size_t move = static_cast (top - tok); + memmove (bot, tok, move); + tok -= diff; + ptr -= diff; + cur -= diff; + pos -= diff; + lim -= diff; + ctx -= diff; + } + } + /* In crease buffer size. */ + if (BSIZE > need) + { + need = BSIZE; + } + if (static_cast (top - lim) < need) + { + const size_t copy = static_cast (lim - bot); + char * buf = new char[copy + need]; + if (!buf) + { + fatal("Out of memory"); + } + memcpy (buf, bot, copy); + tok = &buf[tok - bot]; + ptr = &buf[ptr - bot]; + cur = &buf[cur - bot]; + pos = &buf[pos - bot]; + lim = &buf[lim - bot]; + top = &lim[need]; + ctx = &buf[ctx - bot]; + delete [] bot; + bot = buf; + } + /* Append to buffer. 
*/ + const size_t have = fread (lim, 1, need, in.file); + if (have != need) + { + eof = &lim[have]; + *eof++ = '\0'; + } + lim += have; + } +} + +void Scanner::set_in_parse(bool new_in_parse) +{ + in_parse = new_in_parse; +} + +void Scanner::fatal_at(uint32_t line, ptrdiff_t ofs, const char *msg) const +{ + std::cerr << "re2c: error: " + << "line " << line << ", column " << (tchar + ofs + 1) << ": " + << msg << std::endl; + exit(1); +} + +void Scanner::fatal(ptrdiff_t ofs, const char *msg) const +{ + fatal_at(in_parse ? tline : cline, ofs, msg); +} + +void Scanner::fatalf_at(uint32_t line, const char* fmt, ...) const +{ + char szBuf[4096]; + + va_list args; + + va_start(args, fmt); + vsnprintf(szBuf, sizeof(szBuf), fmt, args); + va_end(args); + + szBuf[sizeof(szBuf)-1] = '\0'; // terminate explicitly: _vsnprintf (MSVC) writes no NUL when it truncates + + fatal_at(line, 0, szBuf); +} + +void Scanner::fatalf(const char *fmt, ...) const +{ + char szBuf[4096]; + + va_list args; + + va_start(args, fmt); + vsnprintf(szBuf, sizeof(szBuf), fmt, args); + va_end(args); + + szBuf[sizeof(szBuf)-1] = '\0'; // terminate explicitly: _vsnprintf (MSVC) writes no NUL when it truncates + + fatal(szBuf); +} + +Scanner::~Scanner() +{ + delete [] bot; +} + +void Scanner::reuse() +{ + out.label_counter.reset (); + last_fill_index = 0; + bWroteGetState = false; + bWroteCondCheck = false; + opts.reset_mapCodeName (); +} + +void Scanner::restore_state(const ScannerState& state) +{ + ptrdiff_t diff = bot - state.bot; // distance from the saved buffer base to the current one + char *old_bot = bot; + char *old_lim = lim; + char *old_top = top; + char *old_eof = eof; + *(ScannerState*)this = state; + if (diff) + { + tok += diff; // re-base saved positions onto the current buffer + ptr += diff; + cur += diff; + pos += diff; + ctx += diff; + bot = old_bot; + lim = old_lim; + top = old_top; + eof = old_eof; + } +} + +} // namespace re2c diff --git a/tools/re2c/src/parse/scanner.h b/tools/re2c/src/parse/scanner.h new file mode 100644 index 000000000..d13eeaa1d --- /dev/null +++ b/tools/re2c/src/parse/scanner.h @@ -0,0 +1,147 @@ +#ifndef _RE2C_PARSE_SCANNER_ +#define _RE2C_PARSE_SCANNER_ + +#include "src/util/c99_stdint.h" +#include +#include + +#include
"src/parse/input.h" +#include "src/util/attribute.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +class Range; +class RegExp; +struct OutputFile; + +struct ScannerState +{ + enum lexer_state_t + { + LEX_NORMAL, + LEX_FLEX_NAME + }; + + // positioning + char * tok; + char * ptr; + char * cur; + char * pos; + char * ctx; + + // buffer + char * bot; + char * lim; + char * top; + char * eof; + + ptrdiff_t tchar; + uint32_t tline; + uint32_t cline; + + bool in_parse; + lexer_state_t lexer_state; + + ScannerState (); + ScannerState (const ScannerState &); + ScannerState & operator = (const ScannerState &); +}; + +class Scanner: private ScannerState +{ + static const uint32_t BSIZE; + + Input & in; +public: + OutputFile & out; + +private: + void fill (uint32_t); + void set_sourceline (); + uint32_t lex_cls_chr(); + uint32_t lex_str_chr(char quote, bool &end); + RegExp *lex_cls(bool neg); + RegExp *lex_str(char quote, bool casing); + RegExp *schr(uint32_t c) const; + RegExp *ichr(uint32_t c) const; + RegExp *cls(Range *r) const; + + void lex_conf (); + void lex_conf_assign (); + void lex_conf_semicolon (); + int32_t lex_conf_number (); + std::string lex_conf_string (); + + size_t tok_len () const; + +public: + Scanner(Input &, OutputFile &); + ~Scanner(); + + enum ParseMode { + Stop, + Parse, + Reuse, + Rules + }; + + ParseMode echo(); + int scan(); + void reuse(); + + void save_state(ScannerState&) const; + void restore_state(const ScannerState&); + + uint32_t get_cline() const; + uint32_t get_line() const; + const std::string & get_fname () const; + void set_in_parse(bool new_in_parse); + void fatal_at(uint32_t line, ptrdiff_t ofs, const char *msg) const; + void fatalf_at(uint32_t line, const char*, ...) const RE2C_GXX_ATTRIBUTE ((format (printf, 3, 4))); + void fatalf(const char*, ...) 
const RE2C_GXX_ATTRIBUTE ((format (printf, 2, 3))); + void fatal(const char*) const; + void fatal(ptrdiff_t, const char*) const; + + RegExp * mkDiff (RegExp * e1, RegExp * e2) const; + RegExp * mkDot () const; + RegExp * mkDefault () const; + + FORBID_COPY (Scanner); +}; + +inline size_t Scanner::tok_len () const +{ + // lexing and fill procedures must maintain: token pointer <= cursor pointer + return static_cast (cur - tok); +} + +inline const std::string & Scanner::get_fname () const +{ + return in.file_name; +} + +inline uint32_t Scanner::get_cline() const +{ + return cline; +} + +inline uint32_t Scanner::get_line() const +{ + return in_parse ? tline : cline; +} + +inline void Scanner::save_state(ScannerState& state) const +{ + state = *this; +} + +inline void Scanner::fatal(const char *msg) const +{ + fatal(0, msg); +} + +} // end namespace re2c + +#endif // _RE2C_PARSE_SCANNER_ diff --git a/tools/re2c/src/parse/spec.h b/tools/re2c/src/parse/spec.h new file mode 100644 index 000000000..0b68af74f --- /dev/null +++ b/tools/re2c/src/parse/spec.h @@ -0,0 +1,55 @@ +#ifndef _RE2C_PARSE_SPEC_ +#define _RE2C_PARSE_SPEC_ + +#include "src/ir/regexp/regexp_rule.h" +#include "src/parse/rules.h" + +namespace re2c +{ + +struct Spec +{ + RegExp * re; + rules_t rules; + + Spec () + : re (NULL) + , rules () + {} + Spec (const Spec & spec) + : re (spec.re) + , rules (spec.rules) + {} + Spec & operator = (const Spec & spec) + { + re = spec.re; + rules = spec.rules; + return *this; + } + bool add_def (RuleOp * r) + { + if (rules.find (rule_rank_t::def ()) != rules.end ()) + { + return false; + } + else + { + add (r); + return true; + } + } + void add (RuleOp * r) + { + rules[r->rank].line = r->loc.line; + re = mkAlt (re, r); + } + void clear () + { + re = NULL; + rules.clear (); + } +}; + +} // namespace re2c + +#endif // _RE2C_PARSE_SPEC_ diff --git a/tools/re2c/src/parse/unescape.cc b/tools/re2c/src/parse/unescape.cc new file mode 100644 index 000000000..0c17139fb --- /dev/null 
+++ b/tools/re2c/src/parse/unescape.cc @@ -0,0 +1,60 @@ +#include "src/parse/unescape.h" + +namespace re2c { + +// expected characters: [0-9a-zA-Z] +static inline uint32_t hex_digit (const char c) +{ + switch (c) + { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'a': + case 'A': return 0xA; + case 'b': + case 'B': return 0xB; + case 'c': + case 'C': return 0xC; + case 'd': + case 'D': return 0xD; + case 'e': + case 'E': return 0xE; + case 'f': + case 'F': return 0xF; + default: return ~0u; // unexpected + } +} + +// expected string format: "\" [xXuU] [0-9a-zA-Z]* +uint32_t unesc_hex (const char * s, const char * s_end) +{ + uint32_t n = 0; + for (s += 2; s != s_end; ++s) + { + n <<= 4; + n += hex_digit (*s); + } + return n; +} + +// expected string format: "\" [0-7]* +uint32_t unesc_oct (const char * s, const char * s_end) +{ + uint32_t n = 0; + for (++s; s != s_end; ++s) + { + n <<= 3; + n += static_cast (*s - '0'); + } + return n; +} + +} // namespace re2c diff --git a/tools/re2c/src/parse/unescape.h b/tools/re2c/src/parse/unescape.h new file mode 100644 index 000000000..000c378d7 --- /dev/null +++ b/tools/re2c/src/parse/unescape.h @@ -0,0 +1,13 @@ +#ifndef _RE2C_PARSE_UNESCAPE_ +#define _RE2C_PARSE_UNESCAPE_ + +#include "src/util/c99_stdint.h" + +namespace re2c { + +uint32_t unesc_hex (const char * s, const char * s_end); +uint32_t unesc_oct (const char * s, const char * s_end); + +} // namespace re2c + +#endif // _RE2C_PARSE_UNESCAPE_ diff --git a/tools/re2c/src/parse/y.tab.h b/tools/re2c/src/parse/y.tab.h new file mode 100644 index 000000000..52bd8d1b9 --- /dev/null +++ b/tools/re2c/src/parse/y.tab.h @@ -0,0 +1,89 @@ +/* A Bison parser, made by GNU Bison 3.0.4. 
*/ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +#ifndef YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +# define YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token type. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + TOKEN_CLOSE = 258, + TOKEN_CLOSESIZE = 259, + TOKEN_CODE = 260, + TOKEN_CONF = 261, + TOKEN_ID = 262, + TOKEN_FID = 263, + TOKEN_FID_END = 264, + TOKEN_NOCOND = 265, + TOKEN_REGEXP = 266, + TOKEN_SETUP = 267, + TOKEN_STAR = 268 + }; +#endif + +/* Value type. */ +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED + +union YYSTYPE +{ +#line 161 "../src/parse/parser.ypp" /* yacc.c:1909 */ + + re2c::RegExp * regexp; + const re2c::Code * code; + char op; + re2c::ExtOp extop; + std::string * str; + re2c::CondList * clist; + +#line 77 "src/parse/y.tab.h" /* yacc.c:1909 */ +}; + +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + +extern YYSTYPE yylval; + +int yyparse (void); + +#endif /* !YY_YY_SRC_PARSE_Y_TAB_H_INCLUDED */ diff --git a/tools/re2c/src/test/range/test-impl.h b/tools/re2c/src/test/range/test-impl.h new file mode 100644 index 000000000..3f1d78403 --- /dev/null +++ b/tools/re2c/src/test/range/test-impl.h @@ -0,0 +1,50 @@ +#ifndef _RE2C_TEST_RANGE_TEST_IMPL_ +#define _RE2C_TEST_RANGE_TEST_IMPL_ + +#include "src/test/range/test.h" +#include "src/util/range.h" +#include "src/util/static_assert.h" + +namespace re2c_test { + +static inline bool bit_set (uint32_t n, uint32_t bit) +{ + return n & (1u << bit); +} + +template +re2c::Range * range (uint32_t n) +{ + RE2C_STATIC_ASSERT (BITS <= 31); + + re2c::Range * r = NULL; + re2c::Range ** p = &r; + for (uint32_t i = 0; i < BITS; ++i) + { + for (; i < BITS && !bit_set (n, i); ++i); + if (i == BITS && !bit_set (n, BITS - 1)) + { + break; + } + const uint32_t lb = i; + for (; i < BITS && bit_set (n, i); ++i); + re2c::Range::append (p, lb, i); + } + return r; +} + +template +re2c::Range * add (uint32_t n1, uint32_t n2) +{ + return range (n1 | n2); +} + +template +re2c::Range * sub (uint32_t n1, uint32_t n2) +{ + return range (n1 & ~n2); +} + +} // namespace re2c_test + +#endif // _RE2C_TEST_RANGE_TEST_IMPL_ diff --git a/tools/re2c/src/test/range/test.cc b/tools/re2c/src/test/range/test.cc new file mode 100644 index 000000000..a602f79db --- /dev/null +++ b/tools/re2c/src/test/range/test.cc @@ -0,0 +1,94 @@ +#include + +#include "src/test/range/test.h" +#include "src/test/range/test-impl.h" + +namespace re2c_test { + +static bool equal (const 
re2c::Range * r1, const re2c::Range * r2) +{ + for (; r1 && r2; r1 = r1->next (), r2 = r2->next ()) + { + if (r1->lower () != r2->lower () + || r1->upper () != r2->upper ()) + { + return false; + } + } + return !r1 && !r2; +} + +static void show (const re2c::Range * r) +{ + if (!r) + { + fprintf (stderr, "[]"); + } + for (; r; r = r->next ()) + { + const uint32_t l = r->lower (); + const uint32_t u = r->upper () - 1; + if (l < u) + { + fprintf (stderr, "[%X-%X]", l, u); + } + else + { + fprintf (stderr, "[%X]", l); + } + } +} + +static int32_t diff + ( const re2c::Range * r1 + , const re2c::Range * r2 + , const re2c::Range * op1 + , const re2c::Range * op2 + , const char * op) +{ + if (equal (op1, op2)) + { + return 0; + } + else + { + fprintf (stderr, "%s error: ", op); + show (r1); + fprintf (stderr, " %s ", op); + show (r2); + fprintf (stderr, " ====> "); + show (op2); + fprintf (stderr, " =/= "); + show (op1); + fprintf (stderr, "\n"); + return 1; + } +} + +static int32_t test () +{ + int32_t ok = 0; + + static const uint32_t BITS = 8; + static const uint32_t N = 1u << BITS; + for (uint32_t i = 0; i <= N; ++i) + { + for (uint32_t j = 0; j <= N; ++j) + { + re2c::Range * r1 = range (i); + re2c::Range * r2 = range (j); + ok |= diff (r1, r2, add (i, j), re2c::Range::add (r1, r2), "U"); + ok |= diff (r1, r2, sub (i, j), re2c::Range::sub (r1, r2), "D"); + re2c::Range::vFreeList.clear (); + } + } + + return ok; +} + +} // namespace re2c_test + +int main () +{ + return re2c_test::test (); +} diff --git a/tools/re2c/src/test/range/test.h b/tools/re2c/src/test/range/test.h new file mode 100644 index 000000000..18aa1aca7 --- /dev/null +++ b/tools/re2c/src/test/range/test.h @@ -0,0 +1,26 @@ +#ifndef _RE2C_TEST_RANGE_TEST_ +#define _RE2C_TEST_RANGE_TEST_ + +#include "src/util/c99_stdint.h" + +namespace re2c { class Range; } + +namespace re2c_test { + +/* + * If encoding has N code units (characters), character class can be + * represented as an N-bit integer: k-th bit is 
set iff k-th character + * belongs to the class. + * + * Addition and subtraction can be implemented trivially for such + * integer representation of character classes: addition is simply + * bitwise OR of two classes, subtraction is bitwise AND of the first + * class and negated second class. + */ +template re2c::Range * range (uint32_t n); +template re2c::Range * add (uint32_t n1, uint32_t n2); +template re2c::Range * sub (uint32_t n1, uint32_t n2); + +} // namespace re2c_test + +#endif // _RE2C_TEST_RANGE_TEST_ diff --git a/tools/re2c/src/test/s_to_n32_unsafe/test.cc b/tools/re2c/src/test/s_to_n32_unsafe/test.cc new file mode 100644 index 000000000..47b3d47d7 --- /dev/null +++ b/tools/re2c/src/test/s_to_n32_unsafe/test.cc @@ -0,0 +1,102 @@ +#include +#include + +#include "src/util/s_to_n32_unsafe.h" + +namespace re2c_test { + +static const uint32_t DIGITS = 256; + +// writes string backwards and returns pointer to the start +// no terminating null as we don't need it +static char * u64_to_s_fastest_ever (uint64_t u, char * s) +{ + while (u > 0) + { + const uint64_t d = u % 10 + '0'; + *--s = static_cast (d); + u /= 10; + } + return s; +} + +static int32_t test_u (uint64_t i) +{ + char s [DIGITS]; + char * const s_end = s + DIGITS; + char * const s_start = u64_to_s_fastest_ever (i, s_end); + uint32_t u = i == 0; // not equal to i + if (s_to_u32_unsafe (s_start, s_end, u) && u != i) + { + fprintf (stderr, "unsigned: expected: %llu, got: %u\n", static_cast<unsigned long long> (i), u); // %lu is only 32 bits where long is 32 bits + return 1; + } + return 0; +} + +static int32_t test_i (int64_t i) +{ + char s [DIGITS]; + char * const s_end = s + DIGITS; + const uint64_t i_abs = i < 0 + ?
static_cast (-i) + : static_cast (i); + char * s_start = u64_to_s_fastest_ever (i_abs, s_end); + if (i < 0) + { + *--s_start = '-'; + } + int32_t j = i == 0; // not equal to i + if (s_to_i32_unsafe (s_start, s_end, j) && j != i) + { + fprintf (stderr, "signed: expected: %lld, got: %d\n", static_cast<long long> (i), j); // %ld is only 32 bits where long is 32 bits + return 1; + } + return 0; +} + +static int32_t test () +{ + int32_t ok = 0; + + static const uint64_t UDELTA = 0xFFFF; + // zero neighbourhood + for (uint64_t i = 0; i <= UDELTA; ++i) + { + ok |= test_u (i); + } + // u32_max neighbourhood + static const uint64_t u32_max = std::numeric_limits::max(); + for (uint64_t i = u32_max - UDELTA; i <= u32_max + UDELTA; ++i) + { + ok |= test_u (i); + } + + static const int64_t IDELTA = 0xFFFF; + // i32_min neighbourhood + static const int64_t i32_min = std::numeric_limits::min(); + for (int64_t i = i32_min - IDELTA; i <= i32_min + IDELTA; ++i) + { + ok |= test_i (i); + } + // zero neighbourhood + for (int64_t i = -IDELTA; i <= IDELTA; ++i) + { + ok |= test_i (i); + } + // i32_max neighbourhood + static const int64_t i32_max = std::numeric_limits::max(); + for (int64_t i = i32_max - IDELTA; i <= i32_max + IDELTA; ++i) + { + ok |= test_i (i); + } + + return ok; +} + +} // namespace re2c_test + +int main () +{ + return re2c_test::test (); +} diff --git a/tools/re2c/src/util/allocate.h b/tools/re2c/src/util/allocate.h new file mode 100644 index 000000000..f664910c8 --- /dev/null +++ b/tools/re2c/src/util/allocate.h @@ -0,0 +1,19 @@ +#ifndef _RE2C_UTIL_ALLOCATE_ +#define _RE2C_UTIL_ALLOCATE_ + +#include // size_t + +namespace re2c { + +// useful for allocation of arrays of POD objects +// 'new []' invokes default constructor for each object +// this can be unacceptable for performance reasons +template T * allocate (size_t n) +{ + void * p = operator new (n * sizeof (T)); + return static_cast (p); +} + +} // namespace re2c + +#endif // _RE2C_UTIL_ALLOCATE_ diff --git a/tools/re2c/src/util/attribute.h b/tools/re2c/src/util/attribute.h new
file mode 100644 index 000000000..027a6072d --- /dev/null +++ b/tools/re2c/src/util/attribute.h @@ -0,0 +1,10 @@ +#ifndef _RE2C_UTIL_ATTRIBUTE_ +#define _RE2C_UTIL_ATTRIBUTE_ + +#ifdef __GNUC__ +# define RE2C_GXX_ATTRIBUTE(x) __attribute__(x) +#else +# define RE2C_GXX_ATTRIBUTE(x) +#endif + +#endif // _RE2C_UTIL_ATTRIBUTE_ diff --git a/tools/re2c/src/util/c99_stdint.h b/tools/re2c/src/util/c99_stdint.h new file mode 100644 index 000000000..ffe2e3ee0 --- /dev/null +++ b/tools/re2c/src/util/c99_stdint.h @@ -0,0 +1,262 @@ +#ifndef _RE2C_UTIL_C99_STDINT_ +#define _RE2C_UTIL_C99_STDINT_ + +#include "config.h" + +#if HAVE_STDINT_H +# include +#else // HAVE_STDINT_H + +// A humble attempt to provide C99 compliant +// for environments that don't have it (e.g., MSVC 2003). +// +// First, we try to define exact-width integer types. We don't +// rely on any particular environment: instead, we search for +// a type of certain width in the following list: +// char (C89) +// short (C89) +// int (C89) +// long (C89) +// long long (C99) +// __int64 (MSVC-specific) +// (we consider even insane possibilities for simplicity). +// The size of each type is defined by autoconf in the form +// of a macro SIZEOF_ (set to 0 for nonexistent types). +// If we don't find a type with the required width, we don't +// define the corresponding exact-width C99 type at all. +// +// We define other types and constants based on exact-width +// types and C99 standard. +// +// We use SIZEOF_VOID_P to determine size of pointers. +// +// We use SIZEOF_0 to find suitable 64-bit integer +// constant suffix. 
+ +// C99-7.18.1.1 Exact-width integer types + +// int8_t, uint8_t +#if SIZEOF_CHAR == 1 + typedef signed char int8_t; + typedef unsigned char uint8_t; +#elif SIZEOF_SHORT == 1 + typedef signed short int8_t; + typedef unsigned short uint8_t; +#elif SIZEOF_INT == 1 + typedef signed int int8_t; + typedef unsigned int uint8_t; +#elif SIZEOF_LONG == 1 + typedef signed long int8_t; + typedef unsigned long uint8_t; +#elif SIZEOF_LONG_LONG == 1 + typedef signed long long int8_t; + typedef unsigned long long uint8_t; +#elif SIZEOF___INT64 == 1 + typedef signed __int64 int8_t; + typedef unsigned __int64 uint8_t; +#endif + +// int16_t, uint16_t +#if SIZEOF_CHAR == 2 + typedef signed char int16_t; + typedef unsigned char uint16_t; +#elif SIZEOF_SHORT == 2 + typedef signed short int16_t; + typedef unsigned short uint16_t; +#elif SIZEOF_INT == 2 + typedef signed int int16_t; + typedef unsigned int uint16_t; +#elif SIZEOF_LONG == 2 + typedef signed long int16_t; + typedef unsigned long uint16_t; +#elif SIZEOF_LONG_LONG == 2 + typedef signed long long int16_t; + typedef unsigned long long uint16_t; +#elif SIZEOF___INT64 == 2 + typedef signed __int64 int16_t; + typedef unsigned __int64 uint16_t; +#endif + +// int32_t, uint32_t +#if SIZEOF_CHAR == 4 + typedef signed char int32_t; + typedef unsigned char uint32_t; +#elif SIZEOF_SHORT == 4 + typedef signed short int32_t; + typedef unsigned short uint32_t; +#elif SIZEOF_INT == 4 + typedef signed int int32_t; + typedef unsigned int uint32_t; +#elif SIZEOF_LONG == 4 + typedef signed long int32_t; + typedef unsigned long uint32_t; +#elif SIZEOF_LONG_LONG == 4 + typedef signed long long int32_t; + typedef unsigned long long uint32_t; +#elif SIZEOF___INT64 == 4 + typedef signed __int64 int32_t; + typedef unsigned __int64 uint32_t; +#endif + +// int64_t, uint64_t +#if SIZEOF_CHAR == 8 + typedef signed char int64_t; + typedef unsigned char uint64_t; +#elif SIZEOF_SHORT == 8 + typedef signed short int64_t; + typedef unsigned short uint64_t; 
+#elif SIZEOF_INT == 8 + typedef signed int int64_t; + typedef unsigned int uint64_t; +#elif SIZEOF_LONG == 8 + typedef signed long int64_t; + typedef unsigned long uint64_t; +#elif SIZEOF_LONG_LONG == 8 + typedef signed long long int64_t; + typedef unsigned long long uint64_t; +#elif SIZEOF___INT64 == 8 + typedef signed __int64 int64_t; + typedef unsigned __int64 uint64_t; +#endif + +// C99-7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// C99-7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// C99-7.18.1.4 Integer types capable of holding object pointers +#if SIZEOF_VOID_P == 8 + typedef int64_t intptr_t; + typedef uint64_t uintptr_t; +#else + typedef int intptr_t; + typedef unsigned int uintptr_t; +#endif + +// C99-7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // See footnote 220 at page 257 and footnote 221 at page 259 + +// C99-7.18.2.1 Limits of exact-width integer types +#define INT8_MIN (-128) // -2^(8 - 1) +#define INT8_MAX 127 // 2^(8 - 1) - 1 +#define INT16_MIN (-32767 - 1) // -2^(16 - 1); written as -MAX - 1 because the bare magnitude does not fit the signed type +#define INT16_MAX 32767 // 2^(16 - 1) - 1 +#define INT32_MIN (-2147483647 - 1) // -2^(32 - 1); written as -MAX - 1 because the bare magnitude does not fit the signed type +#define INT32_MAX 2147483647 // 2^(32 - 1) - 1 +#define INT64_MIN (-9223372036854775807 - 1) // -2^(64 - 1); written as -MAX - 1 because the bare magnitude does not fit the signed type +#define INT64_MAX 9223372036854775807 // 2^(64 - 1) - 1 +#define UINT8_MAX 0xFF // 2^8 - 1 +#define UINT16_MAX 0xFFFF // 2^16 - 1 +#define UINT32_MAX 0xFFFFffff // 2^32 - 1
+#define UINT64_MAX 0xFFFFffffFFFFffff // 2^64 - 1 + +// C99-7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// C99-7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// C99-7.18.2.4 Limits of integer types capable of holding object pointers +#define INTPTR_MIN (-32767) // -(2^15 - 1) +#define INTPTR_MAX 32767 // 2^15 - 1 +#define UINTPTR_MAX 0xFFFF // 2^16 - 1 + +// C99-7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN (-9223372036854775807) // -(2^63 - 1) +#define INTMAX_MAX 9223372036854775807 // 2^63 - 1 +#define UINTMAX_MAX 0xFFFFffffFFFFffff // 2^64 - 1 + +// C99-7.18.3 Limits of other integer types: +// "An implementation shall define only the macros +// corresponding to those typedef names it actually +// provides" +// and footnote 222 at page 259: +// "A freestanding implementation need not provide +// all of these types." 
+// +// Since we don't define corresponding types, we don't +// define the following limits either: +// PTRDIFF_MIN +// PTRDIFF_MAX +// SIG_ATOMIC_MIN +// SIG_ATOMIC_MAX +// SIZE_MAX +// WCHAR_MIN +// WCHAR_MAX +// WINT_MIN +// WINT_MAX + +#endif // __STDC_LIMIT_MACROS + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // See footnote 224 at page 260 + +// C99-7.18.4.1 Macros for minimum-width integer constants +#define INT8_C(x) x +#define UINT8_C(x) x##u +#define INT16_C(x) x +#define UINT16_C(x) x##u +#define INT32_C(x) x +#define UINT32_C(x) x##u +#if SIZEOF_0L == 8 +# define INT64_C(x) x##l +# define UINT64_C(x) x##ul +#elif SIZEOF_0LL == 8 +# define INT64_C(x) x##ll +# define UINT64_C(x) x##ull +#elif SIZEOF_0I8 == 8 +# define INT64_C(x) x##i8 +# define UINT64_C(x) x##ui8 +#else +# define INT64_C(x) x +# define UINT64_C(x) x##u +#endif + +// C99-7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS + +#endif // HAVE_STDINT_H + +#endif // _RE2C_UTIL_C99_STDINT_ diff --git a/tools/re2c/src/util/counter.h b/tools/re2c/src/util/counter.h new file mode 100644 index 000000000..f4f583944 --- /dev/null +++ b/tools/re2c/src/util/counter.h @@ -0,0 +1,29 @@ +#ifndef _RE2C_UTIL_COUNTER_ +#define _RE2C_UTIL_COUNTER_ + +namespace re2c { + +template +class counter_t +{ + num_t num; + +public: + counter_t () + : num () + {} + num_t next () + { + num_t n = num; + num.inc (); + return n; + } + void reset () + { + num = num_t (); + } +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_COUNTER_ diff --git a/tools/re2c/src/util/forbid_copy.h b/tools/re2c/src/util/forbid_copy.h new file mode 100644 index 000000000..9c5701d7d --- /dev/null +++ b/tools/re2c/src/util/forbid_copy.h @@ -0,0 +1,11 @@ +#ifndef _RE2C_UTIL_FORBID_COPY_ +#define _RE2C_UTIL_FORBID_COPY_ + +// must be used at the end of class definition +// (since this macro changes scope to private) +#define FORBID_COPY(type) 
\ + private: \ + type (const type &); \ + type & operator = (const type &) + +#endif // _RE2C_UTIL_FORBID_COPY_ diff --git a/tools/re2c/src/util/free_list.h b/tools/re2c/src/util/free_list.h new file mode 100644 index 000000000..8d3ac6546 --- /dev/null +++ b/tools/re2c/src/util/free_list.h @@ -0,0 +1,56 @@ +#ifndef _RE2C_UTIL_FREE_LIST_ +#define _RE2C_UTIL_FREE_LIST_ + +#include + +namespace re2c +{ + +template +class free_list: protected std::set<_Ty> +{ +public: + typedef typename std::set<_Ty>::iterator iterator; + typedef typename std::set<_Ty>::size_type size_type; + typedef typename std::set<_Ty>::key_type key_type; + + free_list(): in_clear(false) + { + } + + using std::set<_Ty>::insert; + + size_type erase(const key_type& key) + { + if (!in_clear) + { + return std::set<_Ty>::erase(key); + } + return 0; + } + + void clear() + { + in_clear = true; + + for(iterator it = this->begin(); it != this->end(); ++it) + { + delete *it; + } + std::set<_Ty>::clear(); + + in_clear = false; + } + + ~free_list() + { + clear(); + } + +protected: + bool in_clear; +}; + +} // end namespace re2c + +#endif // _RE2C_UTIL_FREE_LIST_ diff --git a/tools/re2c/src/util/local_increment.h b/tools/re2c/src/util/local_increment.h new file mode 100644 index 000000000..799ced43b --- /dev/null +++ b/tools/re2c/src/util/local_increment.h @@ -0,0 +1,22 @@ +#ifndef _RE2C_UTIL_LOCAL_INCREMENT_ +#define _RE2C_UTIL_LOCAL_INCREMENT_ + +namespace re2c +{ + +template +struct local_increment_t +{ + counter_t & counter; + inline explicit local_increment_t (counter_t & c) + : counter (++c) + {} + inline ~local_increment_t () + { + --counter; + } +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_LOCAL_INCREMENT_ diff --git a/tools/re2c/src/util/ord_hash_set.h b/tools/re2c/src/util/ord_hash_set.h new file mode 100644 index 000000000..9a0e73eae --- /dev/null +++ b/tools/re2c/src/util/ord_hash_set.h @@ -0,0 +1,115 @@ +#ifndef _RE2C_UTIL_ORD_HASH_SET_ +#define _RE2C_UTIL_ORD_HASH_SET_ + +#include 
"src/util/c99_stdint.h" +#include // malloc, free +#include // memcpy +#include +#include + +namespace re2c +{ + +/* + * ordered hash set: + * - access element by index: O(1) + * - insert element (find existing or add new): O(log(n)) + * + */ +class ord_hash_set_t +{ + struct elem_t + { + elem_t *next; + size_t index; + size_t size; + char data[1]; // inlined array of variable length + }; + typedef size_t hash_t; + + std::vector elems; + std::map lookup; + + static hash_t hash(const void *data, size_t size); + elem_t *make_elem(elem_t *next, size_t index, size_t size, const void *data); + +public: + ord_hash_set_t(); + ~ord_hash_set_t(); + size_t size() const; + size_t insert(const void *data, size_t size); + template size_t deref(size_t i, data_t *&data); +}; + +ord_hash_set_t::hash_t ord_hash_set_t::hash(const void *data, size_t size) +{ + const uint8_t *bytes = static_cast(data); + hash_t h = size; // seed + for (size_t i = 0; i < size; ++i) + { + h = h ^ ((h << 5) + (h >> 2) + bytes[i]); + } + return h; +} + +ord_hash_set_t::elem_t* ord_hash_set_t::make_elem( + elem_t *next, + size_t index, + size_t size, + const void *data) +{ + elem_t *e = static_cast(malloc(offsetof(elem_t, data) + size)); + e->next = next; + e->index = index; + e->size = size; + memcpy(e->data, data, size); + return e; +} + +ord_hash_set_t::ord_hash_set_t() + : elems() + , lookup() +{} + +ord_hash_set_t::~ord_hash_set_t() +{ + std::for_each(elems.begin(), elems.end(), free); +} + +size_t ord_hash_set_t::size() const +{ + return elems.size(); +} + +size_t ord_hash_set_t::insert(const void *data, size_t size) +{ + const hash_t h = hash(data, size); + + std::map::const_iterator i = lookup.find(h); + if (i != lookup.end()) + { + for (elem_t *e = i->second; e; e = e->next) + { + if (e->size == size + && memcmp(e->data, data, size) == 0) + { + return e->index; + } + } + } + + const size_t index = elems.size(); + elems.push_back(lookup[h] = make_elem(lookup[h], index, size, data)); + return index; 
+} + +template size_t ord_hash_set_t::deref(size_t i, data_t *&data) +{ + elem_t *e = elems[i]; + data = reinterpret_cast(e->data); + return e->size / sizeof(data_t); +} + +} // namespace re2c + +#endif // _RE2C_UTIL_ORD_HASH_SET_ diff --git a/tools/re2c/src/util/range.cc b/tools/re2c/src/util/range.cc new file mode 100644 index 000000000..fa46ab332 --- /dev/null +++ b/tools/re2c/src/util/range.cc @@ -0,0 +1,97 @@ +#include "src/util/range.h" + +namespace re2c +{ + +free_list Range::vFreeList; + +void Range::append_overlapping (Range * & head, Range * & tail, const Range * r) +{ + if (!head) + { + head = Range::ran (r->lb, r->ub); + tail = head; + } + else if (tail->ub < r->lb) + { + tail->nx = Range::ran (r->lb, r->ub); + tail = tail->nx; + } + else if (tail->ub < r->ub) + { + tail->ub = r->ub; + } +} + +Range * Range::add (const Range * r1, const Range * r2) +{ + Range * head = NULL; + Range * tail = NULL; + for (; r1 && r2;) + { + if (r1->lb < r2->lb) + { + append_overlapping (head, tail, r1); + r1 = r1->nx; + } + else + { + append_overlapping (head, tail, r2); + r2 = r2->nx; + } + } + for (; r1; r1 = r1->nx) + { + append_overlapping (head, tail, r1); + } + for (; r2; r2 = r2->nx) + { + append_overlapping (head, tail, r2); + } + return head; +} + +void Range::append (Range ** & ptail, uint32_t l, uint32_t u) +{ + Range * & tail = * ptail; + tail = Range::ran (l, u); + ptail = &tail->nx; +} + +Range * Range::sub (const Range * r1, const Range * r2) +{ + Range * head = NULL; + Range ** ptail = &head; + while (r1) + { + if (!r2 || r2->lb >= r1->ub) + { + append (ptail, r1->lb, r1->ub); + r1 = r1->nx; + } + else if (r2->ub <= r1->lb) + { + r2 = r2->nx; + } + else + { + if (r1->lb < r2->lb) + { + append (ptail, r1->lb, r2->lb); + } + while (r2 && r2->ub < r1->ub) + { + const uint32_t lb = r2->ub; + r2 = r2->nx; + const uint32_t ub = r2 && r2->lb < r1->ub + ? 
r2->lb + : r1->ub; + append (ptail, lb, ub); + } + r1 = r1->nx; + } + } + return head; +} + +} // namespace re2c diff --git a/tools/re2c/src/util/range.h b/tools/re2c/src/util/range.h new file mode 100644 index 000000000..9a9cf03b4 --- /dev/null +++ b/tools/re2c/src/util/range.h @@ -0,0 +1,65 @@ +#ifndef _RE2C_UTIL_RANGE_ +#define _RE2C_UTIL_RANGE_ + +#include "src/util/c99_stdint.h" +#include +#include // NULL + +#include "src/test/range/test.h" +#include "src/util/forbid_copy.h" +#include "src/util/free_list.h" + +namespace re2c +{ + +class Range +{ +public: + static free_list vFreeList; + +private: + Range * nx; + // [lb,ub) + uint32_t lb; + uint32_t ub; + +public: + static Range * sym (uint32_t c) + { + return new Range (NULL, c, c + 1); + } + static Range * ran (uint32_t l, uint32_t u) + { + return new Range (NULL, l, u); + } + ~Range () + { + vFreeList.erase (this); + } + Range * next () const { return nx; } + uint32_t lower () const { return lb; } + uint32_t upper () const { return ub; } + static Range * add (const Range * r1, const Range * r2); + static Range * sub (const Range * r1, const Range * r2); + +private: + Range (Range * n, uint32_t l, uint32_t u) + : nx (n) + , lb (l) + , ub (u) + { + assert (lb < ub); + vFreeList.insert (this); + } + static void append_overlapping (Range * & head, Range * & tail, const Range * r); + static void append (Range ** & ptail, uint32_t l, uint32_t u); + + // test addition and subtraction + template friend Range * re2c_test::range (uint32_t n); + + FORBID_COPY (Range); +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_RANGE_ diff --git a/tools/re2c/src/util/s_to_n32_unsafe.cc b/tools/re2c/src/util/s_to_n32_unsafe.cc new file mode 100644 index 000000000..fa7590b99 --- /dev/null +++ b/tools/re2c/src/util/s_to_n32_unsafe.cc @@ -0,0 +1,55 @@ +#include + +#include "src/util/s_to_n32_unsafe.h" + +// assumes that string matches regexp [0-9]+ +// returns false on overflow +bool s_to_u32_unsafe (const char * s, const char * 
s_end, uint32_t & number) +{ + uint64_t u = 0; + for (; s != s_end; ++s) + { + u *= 10; + u += static_cast (*s) - 0x30; + if (u >= std::numeric_limits::max()) + { + return false; + } + } + number = static_cast (u); + return true; +} + +// assumes that string matches regexp "-"? [0-9]+ +// returns false on underflow/overflow +bool s_to_i32_unsafe (const char * s, const char * s_end, int32_t & number) +{ + int64_t i = 0; + if (*s == '-') + { + ++s; + for (; s != s_end; ++s) + { + i *= 10; + i -= *s - 0x30; + if (i < std::numeric_limits::min()) + { + return false; + } + } + } + else + { + for (; s != s_end; ++s) + { + i *= 10; + i += *s - 0x30; + if (i > std::numeric_limits::max()) + { + return false; + } + } + } + number = static_cast (i); + return true; +} diff --git a/tools/re2c/src/util/s_to_n32_unsafe.h b/tools/re2c/src/util/s_to_n32_unsafe.h new file mode 100644 index 000000000..e733c1613 --- /dev/null +++ b/tools/re2c/src/util/s_to_n32_unsafe.h @@ -0,0 +1,10 @@ +#ifndef _RE2C_UTIL_S_TO_N32_UNSAFE_ +#define _RE2C_UTIL_S_TO_N32_UNSAFE_ + +#include "src/util/attribute.h" +#include "src/util/c99_stdint.h" + +bool s_to_u32_unsafe (const char * s, const char * s_end, uint32_t & number) RE2C_GXX_ATTRIBUTE ((warn_unused_result)); +bool s_to_i32_unsafe (const char * s, const char * s_end, int32_t & number) RE2C_GXX_ATTRIBUTE ((warn_unused_result)); + +#endif // _RE2C_UTIL_S_TO_N32_UNSAFE_ diff --git a/tools/re2c/src/util/smart_ptr.h b/tools/re2c/src/util/smart_ptr.h new file mode 100644 index 000000000..c138cf554 --- /dev/null +++ b/tools/re2c/src/util/smart_ptr.h @@ -0,0 +1,69 @@ +#ifndef _RE2C_UTIL_SMART_PTR_ +#define _RE2C_UTIL_SMART_PTR_ + +namespace re2c +{ + + template + class smart_ptr + { + private: + T* ptr; + long* count; // shared number of owners + + public: + explicit smart_ptr (T* p=0) + : ptr(p), count(new long(1)) {} + + smart_ptr (const smart_ptr& p) throw() + : ptr(p.ptr), count(p.count) + { + ++*count; + } + + ~smart_ptr () + { + dispose(); + } + + 
smart_ptr& operator= (const smart_ptr& p) + { + if (this != &p) + { + dispose(); + ptr = p.ptr; + count = p.count; + ++*count; + } + return *this; + } + + T& operator*() const + { + return *ptr; + } + + T* operator->() const + { + return ptr; + } + + private: + void dispose() + { + if (--*count == 0) + { + delete count; + delete ptr; + } + } + }; + + template + smart_ptr make_smart_ptr(T* p) + { + return smart_ptr(p); + } +} + +#endif // _RE2C_UTIL_SMART_PTR_ diff --git a/tools/re2c/src/util/static_assert.h b/tools/re2c/src/util/static_assert.h new file mode 100644 index 000000000..c2a1327db --- /dev/null +++ b/tools/re2c/src/util/static_assert.h @@ -0,0 +1,14 @@ +#ifndef _RE2C_UTIL_STATIC_ASSERT_ +#define _RE2C_UTIL_STATIC_ASSERT_ + +namespace re2c { + +template struct static_assert_t; +template<> struct static_assert_t {}; + +} // namespace re2c + +#define RE2C_STATIC_ASSERT(e) \ + { re2c::static_assert_t _; (void) _; } + +#endif // _RE2C_UTIL_STATIC_ASSERT_ diff --git a/tools/re2c/src/util/u32lim.h b/tools/re2c/src/util/u32lim.h new file mode 100644 index 000000000..d9c356ccf --- /dev/null +++ b/tools/re2c/src/util/u32lim.h @@ -0,0 +1,72 @@ +#ifndef _RE2C_UTIL_U32LIM_ +#define _RE2C_UTIL_U32LIM_ + +#include "src/util/c99_stdint.h" + +// uint32_t truncated to LIMIT +// any overflow (either result of a binary operation +// or conversion from another type) results in LIMIT +// LIMIT is a fixpoint +template +class u32lim_t +{ + uint32_t value; + explicit u32lim_t (uint32_t x) + : value (x < LIMIT ? x : LIMIT) + {} + explicit u32lim_t (uint64_t x) + : value (x < LIMIT ? static_cast (x) : LIMIT) + {} + +public: + // implicit conversion is forbidden, because + // operands should be converted before operation: + // uint32_t x, y; ... 
u32lim_t z = x + y; + // will result in 32-bit addition and may overflow + // Don't export overloaded constructors: it breaks OS X builds + // ('size_t' causes resolution ambiguity) + static u32lim_t from32 (uint32_t x) { return u32lim_t(x); } + static u32lim_t from64 (uint64_t x) { return u32lim_t(x); } + + static u32lim_t limit () + { + return u32lim_t (LIMIT); + } + + uint32_t uint32 () const + { + return value; + } + + bool overflow () const + { + return value == LIMIT; + } + + friend u32lim_t operator + (u32lim_t x, u32lim_t y) + { + const uint64_t z + = static_cast (x.value) + + static_cast (y.value); + return z < LIMIT + ? u32lim_t (z) + : u32lim_t (LIMIT); + } + + friend u32lim_t operator * (u32lim_t x, u32lim_t y) + { + const uint64_t z + = static_cast (x.value) + * static_cast (y.value); + return z < LIMIT + ? u32lim_t (z) + : u32lim_t (LIMIT); + } + + friend bool operator < (u32lim_t x, u32lim_t y) + { + return x.value < y.value; + } +}; + +#endif // _RE2C_UTIL_U32LIM_ diff --git a/tools/re2c/src/util/uniq_vector.h b/tools/re2c/src/util/uniq_vector.h new file mode 100644 index 000000000..76c0512f0 --- /dev/null +++ b/tools/re2c/src/util/uniq_vector.h @@ -0,0 +1,46 @@ +#ifndef _RE2C_UTIL_UNIQ_VECTOR_ +#define _RE2C_UTIL_UNIQ_VECTOR_ + +#include + +namespace re2c +{ + +// wrapper over std::vector +// O(n) lookup +// O(n) insertion +template +class uniq_vector_t +{ + typedef std::vector elems_t; + elems_t elems; +public: + uniq_vector_t () + : elems () + {} + size_t size () const + { + return elems.size (); + } + const value_t & operator [] (size_t i) const + { + return elems[i]; + } + size_t find_or_add (const value_t & v) + { + const size_t size = elems.size (); + for (size_t i = 0; i < size; ++i) + { + if (elems[i] == v) + { + return i; + } + } + elems.push_back (v); + return size; + } +}; + +} // namespace re2c + +#endif // _RE2C_UTIL_UNIQ_VECTOR_ diff --git a/tools/re2c/stream_lc.h b/tools/re2c/stream_lc.h deleted file mode 100644 index 
cc6682291..000000000 --- a/tools/re2c/stream_lc.h +++ /dev/null @@ -1,433 +0,0 @@ -/* - Author: Marcus Boerger -*/ - -/* $Id: stream_lc.h 767 2007-06-26 15:21:10Z helly $ */ - -#ifndef _stream_lc_h -#define _stream_lc_h - -#include -#include -#include -#include - -namespace re2c -{ - -template > -class basic_null_streambuf - : public std::basic_streambuf<_E, _Tr> -{ -public: - basic_null_streambuf() - : std::basic_streambuf<_E, _Tr>() - { - } -}; - -typedef basic_null_streambuf null_streambuf; - -template > -class basic_null_stream - : public std::basic_ostream<_E, _Tr> -{ -public: - basic_null_stream() - : std::basic_ostream<_E, _Tr>(null_buf = new basic_null_streambuf<_E, _Tr>()) - { - } - - virtual ~basic_null_stream() - { - delete null_buf; - } - - basic_null_stream& put(_E) - { - // nothing to do - return *this; - } - - basic_null_stream& write(const _E *, std::streamsize) - { - // nothing to do - return *this; - } - -protected: - basic_null_streambuf<_E, _Tr> * null_buf; -}; - -typedef basic_null_stream null_stream; - -class line_number -{ -public: - virtual ~line_number() - { - } - - virtual uint get_line() const = 0; -}; - -template > -class basic_filebuf_lc - : public std::basic_streambuf<_E, _Tr> - , public line_number -{ -public: - typedef std::basic_streambuf<_E, _Tr> _Mybase; - typedef basic_filebuf_lc<_E, _Tr> _Myt; - typedef _E char_type; - typedef _Tr traits_type; - typedef typename _Tr::int_type int_type; - typedef typename _Tr::pos_type pos_type; - typedef typename _Tr::off_type off_type; - - basic_filebuf_lc(FILE *_fp = 0) - : _Mybase() - , fp(_fp) - , must_close(false) - , fline(1) - { - } - - virtual ~basic_filebuf_lc() - { - sync(); - if (must_close) - { - close(); - } - } - - uint get_line() const - { - return fline + 1; - } - - bool is_open() const - { - return fp != 0; - } - - _Myt* open(const char *filename, std::ios_base::openmode mode = std::ios_base::out) - { - if (fp != 0) - { - return 0; - } - const char * fmode = (mode & 
std::ios_base::out) - ? "wt" - : "rt"; - if ((fp = fopen(filename, fmode)) == 0) - { - return 0; - } - - must_close = true; - return this; - } - - _Myt* open(FILE * _fp) - { - if (fp != 0) - { - return 0; - } - fp = _fp; - must_close = false; - return this; - } - - _Myt* close() - { - sync(); - - if (fp == 0 || fclose(fp) != 0) - { - fp = 0; - return 0; - } - else - { - fp = 0; - return this; - } - } - -protected: - - virtual int_type overflow(int_type c = _Tr::eof()) - { - if (c == '\n') - { - ++fline; - } - if (_Tr::eq_int_type(_Tr::eof(), c)) - { - return _Tr::not_eof(c); - } - else - { - buffer += _Tr::to_char_type(c); - return c; - } - } - - virtual int_type pbackfail(int_type c = _Tr::eof()) - { - assert(0); - c = 0; - return _Tr::eof(); - } - - virtual int_type underflow() // don't point past it - { - int c; - - if (buffer.length()) - { - return buffer[0]; - } - if (fp == 0 || ((c = fgetc(fp)) == EOF)) - { - return _Tr::eof(); - } - buffer += (char)c; - return c; - } - - virtual int_type uflow() // point past it - { - int c; - - if (buffer.length()) - { - c = buffer[0]; - buffer.erase(0, 1); - return c; - } - if (fp == 0 || ((c = fgetc(fp)) == EOF)) - { - return _Tr::eof(); - } - else if (c == '\n') - { - ++fline; - } - return c; - } - -#if 0 - virtual std::streamsize xsgetn(_E* buf, std::streamsize n) - { - std::streamsize r = 0; - while(n--) - { - int_type c = underflow(); - if (_Tr::eq_int_type(_Tr::eof(), c)) - { - break; - } - buf[r++] = c; - } - buf[r] = '\0'; - return r; - } -#endif - - virtual pos_type seekoff(off_type off, std::ios_base::seekdir whence, - std::ios_base::openmode = (std::ios_base::openmode)(std::ios_base::in | std::ios_base::out)) - { - return fseek(fp, (long)off, whence); - } - - virtual pos_type seekpos(pos_type fpos, - std::ios_base::openmode = (std::ios_base::openmode)(std::ios_base::in | std::ios_base::out)) - { - return fseek(fp, (long)fpos, SEEK_SET); - } - - virtual _Mybase * setbuf(_E *, std::streamsize) - { - assert(0); - 
return this; - } - - virtual int sync() - { - if (buffer.length() != 0) { - fwrite(buffer.c_str(), sizeof(_E), buffer.length(), fp); - } - buffer.clear(); - return fp == 0 - || _Tr::eq_int_type(_Tr::eof(), overflow()) - || 0 <= fflush(fp) ? 0 : -1; - } - - virtual std::streamsize xsputn(const _E *buf, std::streamsize cnt) - { - if (buffer.length() != 0) { - fwrite(buffer.c_str(), sizeof(_E), buffer.length(), fp); - } - buffer.clear(); - /*fline += std::count(buf, buf + cnt, '\n');*/ - for (std::streamsize pos = 0; pos < cnt; ++pos) - { - if (buf[pos] == '\n') - { - ++fline; - } - } - if (cnt != 0) { - return fwrite(buf, sizeof(_E), cnt, fp); - } else { - return 0; - } - } - -private: - - FILE * fp; - bool must_close; - uint fline; - std::basic_string<_E, _Tr> buffer; -}; - -typedef basic_filebuf_lc filebuf_lc; - -template< - class _E, - class _BaseStream, - std::ios_base::openmode _DefOpenMode, - class _Tr = std::char_traits<_E> > -class basic_fstream_lc - : public _BaseStream - , public line_number -{ -public: - typedef basic_fstream_lc<_E, _BaseStream, _DefOpenMode, _Tr> _Myt; - typedef std::basic_ios<_E, _Tr> _Myios; - typedef _BaseStream _Mybase; - typedef basic_filebuf_lc<_E, _Tr> _Mybuf; - - basic_fstream_lc() - : _Mybase(mybuf = new _Mybuf()) - { - } - - virtual ~basic_fstream_lc() - { - delete mybuf; - } - - bool is_open() const - { - return mybuf->is_open(); - } - - _Myt& open(const char * filename, std::ios_base::openmode mode = _DefOpenMode) - { - if ((mode & _DefOpenMode) == 0 || mybuf->open(filename, mode) == 0) - { - _Myios::setstate(std::ios_base::failbit); - } - return *this; - } - - _Myt& open(FILE *fp) - { - if (mybuf->open(fp) == 0) - { - _Myios::setstate(std::ios_base::failbit); - } - return *this; - } - - void close() - { - if (mybuf->close() == 0) - { - _Myios::setstate(std::ios_base::failbit); - } - } - - uint get_line() const - { - return mybuf->get_line(); - } - -protected: - mutable _Mybuf *mybuf; -}; - -template > -class basic_ofstream_lc 
- : public basic_fstream_lc<_E, std::basic_ostream<_E, _Tr>, std::ios_base::out, _Tr> -{ -}; - -typedef basic_ofstream_lc ofstream_lc; - -template > -class basic_ifstream_lc - : public basic_fstream_lc<_E, std::basic_istream<_E, _Tr>, std::ios_base::in, _Tr> -{ -}; - -typedef basic_ifstream_lc ifstream_lc; - -class file_info -{ -public: - - static std::string escape(const std::string& _str) - { - std::string str(_str); - size_t l = str.length(); - for (size_t p = 0; p < l; ++p) - { - if (str[p] == '\\') - { - str.insert(++p, "\\"); - ++l; - } - } - return str; - } - - file_info() - : ln(NULL) - { - } - - file_info(const std::string& _fname, const line_number* _ln, bool _escape = true) - : fname(_escape ? escape(_fname) : _fname) - , ln(_ln) - { - } - - file_info(const file_info& oth, const line_number* _ln = NULL) - : fname(oth.fname) - , ln(_ln) - { - } - - file_info& operator = (const file_info& oth) - { - *(const_cast(&this->fname)) = oth.fname; - ln = oth.ln; - return *this; - } - - const std::string fname; - const line_number* ln; -}; - -std::ostream& operator << (std::ostream& o, const file_info& li); - -} // end namespace re2c - -#endif /* _stream_lc_h */ diff --git a/tools/re2c/substr.cc b/tools/re2c/substr.cc deleted file mode 100644 index 7796c1dc0..000000000 --- a/tools/re2c/substr.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* $Id: substr.cc 546 2006-05-25 13:40:14Z helly $ */ -#include -#include -#include "substr.h" -#include "globals.h" - -#ifndef HAVE_STRNDUP - -char *strndup(const char *str, size_t len) -{ - char * ret = (char*)malloc(len + 1); - - memcpy(ret, str, len); - ret[len] = '\0'; - return ret; -} - -#endif - -namespace re2c -{ - -void SubStr::out(std::ostream& o) const -{ - o.write(str, len); -} - -bool operator==(const SubStr &s1, const SubStr &s2) -{ - return (bool) (s1.len == s2.len && memcmp(s1.str, s2.str, s1.len) == 0); -} - -Str::Str(const SubStr& s) - : SubStr(strndup(s.str, s.len), s.len) -{ - ; -} - -Str::Str(Str& s) - : SubStr(s.str, 
s.len) -{ - s.str = NULL; - s.len = 0; -} - -Str::Str() - : SubStr((char*) NULL, 0) -{ - ; -} - - -Str::~Str() -{ - if (str) { - free((void*)str); - } - str = (char*) - 1; - len = (uint) - 1; -} - -} // end namespace re2c diff --git a/tools/re2c/substr.h b/tools/re2c/substr.h deleted file mode 100644 index 7b56e0042..000000000 --- a/tools/re2c/substr.h +++ /dev/null @@ -1,101 +0,0 @@ -/* $Id: substr.h 530 2006-05-25 13:34:33Z helly $ */ -#ifndef _substr_h -#define _substr_h - -#include -#include -#include "basics.h" - -namespace re2c -{ - -class SubStr -{ -public: - const char * str; - const char * const org; - uint len; - -public: - friend bool operator==(const SubStr &, const SubStr &); - SubStr(const uchar*, uint); - SubStr(const char*, uint); - SubStr(const char*); - SubStr(const SubStr&); - virtual ~SubStr(); - void out(std::ostream&) const; - std::string to_string() const; - uint ofs() const; - -#ifdef PEDANTIC -protected: - SubStr& operator = (const SubStr& oth); -#endif -}; - -class Str: public SubStr -{ -public: - Str(const SubStr&); - Str(Str&); - Str(); - ~Str(); -}; - -inline std::ostream& operator<<(std::ostream& o, const SubStr &s) -{ - s.out(o); - return o; -} - -inline std::ostream& operator<<(std::ostream& o, const SubStr* s) -{ - return o << *s; -} - -inline SubStr::SubStr(const uchar *s, uint l) - : str((char*)s), org((char*)s), len(l) -{ } - -inline SubStr::SubStr(const char *s, uint l) - : str(s), org(s), len(l) -{ } - -inline SubStr::SubStr(const char *s) - : str(s), org(s), len(strlen(s)) -{ } - -inline SubStr::SubStr(const SubStr &s) - : str(s.str), org(s.str), len(s.len) -{ } - -inline SubStr::~SubStr() -{ } - -inline std::string SubStr::to_string() const -{ - return std::string(str, len); -} - -inline uint SubStr::ofs() const -{ - return str - org; -} - -#ifdef PEDANTIC -inline SubStr& SubStr::operator = (const SubStr& oth) -{ - new(this) SubStr(oth); - return *this; -} -#endif - -} // end namespace re2c - -#ifndef HAVE_STRNDUP - -char 
*strndup(const char *str, size_t len); - -#endif - -#endif diff --git a/tools/re2c/token.h b/tools/re2c/token.h deleted file mode 100644 index 4abfbff01..000000000 --- a/tools/re2c/token.h +++ /dev/null @@ -1,28 +0,0 @@ -/* $Id: token.h 547 2006-05-25 13:40:35Z helly $ */ -#ifndef _token_h -#define _token_h - -#include "substr.h" - -namespace re2c -{ - -class Token -{ - -public: - Str text; - uint line; - -public: - Token(SubStr, uint); -}; - -inline Token::Token(SubStr t, uint l) : text(t), line(l) -{ - ; -} - -} // end namespace re2c - -#endif diff --git a/tools/re2c/y.tab.h b/tools/re2c/y.tab.h deleted file mode 100644 index 84b92e807..000000000 --- a/tools/re2c/y.tab.h +++ /dev/null @@ -1,88 +0,0 @@ -/* A Bison parser, made by GNU Bison 2.3. */ - -/* Skeleton interface for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. 
Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - CLOSESIZE = 258, - CLOSE = 259, - ID = 260, - CODE = 261, - RANGE = 262, - STRING = 263, - CONFIG = 264, - VALUE = 265, - NUMBER = 266 - }; -#endif -/* Tokens. */ -#define CLOSESIZE 258 -#define CLOSE 259 -#define ID 260 -#define CODE 261 -#define RANGE 262 -#define STRING 263 -#define CONFIG 264 -#define VALUE 265 -#define NUMBER 266 - - - - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef union YYSTYPE -#line 58 "./parser.y" -{ - re2c::Symbol *symbol; - re2c::RegExp *regexp; - re2c::Token *token; - char op; - int number; - re2c::ExtOp extop; - re2c::Str *str; -} -/* Line 1489 of yacc.c. */ -#line 81 "y.tab.h" - YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - -extern YYSTYPE yylval; -