From 7aa402b2a5f1471b533b974c0e0c916ad613cb6c Mon Sep 17 00:00:00 2001 From: Randy Heit Date: Fri, 19 Mar 2010 04:04:13 +0000 Subject: [PATCH] - Add a preliminary grammar and a test driver for it. SVN r2233 (scripting) --- src/CMakeLists.txt | 7 + src/c_dispatch.h | 1 + src/sc_man.cpp | 30 +++ src/sc_man.h | 2 + src/sc_man_scanner.re | 50 +++- src/sc_man_tokens.h | 8 + src/thingdef/thingdef_parse.cpp | 20 +- src/zscript/vm.h | 4 +- src/zscript/zcc-parse.lemon | 443 ++++++++++++++++++++++++++++++++ src/zscript/zcc_parser.cpp | 331 ++++++++++++++++++++++++ zdoom.vcproj | 92 +++++++ 11 files changed, 972 insertions(+), 16 deletions(-) create mode 100644 src/zscript/zcc-parse.lemon create mode 100644 src/zscript/zcc_parser.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 67e83a96e0..c360dfcb74 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -495,6 +495,12 @@ add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CUR WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y ) +add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/zcc-parse.c ${CMAKE_CURRENT_BINARY_DIR}/zcc-parse.h + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/zscript/zcc-parse.lemon . + COMMAND ${CMAKE_BINARY_DIR}/tools/lemon/lemon zcc-parse.lemon + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/zscript/zcc-parse.lemon ) + add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/sc_man_scanner.h COMMAND ${CMAKE_BINARY_DIR}/tools/re2c/re2c --no-generation-date -s -o ${CMAKE_CURRENT_BINARY_DIR}/sc_man_scanner.h ${CMAKE_CURRENT_SOURCE_DIR}/sc_man_scanner.re DEPENDS re2c ${CMAKE_CURRENT_SOURCE_DIR}/sc_man_scanner.re ) @@ -773,6 +779,7 @@ add_executable( zdoom WIN32 zscript/vmdisasm.cpp zscript/vmexec.cpp zscript/vmframe.cpp + zscript/zcc_parser.cpp autozend.cpp ) set_source_files_properties( xlat/parse_xlat.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c" ) diff --git a/src/c_dispatch.h b/src/c_dispatch.h index cde53996a9..96e375ea2d 100644 --- a/src/c_dispatch.h +++ b/src/c_dispatch.h @@ -159,5 +159,6 @@ void ResetButtonStates (); // Same as above, but also clear bDown extern unsigned int MakeKey (const char *s); extern unsigned int MakeKey (const char *s, size_t len); +extern unsigned int SuperFastHash (const char *data, size_t len); #endif //__C_DISPATCH_H__ diff --git a/src/sc_man.cpp b/src/sc_man.cpp index 298676dc65..505c9c0d4e 100644 --- a/src/sc_man.cpp +++ b/src/sc_man.cpp @@ -119,6 +119,7 @@ FScanner &FScanner::operator=(const FScanner &other) LastGotLine = other.LastGotLine; CMode = other.CMode; Escape = other.Escape; + StateMode = other.StateMode; // Copy public members if (other.String == other.StringBuffer) @@ -262,6 +263,7 @@ void FScanner::PrepareScript () LastGotLine = 1; CMode = false; Escape = true; + StateMode = 0; StringBuffer[0] = '\0'; BigStringBuffer = ""; } @@ -359,6 +361,34 @@ void FScanner::SetEscape (bool esc) Escape = esc; } +//========================================================================== +// +// FScanner :: SetStateMode +// +// Enters state mode. This mode is very permissive for identifiers, which +// it returns as TOK_NonWhitespace. The only character sequences that are +// not returned as such are these: +// +// * stop +// * wait +// * fail +// * loop +// * goto - Automatically exits state mode after it's seen. +// * : +// * ; +// * } - Automatically exits state mode after it's seen. +// +// Quoted strings are returned as TOK_NonWhitespace, minus the quotes. In +// addition, any two consecutive sequences of TOK_NonWhitespace also exit +// state mode. +// +//========================================================================== + +void FScanner::SetStateMode(bool stately) +{ + StateMode = stately ? 2 : 0; +} + //========================================================================== // // FScanner::ScanString diff --git a/src/sc_man.h b/src/sc_man.h index 777e8cdcd7..cc2c1c5377 100644 --- a/src/sc_man.h +++ b/src/sc_man.h @@ -26,6 +26,7 @@ public: void SetCMode(bool cmode); void SetEscape(bool esc); + void SetStateMode(bool stately); const SavedPos SavePos(); void RestorePos(const SavedPos &pos); @@ -94,6 +95,7 @@ protected: const char *LastGotPtr; int LastGotLine; bool CMode; + BYTE StateMode; bool Escape; }; diff --git a/src/sc_man_scanner.re b/src/sc_man_scanner.re index 66058ac8cd..d8070b53a0 100644 --- a/src/sc_man_scanner.re +++ b/src/sc_man_scanner.re @@ -49,9 +49,34 @@ std2: TOK2 = (NWS\STOP1); TOKC2 = (NWS\STOPC); */ - if (tokens) // A well-defined scanner, based on the c.re example. - { #define RET(x) TokenType = x; goto normal_token; + if (tokens && StateMode != 0) + { + /*!re2c + "/*" { goto comment; } /* C comment */ + "//" (any\"\n")* "\n" { goto newline; } /* C++ comment */ + + (["](([\\]["])|[^"])*["]) { RET(TK_StringConst); } + 'stop' { RET(TK_Stop); } + 'wait' { RET(TK_Wait); } + 'fail' { RET(TK_Fail); } + 'loop' { RET(TK_Loop); } + 'goto' { StateMode = 0; RET(TK_Goto); } + ":" { RET(':'); } + ";" { RET(';'); } + "}" { StateMode = 0; RET('}'); } + + WSP+ { goto std1; } + "\n" { goto newline; } + + TOKS = (NWS\[/":;}]); + TOKS* ([/] (TOKS\[*]) TOKS*)* + { RET(TK_NonWhitespace); } + + */ + } + else if (tokens) // A well-defined scanner, based on the c.re example. + { /*!re2c "/*" { goto comment; } /* C comment */ "//" (any\"\n")* "\n" { goto newline; } /* C++ comment */ @@ -146,8 +171,9 @@ std2: 'is' { RET(TK_Is); } 'replaces' { RET(TK_Replaces); } - - /* Needed for decorate action functions */ + 'states' { RET(TK_States); } + 'meta' { RET(TK_Meta); } + 'deprecated' { RET(TK_Deprecated); } 'action' { RET(TK_Action); } /* other DECORATE top level keywords */ @@ -200,6 +226,7 @@ std2: "~==" { RET(TK_ApproxEq); } "<>=" { RET(TK_LtGtEq); } "**" { RET(TK_MulMul); } + "::" { RET(TK_ColonColon); } ";" { RET(';'); } "{" { RET('{'); } "}" { RET('}'); } @@ -352,6 +379,10 @@ normal_token: { memcpy (StringBuffer, tok+1, StringLen); } + if (StateMode && TokenType == TK_StringConst) + { + TokenType = TK_NonWhitespace; + } } else { @@ -364,6 +395,17 @@ normal_token: memcpy (StringBuffer, tok, StringLen); } } + if (tokens && StateMode) + { // State mode is exited after two consecutive TK_NonWhitespace tokens + if (TokenType == TK_NonWhitespace) + { + StateMode--; + } + else + { + StateMode = 2; + } + } if (StringLen < MAX_STRING_SIZE) { String = StringBuffer; diff --git a/src/sc_man_tokens.h b/src/sc_man_tokens.h index 9dde749723..de08154478 100644 --- a/src/sc_man_tokens.h +++ b/src/sc_man_tokens.h @@ -3,6 +3,8 @@ xx(TK_StringConst, "string constant") xx(TK_NameConst, "name constant") xx(TK_IntConst, "integer constant") xx(TK_FloatConst, "float constant") +xx(TK_NonWhitespace, "non-whitespace") +xx(TK_ColonColon, "'::'") xx(TK_DotDot, "'..'") xx(TK_Ellipsis, "'...'") xx(TK_RShiftEq, "'>>='") @@ -122,4 +124,10 @@ xx(TK_Array, "'array'") xx(TK_In, "'in'") xx(TK_SizeOf, "'sizeof'") xx(TK_AlignOf, "'alignof'") +xx(TK_States, "'states'") +xx(TK_Loop, "'loop'") +xx(TK_Fail, "'fail'") +xx(TK_Wait, "'wait'") +xx(TK_Meta, "'meta'") +xx(TK_Deprecated, "'deprecated'") #undef xx diff --git a/src/thingdef/thingdef_parse.cpp b/src/thingdef/thingdef_parse.cpp index ed3ed4b15c..9a9d99cc28 100644 --- a/src/thingdef/thingdef_parse.cpp +++ b/src/thingdef/thingdef_parse.cpp @@ -859,16 +859,6 @@ static void ParseActorProperty(FScanner &sc, Baggage &bag) FScriptPosition::ErrorCounter++; } } - else if (!propname.CompareNoCase("States")) - { - if (bag.StateSet) - { - sc.ScriptMessage("'%s' contains multiple state declarations", bag.Info->Class->TypeName.GetChars()); - FScriptPosition::ErrorCounter++; - } - ParseStates(sc, bag.Info, (AActor *)bag.Info->Class->Defaults, bag); - bag.StateSet=true; - } else if (MatchString(propname, statenames) != -1) { bag.statedef.SetStateLabel(propname, CheckState (sc, bag.Info->Class)); @@ -1184,6 +1174,16 @@ static void ParseActor(FScanner &sc) ParseActorProperty(sc, bag); break; + case TK_States: + if (bag.StateSet) + { + sc.ScriptMessage("'%s' contains multiple state declarations", bag.Info->Class->TypeName.GetChars()); + FScriptPosition::ErrorCounter++; + } + ParseStates(sc, bag.Info, (AActor *)bag.Info->Class->Defaults, bag); + bag.StateSet = true; + break; + case '+': case '-': ParseActorFlag(sc, bag, sc.TokenType); diff --git a/src/zscript/vm.h b/src/zscript/vm.h index 72e2141d58..c7d3e302fb 100644 --- a/src/zscript/vm.h +++ b/src/zscript/vm.h @@ -867,7 +867,7 @@ void VMDisasm(FILE *out, const VMOP *code, int codesize, const VMScriptFunction #define PARAM_SOUND_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_INT); FSoundID x = param[p].i; #define PARAM_COLOR_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_INT); PalEntry x; x.d = param[p].i; #define PARAM_FLOAT_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_FLOAT); double x = param[p].f; -#define PARAM_FIXED_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_FLOAT); fixed_t x = fixed_t(param[p].f * 65536.0); +#define PARAM_FIXED_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_FLOAT); fixed_t x = FLOAT2FIXED(param[p].f); #define PARAM_ANGLE_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_FLOAT); angle_t x = angle_t(param[p].f * (ANGLE_90 / 90.0)); #define PARAM_STRING_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_STRING); FString x = param[p].s(); #define PARAM_STATE_AT(p,x) assert((p) < numparam); assert(param[p].Type == REGT_POINTER && (param[p].atag == ATAG_STATE || param[p].a == NULL)); FState *x = (FState *)param[p].a; @@ -885,7 +885,7 @@ void VMDisasm(FILE *out, const VMOP *code, int codesize, const VMScriptFunction #define PARAM_SOUND_OPT_AT(p,x) FSoundID x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_INT); x = FSoundID(param[p].i); } else #define PARAM_COLOR_OPT_AT(p,x) PalEntry x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_INT); x.d = param[p].i; } else #define PARAM_FLOAT_OPT_AT(p,x) double x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_FLOAT); x = param[p].f; } else -#define PARAM_FIXED_OPT_AT(p,x) fixed_t x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_FLOAT); x = fixed_t(param[p].f * 65536.0); } else +#define PARAM_FIXED_OPT_AT(p,x) fixed_t x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_FLOAT); x = FLOAT2FIXED(param[p].f); } else #define PARAM_ANGLE_OPT_AT(p,x) angle_t x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_FLOAT); x = angle_t(param[p].f * (ANGLE_90 / 90.0)); } else #define PARAM_STRING_OPT_AT(p,x) FString x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_STRING); x = param[p].s(); } else #define PARAM_STATE_OPT_AT(p,x) FState *x; if ((p) < numparam && param[p].Type != REGT_NIL) { assert(param[p].Type == REGT_POINTER && (param[p].atag == ATAG_STATE || param[p].a == NULL)); x = (FState *)param[p].a; } else diff --git a/src/zscript/zcc-parse.lemon b/src/zscript/zcc-parse.lemon new file mode 100644 index 0000000000..d699f37da2 --- /dev/null +++ b/src/zscript/zcc-parse.lemon @@ -0,0 +1,443 @@ +%token_prefix ZCC_ +%token_type { ZCCToken } +%token_destructor {} // just to avoid a compiler warning +%name ZCCParse +%extra_argument { FScanner *sc } +%syntax_error +{ + FString unexpected, expecting; + + int i; + int stateno = yypParser->yystack[yypParser->yyidx].stateno; + + unexpected << "Unexpected " << ZCCTokenName(yymajor); + + // Determine all the terminals that the parser would have accepted at this point + // (see yy_find_shift_action). This list can get quite long. Is it worthwhile to + // print it when not debugging the grammar, or would that be too confusing to + // the average user? + if (stateno < YY_SHIFT_MAX && (i = yy_shift_ofst[stateno])!=YY_SHIFT_USE_DFLT) + { + for (int j = 1; j < YYERRORSYMBOL; ++j) + { + int k = i + j; + if (k >= 0 && k < YY_SZ_ACTTAB && yy_lookahead[k] == j) + { + expecting << (expecting.IsEmpty() ? "Expecting " : " or ") << ZCCTokenName(j); + } + } + } + sc->ScriptMessage("%s\n%s\n", unexpected.GetChars(), expecting.GetChars()); +} +%parse_accept { sc->ScriptMessage("input accepted\n"); } +%parse_failure { /**failed = true;*/ } + +%nonassoc EQ MULEQ DIVEQ MODEQ ADDEQ SUBEQ LSHEQ RSHEQ ANDEQ OREQ XOREQ. +%right QUESTION COLON. +%left OROR. +%left ANDAND. +%left EQEQ NEQ APPROXEQ. +%left LT GT LTEQ GTEQ LTGTEQ IS. +%left DOTDOT. +%left OR. /* Note that this is like the Ruby precedence for these */ +%left XOR. /* three operators and not the C precedence, since */ +%left AND. /* they are higher priority than the comparisons. */ +%left LSH RSH. +%left SUB ADD. +%left MUL DIV MOD CROSSPROD DOTPROD. +%left POW. +%right UNARY ADDADD SUBSUB. +%left DOT LPAREN LBRACKET. +%left SCOPE. + + +main ::= translation_unit. { sc->ScriptMessage("Parse complete\n"); } + +translation_unit ::= . +translation_unit ::= translation_unit external_declaration. +translation_unit ::= translation_unit EOF. +translation_unit ::= error. + +external_declaration ::= class_definition. + +/* Optional bits. */ +opt_semicolon ::= . +opt_semicolon ::= SEMICOLON. + +opt_comma ::= . +opt_comma ::= COMMA. + +opt_expr ::= . +opt_expr ::= expr. + + +/* A class definition. Can only occur at global scope. */ +class_definition ::= CLASS id_or_string class_ancestry class_flags class_body. + +id_or_string ::= IDENTIFIER. +id_or_string ::= string_constant. + +class_ancestry ::= . +class_ancestry ::= COLON id_or_string. + +class_flags ::= . +class_flags ::= class_flags ABSTRACT. +class_flags ::= class_flags NATIVE. +class_flags ::= class_flags REPLACES id_or_string. + + +class_body ::= SEMICOLON class_innards EOF. +class_body ::= LBRACE class_innards RBRACE. + +class_innards ::= . +class_innards ::= class_innards class_member. + +/* Classes can define variables, functions, enums, structs, states, constants, and defaults. */ +class_member ::= declarator. +class_member ::= enum_def. +class_member ::= struct_def. +class_member ::= states_def. +class_member ::= default_def. +class_member ::= const_def. + +/* Structs can define variables, enums, and structs. */ +struct_def ::= STRUCT IDENTIFIER LBRACE struct_body RBRACE opt_semicolon. +struct_member ::= declarator_no_fun. +struct_member ::= enum_def. + +/* Enumerators are lists of named integers. */ +enum_def ::= ENUM IDENTIFIER enum_type LBRACE enum_list opt_comma RBRACE opt_semicolon. + +enum_type ::= . +enum_type ::= COLON int_type. + +enum_list ::= enumerator. +enum_list ::= enum_list COMMA enumerator. + +enumerator ::= IDENTIFIER. +enumerator ::= IDENTIFIER EQ expr. /* Expression must be constant. */ + +/* States */ +states_def ::= STATES scanner_mode LBRACE states_body RBRACE. + +/* We use a special scanner mode to allow for sprite names and frame characters + * to not be quoted even if they contain special characters. The scanner_mode + * nonterminal is used to enter this mode. The scanner automatically leaves it + * upon pre-defined conditions. See the comments by FScanner::SetStateMode(). + * + * Note that rules are reduced *after* one token of lookahead has been + * consumed, so this nonterminal must be placed one token before we want it to + * take effect. For example, in states_def above, the scanner mode will be + * set immediately after LBRACE is consumed, rather than immediately after + * STATES is consumed. + */ +scanner_mode ::= . { sc->SetStateMode(true); } + +states_body ::= . +states_body ::= error. +states_body ::= states_body LABELID. +states_body ::= states_body state_line. +states_body ::= states_body state_label. +states_body ::= states_body state_flow. + +state_label ::= NWS COLON. + +state_flow ::= state_flow_type scanner_mode SEMICOLON. +state_flow_type ::= STOP. +state_flow_type ::= WAIT. +state_flow_type ::= FAIL. +state_flow_type ::= LOOP. +state_flow_type ::= GOTO dotted_identifier state_goto_offset. + +state_goto_offset ::= . +state_goto_offset ::= PLUS expr. /* Must evaluate to an integer constant. */ + +state_line ::= NWS NWS expr state_opts state_action. + +state_opts ::= . +state_opts ::= state_opts BRIGHT. +state_opts ::= state_opts OFFSET LPAREN expr COMMA expr RPAREN. +state_opts ::= state_opts LIGHT LPAREN light_list RPAREN. + +light_list ::= STRCONST. +light_list ::= light_list COMMA STRCONST. + +/* A state action can be either a compound statement or a single action function call. */ +state_action ::= LBRACE statement_list scanner_mode RBRACE. +state_action ::= LBRACE error scanner_mode RBRACE. +state_action ::= state_call scanner_mode SEMICOLON. + +state_call ::= . +state_call ::= IDENTIFIER state_call_parms. +state_call_parms ::= . +state_call_parms ::= LPAREN opt_expr_list RPAREN. +state_call_parms ::= LPAREN error RPAREN. + +dotted_identifier ::= IDENTIFIER. +dotted_identifier ::= dotted_identifier DOT IDENTIFIER. + +/* Definition of a default class instance. */ +default_def ::= DEFAULT compound_statement. + +/* Type names */ +int_type ::= SBYTE. +int_type ::= BYTE. +int_type ::= SHORT. +int_type ::= USHORT. +int_type ::= INT. +int_type ::= UINT. + +type_name ::= BOOL. +type_name ::= int_type. +type_name ::= FLOAT. +type_name ::= DOUBLE. +type_name ::= STRING. +type_name ::= VECTOR vector_size. +type_name ::= NAME. +type_name ::= IDENTIFIER. /* User-defined type (struct, enum, or class) */ + +vector_size ::= . +vector_size ::= LT INTCONST GT. + +/* Type names can also be used as identifiers in contexts where type names + * are not normally allowed. */ +%fallback IDENTIFIER + SBYTE BYTE SHORT USHORT INT UINT BOOL FLOAT DOUBLE STRING VECTOR NAME. + +/* Aggregate types */ +aggregate_type ::= MAP LT type array_size COMMA type array_size GT. /* Hash table */ +aggregate_type ::= ARRAY LT type array_size GT. /* TArray */ +aggregate_type ::= CLASS class_restrictor. /* class */ +class_restrictor ::= . +class_restrictor ::= LT IDENTIFIER GT. + +type ::= type_name. +type ::= aggregate_type. + +type_list ::= type array_size. /* A comma-separated list of types */ +type_list ::= type_list COMMA type array_size. + +type_list_or_void ::= VOID. +type_list_or_void ::= type_list. + +array_size ::= . +array_size ::= array_size LBRACKET opt_expr RBRACKET. + +declarator ::= decl_flags type_list_or_void variables_or_function. /* Multiple type names are only valid for functions. */ +declarator_no_fun ::= decl_flags type variable_list. + +variables_or_function ::= IDENTIFIER LPAREN func_params RPAREN func_const opt_func_body. /* Function */ +variables_or_function ::= variable_list SEMICOLON. +variables_or_function ::= error SEMICOLON. + +variable_list ::= IDENTIFIER array_size. +variable_list ::= variable_list COMMA IDENTIFIER array_size. + +decl_flags ::= . +decl_flags ::= decl_flags NATIVE. +decl_flags ::= decl_flags STATIC. +decl_flags ::= decl_flags PRIVATE. +decl_flags ::= decl_flags PROTECTED. +decl_flags ::= decl_flags LATENT. +decl_flags ::= decl_flags FINAL. +decl_flags ::= decl_flags META. +decl_flags ::= decl_flags ACTION. +decl_flags ::= decl_flags DEPRECATED LPAREN string_constant RPAREN. + +func_const ::= . +func_const ::= CONST. + +opt_func_body ::= SEMICOLON. +opt_func_body ::= function_body. + +func_params ::= . /* empty */ +func_params ::= VOID. +func_params ::= func_param_list. + +func_param_list ::= func_param. +func_param_list ::= func_param COMMA func_param_list. + +func_param ::= func_param_flags type IDENTIFIER array_size. + +func_param_flags ::= . +func_param_flags ::= func_param_flags IN. +func_param_flags ::= func_param_flags OUT. +func_param_flags ::= func_param_flags OPTIONAL. + +struct_body ::= struct_member. +struct_body ::= struct_member struct_body. + +/* Like UnrealScript, a constant's type is implied by its value's type. */ +const_def ::= CONST IDENTIFIER EQ expr SEMICOLON. + +/* Expressions */ + +/* We use default to access a class's default instance. */ +%fallback IDENTIFIER + DEFAULT. + +primary ::= IDENTIFIER. +primary ::= SUPER. +primary ::= constant. +primary ::= SELF. +primary ::= LPAREN expr RPAREN. +primary ::= LPAREN error RPAREN. +primary ::= primary LPAREN func_expr_list RPAREN. [DOT] // Function call +primary ::= primary LBRACKET expr RBRACKET. [DOT] // Array access +primary ::= primary DOT IDENTIFIER. // Member access +primary ::= primary ADDADD. /* postfix++ */ +primary ::= primary SUBSUB. /* postfix-- */ +primary ::= SCOPE primary. + +unary_expr ::= primary. +unary_expr ::= SUB unary_expr. [UNARY] +unary_expr ::= ADD unary_expr. [UNARY] +unary_expr ::= SUBSUB unary_expr. [UNARY] +unary_expr ::= ADDADD unary_expr. [UNARY] +unary_expr ::= TILDE unary_expr. [UNARY] +unary_expr ::= BANG unary_expr. [UNARY] +unary_expr ::= SIZEOF unary_expr. [UNARY] +unary_expr ::= ALIGNOF unary_expr. [UNARY] + +/* Due to parsing conflicts, C-style casting is not supported. You + * must use C++ function call-style casting instead. + */ + +expr ::= unary_expr. +expr ::= expr ADD expr. /* a + b */ +expr ::= expr SUB expr. /* a - b */ +expr ::= expr MUL expr. /* a * b */ +expr ::= expr DIV expr. /* a / b */ +expr ::= expr MOD expr. /* a % b */ +expr ::= expr POW expr. /* a ** b */ +expr ::= expr CROSSPROD expr. /* a cross b */ +expr ::= expr DOTPROD expr. /* a dot b */ +expr ::= expr LSH expr. /* a << b */ +expr ::= expr RSH expr. /* a >> b */ +expr ::= expr DOTDOT expr. /* a .. b */ + +expr ::= expr LT expr. /* a < b */ +expr ::= expr GT expr. /* a > b */ +expr ::= expr LTEQ expr. /* a <= b */ +expr ::= expr GTEQ expr. /* a >= b */ +expr ::= expr LTGTEQ expr. /* a <>= b */ +expr ::= expr IS expr. /* a is b */ + +expr ::= expr EQEQ expr. /* a == b */ +expr ::= expr NEQ expr. /* a != b */ +expr ::= expr APPROXEQ expr. /* a ~== b */ + +expr ::= expr AND expr. /* a & b */ +expr ::= expr XOR expr. /* a ^ b */ +expr ::= expr OR expr. /* a | b */ +expr ::= expr ANDAND expr. /* a && b */ +expr ::= expr OROR expr. /* a || b */ + +expr ::= expr SCOPE expr. + +expr ::= expr QUESTION expr COLON expr. + +opt_expr_list ::= . +opt_expr_list ::= expr_list. + +expr_list ::= expr. +expr_list ::= expr_list COMMA expr. + +/* A function expression list can also specify a parameter's name, + * but once you do that, all remaining parameters must also be named. */ +func_expr_list ::= . +func_expr_list ::= expr_list. +func_expr_list ::= expr_list COMMA named_expr_list. +func_expr_list ::= named_expr_list. + +named_expr_list ::= named_expr. +named_expr_list ::= named_expr_list COMMA named_expr. + +named_expr ::= IDENTIFIER COLON expr. + +/* Allow C-like concatenation of adjacent string constants. */ +string_constant ::= STRCONST. +string_constant ::= string_constant STRCONST. + +constant ::= string_constant. +constant ::= INTCONST. +constant ::= FLOATCONST. + +function_body ::= compound_statement. + +statement ::= labeled_statement. +statement ::= compound_statement. +statement ::= expression_statement. +statement ::= selection_statement. +statement ::= iteration_statement. +statement ::= jump_statement. +statement ::= assign_statement. +statement ::= local_var. +statement ::= error SEMICOLON. + +jump_statement ::= CONTINUE SEMICOLON. +jump_statement ::= BREAK SEMICOLON. +jump_statement ::= RETURN SEMICOLON. +jump_statement ::= RETURN expr_list SEMICOLON. + +compound_statement ::= LBRACE RBRACE. +compound_statement ::= LBRACE statement_list RBRACE. +compound_statement ::= LBRACE error RBRACE. + +statement_list ::= statement. +statement_list ::= statement_list statement. + +expression_statement ::= SEMICOLON. +expression_statement ::= expr SEMICOLON. + +iteration_statement ::= while_or_until LPAREN expr RPAREN statement. +iteration_statement ::= DO statement while_or_until LPAREN expr RPAREN. +iteration_statement ::= FOR LPAREN for_init_expr SEMICOLON opt_expr SEMICOLON for_bump_expr RPAREN statement. + +while_or_until ::= WHILE. +while_or_until ::= UNTIL. + +for_init_expr ::= . +for_init_expr ::= expr. +for_init_expr ::= type variable_list EQ expr_list. +for_init_expr ::= assign_expr. + +for_bump_expr ::= . +for_bump_expr ::= expr. +for_bump_expr ::= assign_expr. + +/* Resolve the shift-reduce conflict here in favor of the shift. + * This is the default behavior, but using precedence symbols + * lets us do it without warnings. + */ +%left IF. +%left ELSE. +selection_statement ::= if_front. [IF] +selection_statement ::= if_front ELSE statement. [ELSE] + +selection_statement ::= SWITCH LPAREN expr RPAREN statement. + +if_front ::= IF LPAREN expr RPAREN statement. + +labeled_statement ::= CASE expr COLON. +labeled_statement ::= DEFAULT COLON. + +assign_statement ::= assign_expr SEMICOLON. [EQ] + +assign_expr ::= expr_list assign_op expr_list. +assign_op ::= EQ. +assign_op ::= MULEQ. +assign_op ::= DIVEQ. +assign_op ::= MODEQ. +assign_op ::= ADDEQ. +assign_op ::= SUBEQ. +assign_op ::= LSHEQ. +assign_op ::= RSHEQ. +assign_op ::= ANDEQ. +assign_op ::= OREQ. +assign_op ::= XOREQ. + +local_var ::= type variable_list var_init SEMICOLON. + +var_init ::= . +var_init ::= EQ expr_list. diff --git a/src/zscript/zcc_parser.cpp b/src/zscript/zcc_parser.cpp new file mode 100644 index 0000000000..8e3abe9425 --- /dev/null +++ b/src/zscript/zcc_parser.cpp @@ -0,0 +1,331 @@ +#include "dobject.h" +#include "sc_man.h" +#include "c_console.h" +#include "c_dispatch.h" +#include "w_wad.h" +#include "cmdlib.h" +#include "m_alloc.h" + +class ZCCParser +{ +public: + void PrintError(const char *fmt, ...) + { + va_list argptr; + va_start(argptr, fmt); + VPrintf(PRINT_HIGH, fmt, argptr); + va_end(argptr); + } +}; + +union ZCCToken +{ + int Int; + double Float; + const char *String; +}; + +class StringTable +{ +public: + StringTable() + { + memset(Buckets, 0, sizeof(Buckets)); + } + ~StringTable() + { + for (size_t i = 0; i < countof(Buckets); ++i) + { + Node *node, *next; + for (node = Buckets[i]; node != NULL; node = next) + { + if (node->String != NULL) + { + delete[] node->String; + } + next = node->Next; + delete node; + } + } + } + const char *Get(const char *str, int len) + { + unsigned int hash = SuperFastHash(str, len); + Node *node, **nodep; + + nodep = &Buckets[hash % countof(Buckets)]; + node = *nodep; + // Search for string in the hash table. + while (node != NULL) + { + if (node->Hash == hash && strcmp(str, node->String)) + { + return node->String; + } + nodep = &node->Next; + node = *nodep; + } + // String is not in the hash table; add it. + node = new Node; + *nodep = node; + node->Hash = hash; + node->String = copystring(str); + node->Next = NULL; + return node->String; + } +private: + struct Node + { + Node *Next; + const char *String; + unsigned int Hash; + }; + Node *Buckets[256]; +}; + +static FString ZCCTokenName(int terminal); + +#include "zcc-parse.h" +#include "zcc-parse.c" + +static TMap TokenMap; +static SWORD BackTokenMap[YYERRORSYMBOL]; // YYERRORSYMBOL immediately follows the terminals described by the grammar + +static void InitTokenMap() +{ +#define TOKENDEF(sc, zcc) TokenMap.Insert(sc, zcc); BackTokenMap[zcc] = sc + TOKENDEF('=', ZCC_EQ); + TOKENDEF(TK_MulEq, ZCC_MULEQ); + TOKENDEF(TK_DivEq, ZCC_DIVEQ); + TOKENDEF(TK_ModEq, ZCC_MODEQ); + TOKENDEF(TK_AddEq, ZCC_ADDEQ); + TOKENDEF(TK_SubEq, ZCC_SUBEQ); + TOKENDEF(TK_LShiftEq, ZCC_LSHEQ); + TOKENDEF(TK_RShiftEq, ZCC_RSHEQ); + TOKENDEF(TK_AndEq, ZCC_ANDEQ); + TOKENDEF(TK_OrEq, ZCC_OREQ); + TOKENDEF(TK_XorEq, ZCC_XOREQ); + TOKENDEF('?', ZCC_QUESTION); + TOKENDEF(':', ZCC_COLON); + TOKENDEF(TK_OrOr, ZCC_OROR); + TOKENDEF(TK_AndAnd, ZCC_ANDAND); + TOKENDEF(TK_Eq, ZCC_EQEQ); + TOKENDEF(TK_Neq, ZCC_NEQ); + TOKENDEF(TK_ApproxEq, ZCC_APPROXEQ); + TOKENDEF('<', ZCC_LT); + TOKENDEF('>', ZCC_GT); + TOKENDEF(TK_Leq, ZCC_LTEQ); + TOKENDEF(TK_Geq, ZCC_GTEQ); + TOKENDEF(TK_LtGtEq, ZCC_LTGTEQ); + TOKENDEF(TK_Is, ZCC_IS); + TOKENDEF(TK_DotDot, ZCC_DOTDOT); + TOKENDEF('|', ZCC_OR); + TOKENDEF('^', ZCC_XOR); + TOKENDEF('&', ZCC_AND); + TOKENDEF(TK_LShift, ZCC_LSH); + TOKENDEF(TK_RShift, ZCC_RSH); + TOKENDEF('-', ZCC_SUB); + TOKENDEF('+', ZCC_ADD); + TOKENDEF('*', ZCC_MUL); + TOKENDEF('/', ZCC_DIV); + TOKENDEF('%', ZCC_MOD); + TOKENDEF(TK_Cross, ZCC_CROSSPROD); + TOKENDEF(TK_Dot, ZCC_DOTPROD); + TOKENDEF(TK_MulMul, ZCC_POW); + TOKENDEF(TK_Incr, ZCC_ADDADD); + TOKENDEF(TK_Decr, ZCC_SUBSUB); + TOKENDEF('.', ZCC_DOT); + TOKENDEF('(', ZCC_LPAREN); + TOKENDEF(')', ZCC_RPAREN); + TOKENDEF(TK_ColonColon, ZCC_SCOPE); + TOKENDEF(';', ZCC_SEMICOLON); + TOKENDEF(',', ZCC_COMMA); + TOKENDEF(TK_Class, ZCC_CLASS); + TOKENDEF(TK_Abstract, ZCC_ABSTRACT); + TOKENDEF(TK_Native, ZCC_NATIVE); + TOKENDEF(TK_Replaces, ZCC_REPLACES); + TOKENDEF(TK_Static, ZCC_STATIC); + TOKENDEF(TK_Private, ZCC_PRIVATE); + TOKENDEF(TK_Protected, ZCC_PROTECTED); + TOKENDEF(TK_Latent, ZCC_LATENT); + TOKENDEF(TK_Final, ZCC_FINAL); + TOKENDEF(TK_Meta, ZCC_META); + TOKENDEF(TK_Deprecated, ZCC_DEPRECATED); + TOKENDEF('{', ZCC_LBRACE); + TOKENDEF('}', ZCC_RBRACE); + TOKENDEF(TK_Struct, ZCC_STRUCT); + TOKENDEF(TK_Enum, ZCC_ENUM); + TOKENDEF(TK_SByte, ZCC_SBYTE); + TOKENDEF(TK_Byte, ZCC_BYTE); + TOKENDEF(TK_Short, ZCC_SHORT); + TOKENDEF(TK_UShort, ZCC_USHORT); + TOKENDEF(TK_Int, ZCC_INT); + TOKENDEF(TK_UInt, ZCC_UINT); + TOKENDEF(TK_Bool, ZCC_BOOL); + TOKENDEF(TK_Float, ZCC_FLOAT); + TOKENDEF(TK_Double, ZCC_DOUBLE); + TOKENDEF(TK_String, ZCC_STRING); + TOKENDEF(TK_Vector, ZCC_VECTOR); + TOKENDEF(TK_Name, ZCC_NAME); + TOKENDEF(TK_Map, ZCC_MAP); + TOKENDEF(TK_Array, ZCC_ARRAY); + TOKENDEF(TK_Void, ZCC_VOID); + TOKENDEF('[', ZCC_LBRACKET); + TOKENDEF(']', ZCC_RBRACKET); + TOKENDEF(TK_In, ZCC_IN); + TOKENDEF(TK_Out, ZCC_OUT); + TOKENDEF(TK_Optional, ZCC_OPTIONAL); + TOKENDEF(TK_Super, ZCC_SUPER); + TOKENDEF(TK_Self, ZCC_SELF); + TOKENDEF('~', ZCC_TILDE); + TOKENDEF('!', ZCC_BANG); + TOKENDEF(TK_SizeOf, ZCC_SIZEOF); + TOKENDEF(TK_AlignOf, ZCC_ALIGNOF); + TOKENDEF(TK_Continue, ZCC_CONTINUE); + TOKENDEF(TK_Break, ZCC_BREAK); + TOKENDEF(TK_Return, ZCC_RETURN); + TOKENDEF(TK_Do, ZCC_DO); + TOKENDEF(TK_For, ZCC_FOR); + TOKENDEF(TK_While, ZCC_WHILE); + TOKENDEF(TK_Until, ZCC_UNTIL); + TOKENDEF(TK_If, ZCC_IF); + TOKENDEF(TK_Else, ZCC_ELSE); + TOKENDEF(TK_Switch, ZCC_SWITCH); + TOKENDEF(TK_Case, ZCC_CASE); + TOKENDEF(TK_Default, ZCC_DEFAULT); + TOKENDEF(TK_Const, ZCC_CONST); + TOKENDEF(TK_Stop, ZCC_STOP); + TOKENDEF(TK_Wait, ZCC_WAIT); + TOKENDEF(TK_Fail, ZCC_FAIL); + TOKENDEF(TK_Loop, ZCC_LOOP); + TOKENDEF(TK_Goto, ZCC_GOTO); + TOKENDEF(TK_States, ZCC_STATES); + + TOKENDEF(TK_Identifier, ZCC_IDENTIFIER); + TOKENDEF(TK_StringConst, ZCC_STRCONST); + TOKENDEF(TK_IntConst, ZCC_INTCONST); + TOKENDEF(TK_FloatConst, ZCC_FLOATCONST); + TOKENDEF(TK_NonWhitespace, ZCC_NWS); +#undef TOKENDEF +} + +static void DoParse(const char *filename) +{ + if (TokenMap.CountUsed() == 0) + { + InitTokenMap(); + } + + FScanner sc; + StringTable strings; + void *parser; + int tokentype; + int lump; + bool failed; + ZCCToken value; + + lump = Wads.CheckNumForFullName(filename, true); + if (lump >= 0) + { + sc.OpenLumpNum(lump); + } + else if (FileExists(filename)) + { + sc.OpenFile(filename); + } + else + { + Printf("Could not find script lump '%s'\n", filename); + return; + } + + parser = ZCCParseAlloc(malloc); + failed = false; + FILE *f = fopen("trace.txt", "w"); + ZCCParseTrace(f, ""); + while (sc.GetToken()) + { + if (sc.TokenType == TK_StringConst) + { + value.String = strings.Get(sc.String, sc.StringLen); + tokentype = ZCC_STRCONST; + } + else if (sc.TokenType == TK_IntConst) + { + value.Int = sc.Number; + tokentype = ZCC_INTCONST; + } + else if (sc.TokenType == TK_FloatConst) + { + value.Float = sc.Float; + tokentype = ZCC_FLOATCONST; + } + else if (sc.TokenType == TK_Identifier) + { + value.Int = FName(sc.String); + tokentype = ZCC_IDENTIFIER; + } + else if (sc.TokenType == TK_NonWhitespace) + { + value.Int = FName(sc.String); + tokentype = ZCC_NWS; + } + else + { + SWORD *zcctoken = TokenMap.CheckKey(sc.TokenType); + if (zcctoken != NULL) + { + tokentype = *zcctoken; + } + else + { + sc.ScriptMessage("Unexpected token %s.\n", sc.TokenName(sc.TokenType).GetChars()); + break; + } + } + ZCCParse(parser, tokentype, value, &sc); + if (failed) + { + sc.ScriptMessage("Parse failed\n"); + break; + } + } + value.Int = -1; + ZCCParse(parser, ZCC_EOF, value, &sc); + ZCCParse(parser, 0, value, &sc); + ZCCParseFree(parser, free); + if (f != NULL) + { + fclose(f); + } +} + +CCMD(parse) +{ + if (argv.argc() == 2) + { + DoParse(argv[1]); + } +} + +static FString ZCCTokenName(int terminal) +{ + if (terminal == ZCC_EOF) + { + return "end of file"; + } + int sc_token; + if (terminal > 0 && terminal < countof(BackTokenMap)) + { + sc_token = BackTokenMap[terminal]; + if (sc_token == 0) + { // This token was not initialized. Whoops! + sc_token = -terminal; + } + } + else + { // This should never happen. + sc_token = -terminal; + } + return FScanner::TokenName(sc_token); +} diff --git a/zdoom.vcproj b/zdoom.vcproj index c37dd5d912..689a1fe3df 100644 --- a/zdoom.vcproj +++ b/zdoom.vcproj @@ -1501,6 +1501,10 @@ RelativePath=".\src\sc_man.h" > + + @@ -6461,6 +6465,94 @@ RelativePath=".\src\zscript\vmops.h" > + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +