Lunatic translator: nearing syntax completion while treading in murky waters.

- bump lpeg stack limit to 1024 (hack) for long if/if/if...else chains
- lexical: newline, EOF and whitespace; "bad" identifiers; hex literals,
           newline terminated strings (allow empty string)
- syntax: allow nesting of array expressions, it's easier than special-casing;
          handle issues with structs expecting parm2 (hackish); userdef;
          switch/case/default
- fix parsing some commands; mind prefixes!
- frontend: if the match does not cover the whole file, print the last keyword
  and its position; warn on "bad" identifiers (once per distinct identifier);
  batch processing of multiple files from the command line
- Files that `locate` finds on my Linux box now pass (except where there
  really are syntax errors).

git-svn-id: https://svn.eduke32.com/eduke32@2616 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
helixhorned 2012-04-29 20:54:06 +00:00
parent b2b8ab7890
commit f5ee6ca7eb

View file

@ -3,6 +3,17 @@
local lpeg = require("lpeg") local lpeg = require("lpeg")
-- If/else nesting is problematic in CON: because a dangling 'else' is attached
-- to the outermost 'if', I think there's no way of linearizing its (recursive)
-- pattern, so the "too many pending calls/choices" is unavoidable in general.
-- This limit is of course still arbitrary, but writing if/else cascades
-- in CON isn't pretty either (though sometimes necessary because nested switches
-- don't work?)
-- See also: http://lua-users.org/lists/lua-l/2010-03/msg00086.html
lpeg.setmaxstack(1024);
local Pat, Set, Range, Var = lpeg.P, lpeg.S, lpeg.R, lpeg.V local Pat, Set, Range, Var = lpeg.P, lpeg.S, lpeg.R, lpeg.V
@ -19,20 +30,24 @@ end
----==== patterns ====---- ----==== patterns ====----
---- basic ones ---- basic ones
local newline = Pat("\n") -- need to do stuff on newline later... -- Windows, *nix and Mac newlines all exist in the wild!
local newline = "\r"*Pat("\n")^-1 + "\n"
local EOF = Pat(-1)
local anychar = Pat(1) local anychar = Pat(1)
-- comments -- comments
local comment = "/*" * match_until(anychar, "*/") * "*/" local comment = "/*" * match_until(anychar, "*/") * "*/"
local linecomment = "//" * match_until(anychar, newline) local linecomment = "//" * match_until(anychar, newline)
local whitespace = Var("whitespace") local whitespace = Var("whitespace")
local sp0 = whitespace^0 local sp0 = whitespace^0
local sp1 = whitespace^1 -- + (-1) -- matches EOF, too -- This "WS+" pattern matches EOF too, so that a forgotten newline at EOF is
-- properly handled
local sp1 = whitespace^1 + EOF
local alpha = Range("AZ", "az") -- locale? local alpha = Range("AZ", "az") -- locale?
local alphanum = alpha + Range("09") local alphanum = alpha + Range("09")
local alnumtok = alphanum + Set("{}/\\*-_.") -- see isaltok() in gamedef.c --local alnumtok = alphanum + Set("{}/\\*-_.") -- see isaltok() in gamedef.c
--- basic lexical elements ("tokens") --- basic lexical elements ("tokens")
local t_number = Range("09")^1 + (Pat("0x") + "0X")*Range("09", "af")^1 local t_number = Range("09")^1 + (Pat("0x") + "0X")*Range("09", "af", "AF")^1
-- Valid identifier names are disjunct from keywords! -- Valid identifier names are disjunct from keywords!
-- XXX: CON is more permissive with identifier name characters: -- XXX: CON is more permissive with identifier name characters:
local t_identifier = Var("t_identifier") local t_identifier = Var("t_identifier")
@ -53,7 +68,8 @@ local t_wvar = t_arrayexp + t_identifier
---- helper patterns / pattern constructing functions ---- helper patterns / pattern constructing functions
local maybe_quoted_filename = ('"' * t_filename * '"' + t_filename) local maybe_quoted_filename = ('"' * t_filename * '"' + t_filename)
local newline_term_string = (whitespace-newline)^1 * t_newline_term_str -- XXX: empty string? -- empty string is handled too; we must not eat the newline then!
local newline_term_string = (#newline + EOF) + (whitespace-newline)^1 * t_newline_term_str
-- (sp1 * t_define) repeated exactly n times -- (sp1 * t_define) repeated exactly n times
@ -102,6 +118,7 @@ end
-- The command names will be attached to the front of the patterns later! -- The command names will be attached to the front of the patterns later!
--== Top level CON commands ==-- --== Top level CON commands ==--
-- XXX: many of these are also allowed inside actors/states/events in CON.
local Co = { local Co = {
--- 1. Preprocessor --- 1. Preprocessor
include = sp1 * maybe_quoted_filename, include = sp1 * maybe_quoted_filename,
@ -163,14 +180,22 @@ local Co = {
local varop = cmd(W,D) local varop = cmd(W,D)
local varvarop = cmd(W,R) local varvarop = cmd(W,R)
-- t_define is t_rvar without t_arrayexp, actually, so that no nesting is allowed: -- Allow nesting... stuff like
local arraypat = sp0 * "[" * sp0 * t_define * sp0 * "]" -- ifvarl actorvar[sprite[THISACTOR].owner].burning 0
-- is kinda breaking the classic "no array nesting" rules
-- (if there ever were any) and making our life harder else.
local arraypat = sp0 * "[" * sp0 * t_rvar * sp0 * "]"
-- Have to bite the bullet here and list actor/player members with second parameters,
-- even though it's ugly to make it part of the syntax. Also, stuff like
-- actor[xxx].loogiex parm2 x
-- will be wrongly accepted at the parsing stage because we don't discriminate between
-- actor and player (but it will be rejected later).
local parm2memberpat = (Pat("htg_t") + "loogiex" + "loogiey" + "ammo_amount" +
"weaprecs" + "gotweapon" + "pals" + "max_ammo_amount") * sp0 * t_rvar
-- The member name must match keywords, too (_all), because e.g. cstat is a member -- The member name must match keywords, too (_all), because e.g. cstat is a member
-- of sprite[] -- of sprite[].
-- XXX: actor and sprite members can have parm2, e.g. this is legal: local memberpat = sp0 * "." * sp0 * (parm2memberpat + t_identifier_all)
-- ifvare player[THISACTOR].gotweapon 4 0
-- but not parsed currently.
local memberpat = sp0 * "." * sp0 * t_identifier_all
local getstructcmd = -- get<structname>[<idx>].<member> (<parm2>)? <<var>> local getstructcmd = -- get<structname>[<idx>].<member> (<parm2>)? <<var>>
-- existence of a second parameter is determined later -- existence of a second parameter is determined later
@ -204,7 +229,11 @@ local Ci = {
getsector = getstructcmd, getsector = getstructcmd,
getthisprojectile = getstructcmd, getthisprojectile = getstructcmd,
gettspr = getstructcmd, gettspr = getstructcmd,
getuserdef = getstructcmd, -- NOTE: {get,set}userdef is the only struct that can be accessed without
-- an "array part", e.g. H266mod has "setuserdef .weaponswitch 0" (space
-- between keyword and "." is mandatory)
getuserdef = (arraypat + sp1) * memberpat * sp1 * (t_rvar * sp1 * t_wvar + t_wvar),
-- getuserdef = getstructcmd,
getwall = getstructcmd, getwall = getstructcmd,
getactorvar = getperxvarcmd, getactorvar = getperxvarcmd,
@ -217,7 +246,8 @@ local Ci = {
setsector = setstructcmd, setsector = setstructcmd,
setthisprojectile = setstructcmd, setthisprojectile = setstructcmd,
settspr = setstructcmd, settspr = setstructcmd,
setuserdef = setstructcmd, setuserdef = (arraypat + sp1) * memberpat * sp1 * (t_rvar * sp1 * t_wvar + t_rvar),
-- setuserdef = setstructcmd,
setwall = setstructcmd, setwall = setstructcmd,
setactorvar = setperxvarcmd, setactorvar = setperxvarcmd,
@ -355,7 +385,6 @@ local Ci = {
killit = cmd(), killit = cmd(),
mikesnd = cmd(), mikesnd = cmd(),
nullop = cmd(), nullop = cmd(),
operate = cmd(),
pkick = cmd(), pkick = cmd(),
pstomp = cmd(), pstomp = cmd(),
resetactioncount = cmd(), resetactioncount = cmd(),
@ -392,12 +421,12 @@ local Ci = {
qsubstr = cmd(R,R), qsubstr = cmd(R,R),
-- array stuff -- array stuff
copy = sp1 * t_identifier * arraypat * sp1 * t_identifier * arraypat, copy = sp1 * t_identifier * arraypat * sp1 * t_identifier * arraypat * sp1 * t_rvar,
setarray = sp1 * t_identifier * arraypat * sp1 * t_rvar, setarray = sp1 * t_identifier * arraypat * sp1 * t_rvar,
activatebysector = cmd(R,R), activatebysector = cmd(R,R),
addlog = cmd(), addlogvar = cmd(R), -- HERE, it's significant that addlogvar
addlogvar = cmd(R), -- addlog = cmd(), -- comes before addlog!
addweaponvar = cmd(R,R), -- exec SPECIAL HANDLING! addweaponvar = cmd(R,R), -- exec SPECIAL HANDLING!
cansee = cmd(R,R,R,R,R,R,R,R,W), cansee = cmd(R,R,R,R,R,R,R,R,W),
canseespr = cmd(R,R,W), canseespr = cmd(R,R,W),
@ -416,7 +445,7 @@ local Ci = {
gametext = cmd(R,R,R,R,R,R,R,R,R,R,R), -- 11 R gametext = cmd(R,R,R,R,R,R,R,R,R,R,R), -- 11 R
gametextz = cmd(R,R,R,R,R,R,R,R,R,R,R,R), -- 12 R gametextz = cmd(R,R,R,R,R,R,R,R,R,R,R,R), -- 12 R
digitalnumber = cmd(R,R,R,R,R,R,R,R,R,R,R), -- 11R digitalnumber = cmd(R,R,R,R,R,R,R,R,R,R,R), -- 11R
digitalnumberz = cmd(W,R,R,R,R,R,R,R,R,R,R,R), -- 1W 11R digitalnumberz = cmd(R,R,R,R,R,R,R,R,R,R,R,R), -- 12R
minitext = cmd(R,R,R,R,R), minitext = cmd(R,R,R,R,R),
ldist = cmd(W,R,R), ldist = cmd(W,R,R),
@ -426,14 +455,17 @@ local Ci = {
savemapstate = cmd(), savemapstate = cmd(),
movesprite = cmd(R,R,R,R,R,W), movesprite = cmd(R,R,R,R,R,W),
neartag = cmd(R,R,R,R,R,W,W,W,W,R,R), neartag = cmd(R,R,R,R,R,W,W,W,W,R,R),
operateactivators = cmd(R), operateactivators = cmd(R,R),
operatesectors = cmd(R), operatesectors = cmd(R,R),
palfrom = (sp1 * t_define)^-4, palfrom = (sp1 * t_define)^-4,
-- must come after all other operate* commands
-- operate = cmd(),
myos = cmd(R,R,R,R,R), myos = cmd(R,R,R,R,R),
myosx = cmd(R,R,R,R,R),
myospal = cmd(R,R,R,R,R,R), myospal = cmd(R,R,R,R,R,R),
myospalx = cmd(R,R,R,R,R,R), myospalx = cmd(R,R,R,R,R,R),
myosx = cmd(R,R,R,R,R),
headspritesect = cmd(R,R), headspritesect = cmd(R,R),
headspritestat = cmd(R,R), headspritestat = cmd(R,R),
@ -519,31 +551,6 @@ local Cif = {
ifsound = cmd(D), ifsound = cmd(D),
ifpinventory = cmd(D,D), ifpinventory = cmd(D,D),
ifp = (sp1 * t_define)^1,
ifclient = cmd(),
ifserver = cmd(),
ifonwater = cmd(),
ifinwater = cmd(),
ifactornotstayput = cmd(),
ifactorsound = cmd(),
ifcansee = cmd(),
ifhitweapon = cmd(),
ifsquished = cmd(),
ifdead = cmd(),
ifcanshoottarget = cmd(),
ifhitspace = cmd(),
ifoutside = cmd(),
ifmultiplayer = cmd(),
ifinspace = cmd(),
ifbulletnear = cmd(),
ifrespawn = cmd(),
ifinouterspace = cmd(),
ifnotmoving = cmd(),
ifawayfromwall = cmd(),
ifcanseetarget = cmd(),
ifnosounds = cmd(),
ifvarl = cmd(R,D), ifvarl = cmd(R,D),
ifvarg = cmd(R,D), ifvarg = cmd(R,D),
ifvare = cmd(R,D), ifvare = cmd(R,D),
@ -553,7 +560,6 @@ local Cif = {
ifvarxor = cmd(R,D), ifvarxor = cmd(R,D),
ifvareither = cmd(R,D), ifvareither = cmd(R,D),
ifactorsound = cmd(R,R),
ifvarvarg = cmd(R,R), ifvarvarg = cmd(R,R),
ifvarvarl = cmd(R,R), ifvarvarl = cmd(R,R),
ifvarvare = cmd(R,R), ifvarvare = cmd(R,R),
@ -562,6 +568,31 @@ local Cif = {
ifvarvaror = cmd(R,R), ifvarvaror = cmd(R,R),
ifvarvarxor = cmd(R,R), ifvarvarxor = cmd(R,R),
ifvarvareither = cmd(R,R), ifvarvareither = cmd(R,R),
ifactorsound = cmd(R,R),
ifp = (sp1 * t_define)^1,
ifsquished = cmd(),
ifserver = cmd(),
ifrespawn = cmd(),
ifoutside = cmd(),
ifonwater = cmd(),
ifnotmoving = cmd(),
ifnosounds = cmd(),
ifmultiplayer = cmd(),
ifinwater = cmd(),
ifinspace = cmd(),
ifinouterspace = cmd(),
ifhitweapon = cmd(),
ifhitspace = cmd(),
ifdead = cmd(),
ifclient = cmd(),
ifcanshoottarget = cmd(),
ifcanseetarget = cmd(),
-- ifcansee = cmd(),
ifbulletnear = cmd(),
ifawayfromwall = cmd(),
ifactornotstayput = cmd(),
} }
@ -613,10 +644,15 @@ end
local function getlinecol(pos) local function getlinecol(pos)
local line = bsearch(newlineidxs, pos) local line = bsearch(newlineidxs, pos)
local col = pos-newlineidxs[line] local col = pos-newlineidxs[line-1]
return line, col return line, col
end end
-- Last keyword position, for error diagnosis.
local g_lastkwpos = nil
local g_lastkw = nil
local g_badids = {} -- maps bad id strings to 'true'
-- A generic trace function, prints a position together with the match content -- A generic trace function, prints a position together with the match content
-- A non-existing 'doit' means 'true'. -- A non-existing 'doit' means 'true'.
local function TraceFunc(pat, label, doit) local function TraceFunc(pat, label, doit)
@ -625,20 +661,42 @@ local function TraceFunc(pat, label, doit)
if (doit==nil or doit) then if (doit==nil or doit) then
local function tfunc(subj, pos, a) local function tfunc(subj, pos, a)
local line, col = getlinecol(pos) local line, col = getlinecol(pos)
printf("%d,%d:%s:%s", line, col, label, a) printf("%d,%d:%s: %s", line, col, label, a)
return true return true
end end
pat = lpeg.Cmt(pat, tfunc) pat = lpeg.Cmt(pat, tfunc)
elseif (label=="kw") then -- HACK
local function tfunc(subj, pos, a)
g_lastkwpos = pos
g_lastkw = a
return true
end
-- XXX: is there a better way?
pat = lpeg.Cmt(pat, tfunc)
end end
return pat return pat
end end
local function BadIdentFunc(pat)
local function tfunc(subj, pos, a)
if (not g_badids[a]) then
local line, col = getlinecol(pos)
printf("%d,%d: warning: bad identifier: %s", line, col, a)
g_badids[a] = true
end
return true
end
return lpeg.Cmt(Pat(pat), tfunc)
end
-- These are tracers for specific patterns which can be disabled -- These are tracers for specific patterns which can be disabled
-- if desired. -- if desired.
local function Keyw(kwname) return TraceFunc(kwname, "kw", true) end local function Keyw(kwname) return TraceFunc(kwname, "kw", false) end
local function NotKeyw(text) return TraceFunc(text, "!kw", true) end local function NotKeyw(text) return TraceFunc(text, "!kw", false) end
local function Ident(idname) return TraceFunc(idname, "id", true) end local function Ident(idname) return TraceFunc(idname, "id", false) end
local function Stmt(cmdpat) return TraceFunc(cmdpat, "st", true) end local function BadIdent(idname) return BadIdentFunc(idname) end
local function Stmt(cmdpat) return TraceFunc(cmdpat, "st", false) end
----==== Translator continued ====---- ----==== Translator continued ====----
@ -662,8 +720,10 @@ local function all_alt_pattern(...)
local args = {...} local args = {...}
for argi=1,#args do for argi=1,#args do
local pattab = args[argi] local pattab = args[argi]
-- NOTE: pairs() iterates in undefined order!
-- We can't handle prefix-problematic commands this way here.
for cmdname,cmdpat in pairs(pattab) do for cmdname,cmdpat in pairs(pattab) do
pat = pat + cmdpat pat = cmdpat + pat
end end
end end
return pat return pat
@ -671,24 +731,32 @@ end
-- actor ORGANTIC is greeting! -- actor ORGANTIC is greeting!
local function warn_on_lonely_else(subj, pos) local function warn_on_lonely_else(subj, pos)
print(pos..": warning: found `else' with no `if'") local line, col = getlinecol(pos)
printf("%d,%d: warning: found `else' with no `if'", line, col)
return true return true
end end
-- About prefixes: I think it's not a problem *here* if e.g. "getactor" comes -- NOTE: The indented text is not true, e.g. addlog vs. addlogvar:
-- before "getactorvar", because the pattern for the former will fail -- since 'addlog' has no args, it will get matched given an 'addlogvar' in the subject:
-- eventually in the ordered choice if fed with the latter. However, it DOES -- About prefixes: I think it's not a problem *here* if e.g. "getactor" comes
-- matter in the keyword list, see NotKeyw() trace function and comment in -- before "getactorvar", because the pattern for the former will fail
-- con_lang.lua. -- eventually in the ordered choice if fed with the latter. However, it DOES
-- matter in the keyword list, see NotKeyw() trace function and comment in
-- con_lang.lua.
-- Do we have more of them? Yes.
-- operate/operate*
-- ifcansee/ifcanseetarget
local con_outer_command = all_alt_pattern(Co) local con_outer_command = all_alt_pattern(Co)
local con_inner_command = all_alt_pattern(Ci) -- Empty-arged commands that are prefixes of others must come last:
local con_if_begs = all_alt_pattern(Cif) local con_inner_command = all_alt_pattern(Ci) + "addlog" + "operate"
local con_if_begs = all_alt_pattern(Cif) + "ifcansee"
local lone_else = lpeg.Cmt("else" * sp1, warn_on_lonely_else) local lone_else = lpeg.Cmt("else" * sp1, warn_on_lonely_else)
local stmt_list = Var("stmt_list") local stmt_list = Var("stmt_list")
-- possibly empty statement list: -- possibly empty statement list:
local stmt_list_or_eps = (stmt_list * sp1)^-1 local stmt_list_or_eps = (stmt_list * sp1)^-1
local stmt_list_nosp_or_eps = (stmt_list * (sp1 * stmt_list)^0)^-1
-- common to all three: <name/tilenum> [<strength> [<action> [<move> [<ai>... ]]]] -- common to all three: <name/tilenum> [<strength> [<action> [<move> [<ai>... ]]]]
local common_actor_end = sp1 * t_define * sp1 * (t_define * sp1)^0 * stmt_list_or_eps * "enda" local common_actor_end = sp1 * t_define * sp1 * (t_define * sp1)^0 * stmt_list_or_eps * "enda"
@ -710,19 +778,32 @@ local Cb = {
attachnames(Cb) attachnames(Cb)
local t_good_identifier = Range("AZ", "az", "__") * Range("AZ", "az", "__", "09")^0
-- CON isaltok also has chars in "{}.", but these could potentially
-- interfere with *CON* syntax. The "]" is so that the number in array[80]
-- isn't considered a broken identifier.
-- "-" is somewhat problematic, but we allow it only as 2nd and up character, so
-- there's no ambiguity with unary minus. (Commands must be separated by spaces
-- in CON, so a trailing "-" is "OK", too.)
-- This is broken in itself, so we ought to make a compatibility/modern CON switch.
local t_broken_identifier = BadIdent(-((t_number + t_good_identifier) * (sp1 + Set("[]:"))) *
(alphanum + Set("_/\\*")) * (alphanum + Set("_/\\*-"))^0)
--- The final grammar! --- The final grammar!
local Grammar = Pat{ local Grammar = Pat{
-- The starting symbol. -- The starting symbol.
-- A translation unit is a (possibly empty) sequence of outer CON -- A translation unit is a (possibly empty) sequence of outer CON
-- commands, separated by at least one whitespace which may be -- commands, separated by at least one whitespace which may be
-- omitted at the EOF. -- omitted at the EOF.
sp0 * ((con_outer_command + all_alt_pattern(Cb)) * (sp1 + (-1)))^0, sp0 * ((con_outer_command + all_alt_pattern(Cb)) * sp1)^0,
-- Deps. These appear here because we're hitting a limit with LPeg else: -- Deps. These appear here because we're hitting a limit with LPeg else:
-- http://lua-users.org/lists/lua-l/2008-11/msg00462.html -- http://lua-users.org/lists/lua-l/2008-11/msg00462.html
whitespace = Set(" \t\r") + newline + Set("(),;") + comment + linecomment, -- NOTE: NW demo (NWSNOW.CON) contains a Ctrl-Z char (dec 26)
whitespace = Set(" \t\r\26") + newline + Set("(),;") + comment + linecomment,
t_identifier_all = Range("AZ", "az", "__") * Range("AZ", "az", "__", "09")^0, t_identifier_all = t_broken_identifier + t_good_identifier,
-- NOTE: -con_keyword alone would be wrong, e.g. "state breakobject": -- NOTE: -con_keyword alone would be wrong, e.g. "state breakobject":
-- NOTE 2: The + "[" is so that stuff like -- NOTE 2: The + "[" is so that stuff like
-- getactor[THISACTOR].x x -- getactor[THISACTOR].x x
@ -732,13 +813,19 @@ local Grammar = Pat{
-- getactor [THISACTOR].y y -- getactor [THISACTOR].y y
-- This is in need of cleanup! -- This is in need of cleanup!
t_identifier = -NotKeyw(con_keyword * (sp1 + "[")) * Ident(t_identifier_all), t_identifier = -NotKeyw(con_keyword * (sp1 + "[")) * Ident(t_identifier_all),
t_define = Pat("-")^-1 * sp0 * (t_identifier + t_number), t_define = (Pat("-") * sp0)^-1 * (t_identifier + t_number),
t_arrayexp = t_identifier * arraypat * memberpat^-1, t_arrayexp = t_identifier * arraypat * memberpat^-1,
switch_stmt = Keyw("switch") * (sp1 * (Var("case") + Var("default")))^0 * sp1 * "endswitch", -- SWITCH
case = Keyw("case") * sp1 * t_define * sp0 * Pat(":")^-1 * sp1 * stmt_list_or_eps * "break", switch_stmt = Keyw("switch") * sp1 * t_rvar *
default = Keyw("default") * sp0 * Pat(":")^-1 * sp1 * stmt_list_or_eps * "break", (Var("case_block") + Var("default_block"))^0 * sp1 * "endswitch",
-- NOTE: some old DNWMD has "case: PIGCOP". I don't think I'll allow that.
case_block = (sp1 * Keyw("case") * sp1 * t_define * (sp0*":")^-1)^1 * sp1 *
stmt_list_nosp_or_eps, -- * "break",
default_block = sp1 * Keyw("default") * (sp0*":"*sp0 + sp1) * stmt_list_nosp_or_eps, -- * "break",
-- The "lone" if statement is tested first, so that a potential dangling "else" is -- The "lone" if statement is tested first, so that a potential dangling "else" is
-- attached to the outermost possible "if", as done by CON -- attached to the outermost possible "if", as done by CON
@ -751,7 +838,7 @@ local Grammar = Pat{
-- TODO: some sp1 --> sp0? -- TODO: some sp1 --> sp0?
single_stmt = Stmt( single_stmt = Stmt(
lone_else^-1 * lone_else^-1 *
( Keyw("{") * sp0 * "}" ( Keyw("{") * sp1 * "}" -- space separation of commands in CON is for a reason!
+ Keyw("{") * sp1 * stmt_list * sp1 * "}" + Keyw("{") * sp1 * stmt_list * sp1 * "}"
+ (con_inner_command + Var("switch_stmt") + Var("if_stmt") + Var("while_stmt")) + (con_inner_command + Var("switch_stmt") + Var("if_stmt") + Var("while_stmt"))
-- + lpeg.Cmt(t_newline_term_str, function (subj, curpos) print("Error at "..curpos) end) -- + lpeg.Cmt(t_newline_term_str, function (subj, curpos) print("Error at "..curpos) end)
@ -769,37 +856,44 @@ local function setup_newlineidxs(contents)
for i in string.gmatch(contents, "()\n") do for i in string.gmatch(contents, "()\n") do
newlineidxs[#newlineidxs+1] = i newlineidxs[#newlineidxs+1] = i
end end
newlineidxs[#newlineidxs+1] = #contents+1 -- dummy newline -- dummy newlines at beginning and end
newlineidxs[#newlineidxs+1] = #contents+1
newlineidxs[0] = 0
end end
---=== stand-alone: ===--- ---=== stand-alone: ===---
if (not EDUKE32_LUNATIC) then if (not EDUKE32_LUNATIC) then
local io = require("io") local io = require("io")
local filename = arg[1] for argi=1,#arg do
assert(filename) local filename = arg[argi]
printf("\n---- Parsing file \"%s\"", filename);
local contents = io.open(filename):read("*all") local contents = io.open(filename):read("*all")
setup_newlineidxs(contents) setup_newlineidxs(contents)
local idx = lpeg.match(Grammar, contents) g_badids = {}
if (not idx) then local idx = lpeg.match(Grammar, contents)
print("Match failed.")
return if (not idx) then
print("Match failed.")
elseif (idx == #contents+1) then
print("Matched whole contents.")
else
local i, col = getlinecol(idx)
local bi, ei = newlineidxs[i]+1, newlineidxs[i+1]-1
printf("Match succeeded up to %d (line %d, col %d; len=%d)",
idx, i, col, #contents)
-- printf("Line goes from %d to %d", bi, ei)
print(string.sub(contents, bi, ei))
if (g_lastkwpos) then
i, col = getlinecol(g_lastkwpos)
printf("Last keyword was at line %d, col %d: %s", i, col, g_lastkw)
end
end
end end
if (idx == #contents+1) then
print("Matched whole contents.")
return
end
local i, col = getlinecol(idx)
local bi, ei = newlineidxs[i]+1, newlineidxs[i+1]-1
printf("Match succeeded up to %d (line %d, col %d; len=%d)",
idx, i, col, #contents)
-- printf("Line goes from %d to %d", bi, ei)
print(string.sub(contents, bi, ei))
end end