Skip to content

Parser grammars

Karsten Schmidt edited this page Jul 20, 2020 · 6 revisions

S-expression grammar

Lisp / Clojure / WASM-style languages & data structures

# S-expression grammar: parenthesized lists and bracketed vectors containing
# floats, strings, symbols, whitespace and nested collections.
# NOTE(review): the `!` suffix is assumed to drop the matched term from the
# result and `=> name` to apply a rule transform - confirm in the parser docs.

# list: an <expr> enclosed in parentheses.
list: '('! <expr> ')'! ;
# vec: an <expr> enclosed in square brackets.
vec: '['! <expr> ']'! ;
# sym: one or more alphanumerics or listed punctuation chars, joined.
# \u002d is the hyphen-minus '-'; presumably written as an escape so it is
# not read as a range operator inside the char class.
sym: ( <ALPHA_NUM> | [?!$+\u002d*/.~#^=<>] )+ => join ;
# expr: zero or more values or whitespace runs; recursion happens via
# <list> and <vec>, which each wrap another <expr>.
expr: ( <FLOAT> | <STRING> | <sym> | <list> | <vec> | <WS1> )* ;
# main: entry rule, a single <expr> placed between <START> and <END>
# (presumably input start/end anchors - confirm).
main: <START> <expr> <END> => hoist ;

Playground

Forth-style language grammar

# Forth-style grammar with additional line comments, stack-effect comments
# and quotations.
# NOTE(review): `!` is assumed to discard the matched term and `(?-X)` to be
# a lookahead which stops before X - confirm in the parser docs.

# csym: lowercase alphanumeric name, used inside stack-effect comments.
csym: [a-z0-9]+ => join ;
# sym: word name; like csym but also allows + - * / $ # !
sym: [a-z0-9+\-*/$#!]+ => join ;
# comment: '#' followed by <WS0>, then any chars up to <LEND>.
comment: '#'! <WS0> .(?-<LEND>) => join ;
# sfxpart: one side of a stack effect, a mix of names and whitespace.
sfxpart: ( <csym> | <WS1>)+ ;
# stackfx: stack-effect comment of the form `( a b -- c )`; the part after
# "--" is optional.
stackfx: <WS1> '('! <sfxpart> "--"! <sfxpart>? ')'! ;
# prim: literal value, either a float or a string.
prim: (<FLOAT> | <STRING>) ;
# quote: square-bracketed quotation; may be empty and may nest other quotes.
quote: '['! ( <comment> | <prim> | <sym> | <quote> | <WS1> )* ']'! ;
# def: word definition `: name ( stack effect )? body ;`. The body needs at
# least one item and, unlike main, does not list <def> (no nested defs).
def: ':'! <WS1> <sym> <stackfx>? <WS1> ( <comment> | <prim> | <sym> | <quote> | <WS1> )+ ';'! ;
# main: entry rule, one or more top-level items.
main: ( <comment> | <prim> | <sym> | <def> | <quote> | <WS1> )+ ;

Playground

XML(ish)

# XML-like grammar: nested elements, void (self-closing) tags, attributes
# with or without values, and text bodies.
# NOTE(review): `!` is assumed to discard the matched term; `(?+X)` /
# `(?-X)` are assumed to be lookaheads which do / do not consume X - confirm.

# sym: tag or attribute name made of letters, digits and : - _ .
sym: [A-Za-z0-9:\-_.]+ => join ;
# aval: attribute value; `="` then any chars up to the closing double quote.
aval: "=\""! .(?+'"'!) => join ;
# vattrib: attribute with a value, e.g. ` name="value"`.
vattrib: <WS1> <sym> <aval> ;
# battrib: attribute name without a value, with <WS1> as lookahead.
battrib: <WS1> <sym>(?-<WS1>) => hoist ;
# attrib: either attribute form; the valued form is listed first.
attrib: (<vattrib> | <battrib>) ;
# elstart: opening tag `<name attribs...>`.
elstart: '<'! <sym> <attrib>* '>'! ;
# elend: closing tag `</name>`, marked with the `discard` transform.
# NOTE(review): nothing here ties the closing name to the opening name.
elend: "</"! <sym> '>'! => discard ;
# voidtag: self-closing tag `<name attribs... />`.
voidtag: '<'! <sym> <attrib>* <WS0> "/>"! ;
# body: text content, any chars up to the next '<'.
body: .(?-'<'!) => join ;
# elem: opening tag, then any mix of text, child elements and void tags,
# then a closing tag.
elem: <elstart> (<body> | <elem> | <voidtag>)* <elend> ;
# main: entry rule, zero or more top-level elements, void tags or whitespace.
main: (<elem> | <voidtag> | <WS1>)* ;

Playground

Markdown(ish)

# Shared line-break, delimiter and text-body rules for the Markdown-like
# grammar.
# NOTE(review): <NL> and <DNL> are presets defined outside this grammar;
# <DNL> is presumably a discarded newline - confirm in the parser docs.

# DNL1: one or more <DNL>, marked with the `discard` transform.
DNL1: <DNL>+ => discard ;
# DNL2: two or more consecutive <NL> (used by para as its terminator).
DNL2: <NL>{2,} ;
# inlinedelim: tokens which open an inline construct: image, link/ref,
# bold, italic, strike-through, inline code.
inlinedelim: ( "![" | '[' | "**" | "_" | "~~" | '`' ) ;
# delim: body terminator for multi-line text, inline token or <DNL2>.
delim: ( <inlinedelim> | <DNL2> ) ;
# delim1: body terminator for single-line text, inline token or one <NL>.
delim1: ( <inlinedelim> | <NL> ) ;
# body: plain text, any chars up to the next <delim>.
body: .(?-<delim>!) => join ;
# body1: plain text within a single line, any chars up to the next <delim1>.
body1: .(?-<delim1>!) => join ;

# Inline constructs and paragraphs.
# NOTE(review): `X(?+Y!)` is assumed to repeat X until Y is found, then
# consume and discard Y - confirm in the parser docs.

# ref: `[[...]]` reference; content runs up to the closing "]]".
ref: "[["! .(?+"]]"!) => join ;
# label: text up to a closing ']'.
label: .(?+']'!) => join ;
# target: text up to a closing ')'.
target: .(?+')'!) => join ;
# link: `[label](target)`; the closing ']' and ')' are matched by <label>
# and <target> rather than by this rule.
link: '['! <label> '('! <target> ;
# img: `![label](target)`, same shape as link with a "![" opener.
img: "!["! <label> '('! <target> ;
# bold: `**...**`.
bold: "**"! .(?+"**"!) => join ;
# italic: `_..._`.
italic: "_"! .(?+"_"!) => join ;
# code: inline code between backticks.
code: '`'! .(?+'`'!) => join ;
# strike: `~~...~~`.
strike: "~~"! .(?+"~~"!) => join ;
# para: zero or more inline items terminated by <DNL2>. <ref> is listed
# before <link>, presumably because both begin with '[' and alternatives
# are tried in order.
para: (<ref> | <img> | <link> | <bold> | <italic> | <strike> | <code> | <body>)* <DNL2>! ;

# hdlevel: run of one or more '#' chars with the `count` transform
# (presumably yielding the heading level - confirm).
hdlevel: '#'+ => count ;
# hd: heading line, <LSTART>, '#' run, <WS0>, then inline items limited to
# a single line (via <body1>), ended by <DNL1>.
hd: <LSTART> <hdlevel> <WS0>
    (<ref> | <img> | <link> | <bold> | <italic> | <strike> | <code> | <body1> )* <DNL1> ;

# Unordered lists.

# lilevel: run of zero or more spaces with the `count` transform
# (presumably yielding the item's indentation - confirm).
lilevel: ' '* => count ;
# ulid: <DNL>, <WS0>, then '-'; used as a delimiter in <ulidelim>
# (i.e. where a following list item starts).
ulid: <DNL> <WS0> '-'! ;
# ulidelim: terminator for item text, any <delim> or the next item marker.
ulidelim: ( <delim> | <ulid> ) ;
# ulibody: plain item text, any chars up to the next <ulidelim>.
ulibody: .(?-<ulidelim>!) => join ;
# todo: task checkbox `[ ]`, `[x]` or `[X]` followed by <WS1>.
todo: '['! [ xX] ']'! <WS1> => hoistR ;
# ulitem: one item, <LSTART>, indentation, "- ", optional checkbox, inline
# items, then <DNL>.
ulitem: <LSTART> <lilevel> "- "! <todo>?
        (<ref> | <img> | <link> | <bold> | <italic> | <strike> | <code> | <ulibody> )* <DNL> ;
# ulist: one or more items followed by <DNL1>.
ulist: <ulitem>+ <DNL1> ;

# Ordered lists; same structure as the unordered list rules, with a
# `<number>. ` marker in place of `- `.

# uint: one or more digits, joined.
uint: <DIGIT>+ => join ;
# olid: <DNL>, <WS0>, digits and '.'; used as a delimiter in <olidelim>
# (i.e. where a following numbered item starts).
olid: <DNL> <WS0> <DIGIT>+! '.'! ;
# olidelim: terminator for item text, any <delim> or the next item marker.
olidelim: ( <delim> | <olid> ) ;
# olibody: plain item text, any chars up to the next <olidelim>.
olibody: .(?-<olidelim>!) => join ;
# olitem: one item, <LSTART>, indentation, number, ". ", optional checkbox
# (<todo>), inline items, then <DNL>.
olitem: <LSTART> <lilevel> <uint> ". "! <todo>?
        (<ref> | <img> | <link> | <bold> | <italic> | <strike> | <code> | <olibody> )* <DNL> ;
# olist: one or more items followed by <DNL1>.
olist: <olitem>+ <DNL1> ;

# codeblock: fenced code block, an opening ``` at <LSTART>, then
# <codemeta>, <codebody> and <DNL1>. The two sub-rules are defined below
# their use.
codeblock: <LSTART> "```"! <codemeta> <codebody> <DNL1> ;
# codemeta: remainder of the opening fence line, any chars up to <NL>.
codemeta: .(?+<NL>!) => join ;
# codebody: block content, any chars up to the closing ```.
codebody: .(?+"```"!) => join ;

# bqline: one blockquote line, "> " at <LSTART>, then inline items limited
# to a single line (via <body1>), then <DNL>.
bqline: <LSTART> "> "!
        (<ref> | <img> | <link> | <bold> | <italic> | <strike> | <code> | <body1>)* <DNL> ;
# bquote: one or more blockquote lines followed by <DNL1>.
bquote: <bqline>+ <DNL1> ;

# Tables: rows of '|' separated cells.

# tdelim: terminator for cell text, an inline token or the cell separator.
tdelim: (<inlinedelim> | '|' ) ;
# tbody: plain cell text, any chars up to the next <tdelim>.
tbody: .(?-<tdelim>!) => join ;
# tcell: one cell, <WS0>, inline items, then the closing '|'.
tcell: <WS0> (<ref> | <img> | <link> | <bold> | <italic> | <strike> | <code> | <tbody> )* '|'! ;
# trow: one row, a leading '|' at <LSTART>, then cells with <DNL> as
# lookahead (presumably repeating cells until the line break - confirm).
trow: <LSTART> '|'! <tcell>(?+<DNL>) ;
# table: one or more rows followed by <DNL1>.
table: <trow>+ <DNL1> ;

# hr: horizontal rule, "--" then '-' chars with <NL> as lookahead
# (presumably repeating until the line break - confirm), then <DNL1>.
hr: "--"! '-'!(?-<NL>!) <DNL1> ;

# main: entry rule, <WS0> then zero or more block-level items. <para> is
# listed last, presumably as the fallback once the more specific block
# rules have been tried - confirm alternation is ordered.
main: <WS0> (<hd> | <ulist> | <olist> | <bquote> | <codeblock> | <table> | <hr> | <para>)* ;

Playground