From 3eaf0d3d49ae7353f3ef52eb2e0b663b45a8ec78 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Tue, 2 Apr 2024 17:44:15 -0400 Subject: [PATCH] allow one line user code blocks - close #21 --- doc/user_guide.md | 120 +++++++------------------ lib/propane/grammar.rb | 11 ++- spec/propane_spec.rb | 198 +++++++++++------------------------------ 3 files changed, 89 insertions(+), 240 deletions(-) diff --git a/doc/user_guide.md b/doc/user_guide.md index 6ddc251..770cf58 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -77,33 +77,15 @@ token rparen /\\)/; # Drop whitespace. drop /\\s+/; -Start -> E1 << - $$ = $1; ->> -E1 -> E2 << - $$ = $1; ->> -E1 -> E1 plus E2 << - $$ = $1 + $3; ->> -E2 -> E3 << - $$ = $1; ->> -E2 -> E2 times E3 << - $$ = $1 * $3; ->> -E3 -> E4 << - $$ = $1; ->> -E3 -> E3 power E4 << - $$ = pow($1, $3); ->> -E4 -> integer << - $$ = $1; ->> -E4 -> lparen E1 rparen << - $$ = $2; ->> +Start -> E1 << $$ = $1; >> +E1 -> E2 << $$ = $1; >> +E1 -> E1 plus E2 << $$ = $1 + $3; >> +E2 -> E3 << $$ = $1; >> +E2 -> E2 times E3 << $$ = $1 * $3; >> +E3 -> E4 << $$ = $1; >> +E3 -> E3 power E4 << $$ = pow($1, $3); >> +E4 -> integer << $$ = $1; >> +E4 -> lparen E1 rparen << $$ = $2; >> ``` Grammar files can contain comment lines beginning with `#` which are ignored. @@ -117,8 +99,8 @@ lowercase character and beginning a rule name with an uppercase character. ##> User Code Blocks -User code blocks begin with the line following a "<<" token and end with the -line preceding a grammar line consisting of solely the ">>" token. +User code blocks begin following a "<<" token and end with a ">>" token found +at the end of a line. All text lines in the code block are copied verbatim into the output file. ### Standalone Code Blocks @@ -189,9 +171,7 @@ This parser value can then be used later in a parser rule. Example: ``` -E1 -> E1 plus E2 << - $$ = $1 + $3; ->> +E1 -> E1 plus E2 << $$ = $1 + $3; >> ``` Parser rule code blocks appear following a rule expression. @@ -238,9 +218,7 @@ lexer. Example: ``` -token if << - writeln("'if' keyword lexed"); ->> +token if << writeln("'if' keyword lexed"); >> ``` The `token` statement is actually a shortcut statement for a combination of a @@ -277,9 +255,7 @@ code but may not result in a matched token. Example: ``` -/foo+/ << - writeln("saw a foo pattern"); ->> +/foo+/ << writeln("saw a foo pattern"); >> ``` This can be especially useful with ${#Lexer modes}. @@ -388,9 +364,7 @@ tokenid str; mystringvalue = ""; $mode(string); >> -string: /[^"]+/ << - mystringvalue += match; ->> +string: /[^"]+/ << mystringvalue += match; >> string: /"/ << $mode(default); return $token(str); @@ -447,20 +421,12 @@ ptype Value; ptype array = Value[]; ptype dict = Value[string]; -Object -> lbrace rbrace << - $$ = new Value(); ->> +Object -> lbrace rbrace << $$ = new Value(); >> -Values (array) -> Value << - $$ = [$1]; ->> -Values -> Values comma Value << - $$ = $1 ~ [$3]; ->> +Values (array) -> Value << $$ = [$1]; >> +Values -> Values comma Value << $$ = $1 ~ [$3]; >> -KeyValue (dict) -> string colon Value << - $$ = [$1: $3]; ->> +KeyValue (dict) -> string colon Value << $$ = [$1: $3]; >> ``` In this example, the default parser value type is `Value`. @@ -493,12 +459,8 @@ Example: ``` ptype ulong; -token word /[a-z]+/ << - $$ = match.length; ->> -Start -> word << - $$ = $1; ->> +token word /[a-z]+/ << $$ = match.length; >> +Start -> word << $$ = $1; >> ``` In the above example the `Start` rule is defined to match a single `word` @@ -507,33 +469,15 @@ token. Example: ``` -Start -> E1 << - $$ = $1; ->> -E1 -> E2 << - $$ = $1; ->> -E1 -> E1 plus E2 << - $$ = $1 + $3; ->> -E2 -> E3 << - $$ = $1; ->> -E2 -> E2 times E3 << - $$ = $1 * $3; ->> -E3 -> E4 << - $$ = $1; ->> -E3 -> E3 power E4 << - $$ = pow($1, $3); ->> -E4 -> integer << - $$ = $1; ->> -E4 -> lparen E1 rparen << - $$ = $2; ->> +Start -> E1 << $$ = $1; >> +E1 -> E2 << $$ = $1; >> +E1 -> E1 plus E2 << $$ = $1 + $3; >> +E2 -> E3 << $$ = $1; >> +E2 -> E2 times E3 << $$ = $1 * $3; >> +E3 -> E4 << $$ = $1; >> +E3 -> E3 power E4 << $$ = pow($1, $3); >> +E4 -> integer << $$ = $1; >> +E4 -> lparen E1 rparen << $$ = $2; >> ``` A parser rule has zero or more terms on the right side of its definition. @@ -596,9 +540,7 @@ To terminate parsing from a lexer or parser user code block, use the For example: ``` -NewExpression -> new Expression << - $terminate(42); ->> +NewExpression -> new Expression << $terminate(42); >> ``` The value passed to the `$terminate()` function is known as the "user terminate diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index f517d2f..c3b2f0f 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -183,8 +183,10 @@ class Propane end def parse_code_block_statement! - if md = consume!(/<<([a-z]*)\n(.*?)^>>\n/m) + if md = consume!(/<<([a-z]*)(.*?)>>\n/m) name, code = md[1..2] + code.sub!(/\A\n/, "") + code += "\n" unless code.end_with?("\n") if @code_blocks[name] @code_blocks[name] += code else @@ -222,8 +224,11 @@ class Propane end def parse_code_block! - if md = consume!(/<<\n(.*?)^>>\n/m) - md[1] + if md = consume!(/<<(.*?)>>\n/m) + code = md[1] + code.sub!(/\A\n/, "") + code += "\n" unless code.end_with?("\n") + code end end diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index 441571c..99aa66f 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -123,10 +123,8 @@ token plus /\\+/; token times /\\*/; drop /\\s+/; Start -> Foo; -Foo -> int << ->> -Foo -> plus << ->> +Foo -> int <<>> +Foo -> plus <<>> EOF build_parser(language: language) compile("spec/test_lexer.#{language}", language: language) @@ -149,9 +147,7 @@ token int /\\d+/ << } $$ = v; >> -Start -> int << - $$ = $1; ->> +Start -> int << $$ = $1; >> EOF when "d" write_grammar <> -Start -> int << - $$ = $1; ->> +Start -> int << $$ = $1; >> EOF end build_parser(language: language) @@ -219,33 +213,15 @@ token lparen /\\(/; token rparen /\\)/; drop /\\s+/; -Start -> E1 << - $$ = $1; ->> -E1 -> E2 << - $$ = $1; ->> -E1 -> E1 plus E2 << - $$ = $1 + $3; ->> -E2 -> E3 << - $$ = $1; ->> -E2 -> E2 times E3 << - $$ = $1 * $3; ->> -E3 -> E4 << - $$ = $1; ->> -E3 -> E3 power E4 << - $$ = (size_t)pow($1, $3); ->> -E4 -> integer << - $$ = $1; ->> -E4 -> lparen E1 rparen << - $$ = $2; ->> +Start -> E1 << $$ = $1; >> +E1 -> E2 << $$ = $1; >> +E1 -> E1 plus E2 << $$ = $1 + $3; >> +E2 -> E3 << $$ = $1; >> +E2 -> E2 times E3 << $$ = $1 * $3; >> +E3 -> E4 << $$ = $1; >> +E3 -> E3 power E4 << $$ = (size_t)pow($1, $3); >> +E4 -> integer << $$ = $1; >> +E4 -> lparen E1 rparen << $$ = $2; >> EOF when "d" write_grammar < E1 << - $$ = $1; ->> -E1 -> E2 << - $$ = $1; ->> -E1 -> E1 plus E2 << - $$ = $1 + $3; ->> -E2 -> E3 << - $$ = $1; ->> -E2 -> E2 times E3 << - $$ = $1 * $3; ->> -E3 -> E4 << - $$ = $1; ->> -E3 -> E3 power E4 << - $$ = pow($1, $3); ->> -E4 -> integer << - $$ = $1; ->> -E4 -> lparen E1 rparen << - $$ = $2; ->> +Start -> E1 << $$ = $1; >> +E1 -> E2 << $$ = $1; >> +E1 -> E1 plus E2 << $$ = $1 + $3; >> +E2 -> E3 << $$ = $1; >> +E2 -> E2 times E3 << $$ = $1 * $3; >> +E3 -> E4 << $$ = $1; >> +E3 -> E3 power E4 << $$ = pow($1, $3); >> +E4 -> integer << $$ = $1; >> +E4 -> lparen E1 rparen << $$ = $2; >> EOF end build_parser(language: language) @@ -408,9 +366,7 @@ EOF import std.stdio; >> token abc; -/def/ << - writeln("def!"); ->> +/def/ << writeln("def!"); >> Start -> abc; EOF end @@ -435,9 +391,7 @@ EOF #include >> token abc; -/def/ << - printf("def!\\n"); ->> +/def/ << printf("def!\\n"); >> /ghi/ << printf("ghi!\\n"); return $token(abc); @@ -450,9 +404,7 @@ EOF import std.stdio; >> token abc; -/def/ << - writeln("def!"); ->> +/def/ << writeln("def!"); >> /ghi/ << writeln("ghi!"); return $token(abc); @@ -541,15 +493,9 @@ EOF >> token a; token b; -Start -> A B << - printf("Start!\\n"); ->> -A -> a << - printf("A!\\n"); ->> -B -> b << - printf("B!\\n"); ->> +Start -> A B << printf("Start!\\n"); >> +A -> a << printf("A!\\n"); >> +B -> b << printf("B!\\n"); >> EOF when "d" write_grammar <> token a; token b; -Start -> A B << - writeln("Start!"); ->> -A -> a << - writeln("A!"); ->> -B -> b << - writeln("B!"); ->> +Start -> A B << writeln("Start!"); >> +A -> a << writeln("A!"); >> +B -> b << writeln("B!"); >> EOF end build_parser(language: language) @@ -584,15 +524,9 @@ EOF write_grammar < As << - $$ = $1; ->> -As -> << - $$ = 0u; ->> -As -> As a << - $$ = $1 + 1u; ->> +Start -> As << $$ = $1; >> +As -> << $$ = 0u; >> +As -> As a << $$ = $1 + 1u; >> EOF build_parser(language: language) compile("spec/test_parsing_lists.#{language}", language: language) @@ -756,9 +690,7 @@ token b; token c; Start -> Any; Any -> a Any; -Any -> b Any << - $terminate(4200); ->> +Any -> b Any << $terminate(4200); >> Any -> c Any; Any -> ; EOF @@ -777,30 +709,14 @@ EOF #include >> tokenid t; -/\\a/ << - printf("A\\n"); ->> -/\\b/ << - printf("B\\n"); ->> -/\\t/ << - printf("T\\n"); ->> -/\\n/ << - printf("N\\n"); ->> -/\\v/ << - printf("V\\n"); ->> -/\\f/ << - printf("F\\n"); ->> -/\\r/ << - printf("R\\n"); ->> -/t/ << - return $token(t); ->> +/\\a/ << printf("A\\n"); >> +/\\b/ << printf("B\\n"); >> +/\\t/ << printf("T\\n"); >> +/\\n/ << printf("N\\n"); >> +/\\v/ << printf("V\\n"); >> +/\\f/ << printf("F\\n"); >> +/\\r/ << printf("R\\n"); >> +/t/ << return $token(t); >> Start -> t; EOF when "d" @@ -809,27 +725,13 @@ EOF import std.stdio; >> tokenid t; -/\\a/ << - writeln("A"); ->> -/\\b/ << - writeln("B"); ->> -/\\t/ << - writeln("T"); ->> -/\\n/ << - writeln("N"); ->> -/\\v/ << - writeln("V"); ->> -/\\f/ << - writeln("F"); ->> -/\\r/ << - writeln("R"); ->> +/\\a/ << writeln("A"); >> +/\\b/ << writeln("B"); >> +/\\t/ << writeln("T"); >> +/\\n/ << writeln("N"); >> +/\\v/ << writeln("V"); >> +/\\f/ << writeln("F"); >> +/\\r/ << writeln("R"); >> /t/ << return $token(t); >>