From 3eaf0d3d49ae7353f3ef52eb2e0b663b45a8ec78 Mon Sep 17 00:00:00 2001
From: Josh Holtrop <jholtrop@gmail.com>
Date: Tue, 2 Apr 2024 17:44:15 -0400
Subject: [PATCH] allow one line user code blocks - close #21

---
 doc/user_guide.md      | 120 +++++++------------------
 lib/propane/grammar.rb |  11 ++-
 spec/propane_spec.rb   | 198 +++++++++++------------------------------
 3 files changed, 89 insertions(+), 240 deletions(-)

diff --git a/doc/user_guide.md b/doc/user_guide.md
index 6ddc251..770cf58 100644
--- a/doc/user_guide.md
+++ b/doc/user_guide.md
@@ -77,33 +77,15 @@ token rparen /\\)/;
 # Drop whitespace.
 drop /\\s+/;
 
-Start -> E1 <<
-  $$ = $1;
->>
-E1 -> E2 <<
-  $$ = $1;
->>
-E1 -> E1 plus E2 <<
-  $$ = $1 + $3;
->>
-E2 -> E3 <<
-  $$ = $1;
->>
-E2 -> E2 times E3 <<
-  $$ = $1 * $3;
->>
-E3 -> E4 <<
-  $$ = $1;
->>
-E3 -> E3 power E4 <<
-  $$ = pow($1, $3);
->>
-E4 -> integer <<
-  $$ = $1;
->>
-E4 -> lparen E1 rparen <<
-  $$ = $2;
->>
+Start -> E1 << $$ = $1; >>
+E1 -> E2 << $$ = $1; >>
+E1 -> E1 plus E2 << $$ = $1 + $3; >>
+E2 -> E3 << $$ = $1; >>
+E2 -> E2 times E3 << $$ = $1 * $3; >>
+E3 -> E4 << $$ = $1; >>
+E3 -> E3 power E4 << $$ = pow($1, $3); >>
+E4 -> integer << $$ = $1; >>
+E4 -> lparen E1 rparen << $$ = $2; >>
 ```
 
 Grammar files can contain comment lines beginning with `#` which are ignored.
@@ -117,8 +99,8 @@ lowercase character and beginning a rule name with an uppercase character.
 
 ##> User Code Blocks
 
-User code blocks begin with the line following a "<<" token and end with the
-line preceding a grammar line consisting of solely the ">>" token.
+User code blocks begin after a "<<" token and end at the first ">>" token that
+ends a line, so code must not itself break a line right after a ">>" operator.
 All text lines in the code block are copied verbatim into the output file.
 
 ### Standalone Code Blocks
@@ -189,9 +171,7 @@ This parser value can then be used later in a parser rule.
 Example:
 
 ```
-E1 -> E1 plus E2 <<
-  $$ = $1 + $3;
->>
+E1 -> E1 plus E2 << $$ = $1 + $3; >>
 ```
 
 Parser rule code blocks appear following a rule expression.
@@ -238,9 +218,7 @@ lexer.
 Example:
 
 ```
-token if <<
-  writeln("'if' keyword lexed");
->>
+token if << writeln("'if' keyword lexed"); >>
 ```
 
 The `token` statement is actually a shortcut statement for a combination of a
@@ -277,9 +255,7 @@ code but may not result in a matched token.
 Example:
 
 ```
-/foo+/ <<
-  writeln("saw a foo pattern");
->>
+/foo+/ << writeln("saw a foo pattern"); >>
 ```
 
 This can be especially useful with ${#Lexer modes}.
@@ -388,9 +364,7 @@ tokenid str;
   mystringvalue = "";
   $mode(string);
 >>
-string: /[^"]+/ <<
-  mystringvalue += match;
->>
+string: /[^"]+/ << mystringvalue += match; >>
 string: /"/ <<
   $mode(default);
   return $token(str);
@@ -447,20 +421,12 @@ ptype Value;
 ptype array = Value[];
 ptype dict = Value[string];
 
-Object -> lbrace rbrace <<
-  $$ = new Value();
->>
+Object -> lbrace rbrace << $$ = new Value(); >>
 
-Values (array) -> Value <<
-  $$ = [$1];
->>
-Values -> Values comma Value <<
-  $$ = $1 ~ [$3];
->>
+Values (array) -> Value << $$ = [$1]; >>
+Values -> Values comma Value << $$ = $1 ~ [$3]; >>
 
-KeyValue (dict) -> string colon Value <<
-  $$ = [$1: $3];
->>
+KeyValue (dict) -> string colon Value << $$ = [$1: $3]; >>
 ```
 
 In this example, the default parser value type is `Value`.
@@ -493,12 +459,8 @@ Example:
 
 ```
 ptype ulong;
-token word /[a-z]+/ <<
-  $$ = match.length;
->>
-Start -> word <<
-  $$ = $1;
->>
+token word /[a-z]+/ << $$ = match.length; >>
+Start -> word << $$ = $1; >>
 ```
 
 In the above example the `Start` rule is defined to match a single `word`
@@ -507,33 +469,15 @@ token.
 Example:
 
 ```
-Start -> E1 <<
-  $$ = $1;
->>
-E1 -> E2 <<
-  $$ = $1;
->>
-E1 -> E1 plus E2 <<
-  $$ = $1 + $3;
->>
-E2 -> E3 <<
-  $$ = $1;
->>
-E2 -> E2 times E3 <<
-  $$ = $1 * $3;
->>
-E3 -> E4 <<
-  $$ = $1;
->>
-E3 -> E3 power E4 <<
-  $$ = pow($1, $3);
->>
-E4 -> integer <<
-  $$ = $1;
->>
-E4 -> lparen E1 rparen <<
-  $$ = $2;
->>
+Start -> E1 << $$ = $1; >>
+E1 -> E2 << $$ = $1; >>
+E1 -> E1 plus E2 << $$ = $1 + $3; >>
+E2 -> E3 << $$ = $1; >>
+E2 -> E2 times E3 << $$ = $1 * $3; >>
+E3 -> E4 << $$ = $1; >>
+E3 -> E3 power E4 << $$ = pow($1, $3); >>
+E4 -> integer << $$ = $1; >>
+E4 -> lparen E1 rparen << $$ = $2; >>
 ```
 
 A parser rule has zero or more terms on the right side of its definition.
@@ -596,9 +540,7 @@ To terminate parsing from a lexer or parser user code block, use the
 For example:
 
 ```
-NewExpression -> new Expression <<
-  $terminate(42);
->>
+NewExpression -> new Expression << $terminate(42); >>
 ```
 
 The value passed to the `$terminate()` function is known as the "user terminate
diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb
index f517d2f..c3b2f0f 100644
--- a/lib/propane/grammar.rb
+++ b/lib/propane/grammar.rb
@@ -183,8 +183,10 @@ class Propane
     end
 
     def parse_code_block_statement!
-      if md = consume!(/<<([a-z]*)\n(.*?)^>>\n/m)
+      if md = consume!(/<<([a-z]*)(.*?)>>\n/m)
         name, code = md[1..2]
+        code.sub!(/\A\n/, "")
+        code += "\n" unless code.end_with?("\n")
         if @code_blocks[name]
           @code_blocks[name] += code
         else
@@ -222,8 +224,11 @@ class Propane
     end
 
     def parse_code_block!
-      if md = consume!(/<<\n(.*?)^>>\n/m)
-        md[1]
+      if md = consume!(/<<(.*?)>>\n/m)
+        # Strip the newline right after "<<" and make sure the text ends
+        # with one, so a one-line block still ends with a newline.
+        code = md[1].sub(/\A\n/, "")
+        code.end_with?("\n") ? code : code + "\n"
       end
     end
 
diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb
index 441571c..99aa66f 100644
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@@ -123,10 +123,8 @@ token plus /\\+/;
 token times /\\*/;
 drop /\\s+/;
 Start -> Foo;
-Foo -> int <<
->>
-Foo -> plus <<
->>
+Foo -> int <<>>
+Foo -> plus <<>>
 EOF
         build_parser(language: language)
         compile("spec/test_lexer.#{language}", language: language)
@@ -149,9 +147,7 @@ token int /\\d+/ <<
   }
   $$ = v;
 >>
-Start -> int <<
-  $$ = $1;
->>
+Start -> int << $$ = $1; >>
 EOF
         when "d"
           write_grammar <<EOF
@@ -165,9 +161,7 @@ token int /\\d+/ <<
   }
   $$ = v;
 >>
-Start -> int <<
-  $$ = $1;
->>
+Start -> int << $$ = $1; >>
 EOF
         end
         build_parser(language: language)
@@ -219,33 +213,15 @@ token lparen /\\(/;
 token rparen /\\)/;
 drop /\\s+/;
 
-Start -> E1 <<
-  $$ = $1;
->>
-E1 -> E2 <<
-  $$ = $1;
->>
-E1 -> E1 plus E2 <<
-  $$ = $1 + $3;
->>
-E2 -> E3 <<
-  $$ = $1;
->>
-E2 -> E2 times E3 <<
-  $$ = $1 * $3;
->>
-E3 -> E4 <<
-  $$ = $1;
->>
-E3 -> E3 power E4 <<
-  $$ = (size_t)pow($1, $3);
->>
-E4 -> integer <<
-  $$ = $1;
->>
-E4 -> lparen E1 rparen <<
-  $$ = $2;
->>
+Start -> E1 << $$ = $1; >>
+E1 -> E2 << $$ = $1; >>
+E1 -> E1 plus E2 << $$ = $1 + $3; >>
+E2 -> E3 << $$ = $1; >>
+E2 -> E2 times E3 << $$ = $1 * $3; >>
+E3 -> E4 << $$ = $1; >>
+E3 -> E3 power E4 << $$ = (size_t)pow($1, $3); >>
+E4 -> integer << $$ = $1; >>
+E4 -> lparen E1 rparen << $$ = $2; >>
 EOF
         when "d"
           write_grammar <<EOF
@@ -271,33 +247,15 @@ token lparen /\\(/;
 token rparen /\\)/;
 drop /\\s+/;
 
-Start -> E1 <<
-  $$ = $1;
->>
-E1 -> E2 <<
-  $$ = $1;
->>
-E1 -> E1 plus E2 <<
-  $$ = $1 + $3;
->>
-E2 -> E3 <<
-  $$ = $1;
->>
-E2 -> E2 times E3 <<
-  $$ = $1 * $3;
->>
-E3 -> E4 <<
-  $$ = $1;
->>
-E3 -> E3 power E4 <<
-  $$ = pow($1, $3);
->>
-E4 -> integer <<
-  $$ = $1;
->>
-E4 -> lparen E1 rparen <<
-  $$ = $2;
->>
+Start -> E1 << $$ = $1; >>
+E1 -> E2 << $$ = $1; >>
+E1 -> E1 plus E2 << $$ = $1 + $3; >>
+E2 -> E3 << $$ = $1; >>
+E2 -> E2 times E3 << $$ = $1 * $3; >>
+E3 -> E4 << $$ = $1; >>
+E3 -> E3 power E4 << $$ = pow($1, $3); >>
+E4 -> integer << $$ = $1; >>
+E4 -> lparen E1 rparen << $$ = $2; >>
 EOF
         end
         build_parser(language: language)
@@ -408,9 +366,7 @@ EOF
 import std.stdio;
 >>
 token abc;
-/def/ <<
-  writeln("def!");
->>
+/def/ << writeln("def!"); >>
 Start -> abc;
 EOF
         end
@@ -435,9 +391,7 @@ EOF
 #include <stdio.h>
 >>
 token abc;
-/def/ <<
-  printf("def!\\n");
->>
+/def/ << printf("def!\\n"); >>
 /ghi/ <<
   printf("ghi!\\n");
   return $token(abc);
@@ -450,9 +404,7 @@ EOF
 import std.stdio;
 >>
 token abc;
-/def/ <<
-  writeln("def!");
->>
+/def/ << writeln("def!"); >>
 /ghi/ <<
   writeln("ghi!");
   return $token(abc);
@@ -541,15 +493,9 @@ EOF
 >>
 token a;
 token b;
-Start -> A B <<
-  printf("Start!\\n");
->>
-A -> a <<
-  printf("A!\\n");
->>
-B -> b <<
-  printf("B!\\n");
->>
+Start -> A B << printf("Start!\\n"); >>
+A -> a << printf("A!\\n"); >>
+B -> b << printf("B!\\n"); >>
 EOF
         when "d"
           write_grammar <<EOF
@@ -558,15 +504,9 @@ import std.stdio;
 >>
 token a;
 token b;
-Start -> A B <<
-  writeln("Start!");
->>
-A -> a <<
-  writeln("A!");
->>
-B -> b <<
-  writeln("B!");
->>
+Start -> A B << writeln("Start!"); >>
+A -> a << writeln("A!"); >>
+B -> b << writeln("B!"); >>
 EOF
         end
         build_parser(language: language)
@@ -584,15 +524,9 @@ EOF
         write_grammar <<EOF
 ptype #{language == "c" ? "uint32_t" : "uint"};
 token a;
-Start -> As <<
-  $$ = $1;
->>
-As -> <<
-  $$ = 0u;
->>
-As -> As a <<
-  $$ = $1 + 1u;
->>
+Start -> As << $$ = $1; >>
+As -> << $$ = 0u; >>
+As -> As a << $$ = $1 + 1u; >>
 EOF
         build_parser(language: language)
         compile("spec/test_parsing_lists.#{language}", language: language)
@@ -756,9 +690,7 @@ token b;
 token c;
 Start -> Any;
 Any -> a Any;
-Any -> b Any <<
-  $terminate(4200);
->>
+Any -> b Any << $terminate(4200); >>
 Any -> c Any;
 Any -> ;
 EOF
@@ -777,30 +709,14 @@ EOF
   #include <stdio.h>
 >>
 tokenid t;
-/\\a/ <<
-  printf("A\\n");
->>
-/\\b/ <<
-  printf("B\\n");
->>
-/\\t/ <<
-  printf("T\\n");
->>
-/\\n/ <<
-  printf("N\\n");
->>
-/\\v/ <<
-  printf("V\\n");
->>
-/\\f/ <<
-  printf("F\\n");
->>
-/\\r/ <<
-  printf("R\\n");
->>
-/t/ <<
-  return $token(t);
->>
+/\\a/ << printf("A\\n"); >>
+/\\b/ << printf("B\\n"); >>
+/\\t/ << printf("T\\n"); >>
+/\\n/ << printf("N\\n"); >>
+/\\v/ << printf("V\\n"); >>
+/\\f/ << printf("F\\n"); >>
+/\\r/ << printf("R\\n"); >>
+/t/ << return $token(t); >>
 Start -> t;
 EOF
         when "d"
@@ -809,27 +725,13 @@ EOF
   import std.stdio;
 >>
 tokenid t;
-/\\a/ <<
-  writeln("A");
->>
-/\\b/ <<
-  writeln("B");
->>
-/\\t/ <<
-  writeln("T");
->>
-/\\n/ <<
-  writeln("N");
->>
-/\\v/ <<
-  writeln("V");
->>
-/\\f/ <<
-  writeln("F");
->>
-/\\r/ <<
-  writeln("R");
->>
+/\\a/ << writeln("A"); >>
+/\\b/ << writeln("B"); >>
+/\\t/ << writeln("T"); >>
+/\\n/ << writeln("N"); >>
+/\\v/ << writeln("V"); >>
+/\\f/ << writeln("F"); >>
+/\\r/ << writeln("R"); >>
 /t/ <<
   return $token(t);
 >>