From 6bd9d4a09b9066d451d33a4f05713fa20b90a8cb Mon Sep 17 00:00:00 2001
From: Josh Holtrop <jholtrop@gmail.com>
Date: Mon, 3 Oct 2022 21:40:34 -0400
Subject: [PATCH] Remove _TOKEN_NONE and use _TOKEN_COUNT instead

---
 assets/parser.d.erb      | 27 +++++++++++++--------------
 lib/propane.rb           |  3 ---
 lib/propane/generator.rb |  2 +-
 lib/propane/lexer.rb     |  9 +++++----
 lib/propane/parser.rb    |  2 +-
 5 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/assets/parser.d.erb b/assets/parser.d.erb
index 70b2b9d..ff25d2c 100644
--- a/assets/parser.d.erb
+++ b/assets/parser.d.erb
@@ -19,7 +19,6 @@ class <%= @classname %>
         _TOKEN_COUNT = <%= @grammar.tokens.size %>,
         _TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>,
         _TOKEN_DROP = <%= TOKEN_DROP %>,
-        _TOKEN_NONE = <%= TOKEN_NONE %>,
     }
 
     static immutable string token_names[] = [
@@ -166,7 +165,7 @@ class <%= @classname %>
             for (;;)
             {
                 LexedToken lt = attempt_lex_token();
-                if ((lt.token != _TOKEN_DROP) && (lt.token != _TOKEN_NONE))
+                if (lt.token < _TOKEN_COUNT)
                 {
                     return lt;
                 }
@@ -178,7 +177,7 @@ class <%= @classname %>
          *
          * @param code_id The ID of the user code block to execute.
          *
-         * @return Token ID to accept, or _TOKEN_NONE if the user code does
+         * @return Token ID to accept, or _TOKEN_COUNT if the user code does
          *   not explicitly return a token.
          */
         private uint user_code(uint code_id)
@@ -195,12 +194,12 @@ class <%= @classname %>
             default: break;
             }
 
-            return _TOKEN_NONE;
+            return _TOKEN_COUNT;
         }
 
         private LexedToken attempt_lex_token()
         {
-            LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_NONE);
+            LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_COUNT);
             struct MatchInfo
             {
                 size_t length;
@@ -210,7 +209,7 @@ class <%= @classname %>
                 uint code_id;
             }
             MatchInfo longest_match_info;
-            longest_match_info.token = _TOKEN_NONE;
+            longest_match_info.token = _TOKEN_COUNT;
             MatchInfo attempt_match_info;
             uint current_state;
             for (;;)
@@ -239,7 +238,7 @@ class <%= @classname %>
                             attempt_match_info.delta_col++;
                         }
                         current_state = dest;
-                        if ((states[current_state].token != _TOKEN_NONE) ||
+                        if ((states[current_state].token != _TOKEN_COUNT) ||
                             (states[current_state].code_id != 0xFFFF_FFFFu))
                         {
                             attempt_match_info.token = states[current_state].token;
@@ -260,17 +259,17 @@ class <%= @classname %>
                     if (longest_match_info.code_id != 0xFFFF_FFFFu)
                     {
                         uint user_code_token = user_code(longest_match_info.code_id);
-                        /* A return of _TOKEN_NONE from user_code() means
+                        /* A return of _TOKEN_COUNT from user_code() means
                          * that the user code did not explicitly return a
                          * token. So only override the token to return if the
                          * user code does explicitly return a token. */
-                        if (user_code_token != _TOKEN_NONE)
+                        if (user_code_token != _TOKEN_COUNT)
                         {
                             token_to_accept = user_code_token;
                         }
                         pattern_accepted = true;
                     }
-                    if (pattern_accepted || (token_to_accept != _TOKEN_NONE))
+                    if (pattern_accepted || (token_to_accept != _TOKEN_COUNT))
                     {
                         /* Update the input position tracking. */
                         m_input_position += longest_match_info.length;
@@ -360,12 +359,12 @@ class <%= @classname %>
         bool parse()
         {
             Lexer.LexedToken lexed_token;
-            uint token = _TOKEN_NONE;
+            uint token = _TOKEN_COUNT;
             uint[] states = new uint[](1);
             uint reduced_rule_set = 0xFFFFFFFFu;
             for (;;)
             {
-                if (token == _TOKEN_NONE)
+                if (token == _TOKEN_COUNT)
                 {
                     lexed_token = m_lexer.lex_token();
                     token = lexed_token.token;
@@ -389,7 +388,7 @@ class <%= @classname %>
                     states ~= shift_state;
                     if (reduced_rule_set == 0xFFFFFFFFu)
                     {
-                        token = _TOKEN_NONE;
+                        token = _TOKEN_COUNT;
                     }
                     else
                     {
@@ -449,7 +448,7 @@ class <%= @classname %>
             for (uint i = start; i < end; i++)
             {
                 if ((reduces[i].token == token) ||
-                    (reduces[i].token == _TOKEN_NONE))
+                    (reduces[i].token == _TOKEN_COUNT))
                 {
 //                    write("Reducing rule ", reduces[i].rule, ", rule set ", reduces[i].rule_set, " lookahead ");
 //                    if (token < _TOKEN_COUNT)
diff --git a/lib/propane.rb b/lib/propane.rb
index b381fa8..b3a3e42 100644
--- a/lib/propane.rb
+++ b/lib/propane.rb
@@ -31,9 +31,6 @@ class Propane
   # Token ID for a "dropped" token.
   TOKEN_DROP = 0xFFFFFFFE
 
-  # Invalid token ID.
-  TOKEN_NONE = 0xFFFFFFFF
-
   class Error < RuntimeError
   end
 
diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb
index 16c8745..8f37416 100644
--- a/lib/propane/generator.rb
+++ b/lib/propane/generator.rb
@@ -81,7 +81,7 @@ class Propane
       end
       determine_possibly_empty_rulesets!(rule_sets)
       # Generate the lexer.
-      @lexer = Lexer.new(@grammar.patterns)
+      @lexer = Lexer.new(@grammar)
       # Generate the parser.
       @parser = Parser.new(@grammar, rule_sets, @log)
     end
diff --git a/lib/propane/lexer.rb b/lib/propane/lexer.rb
index 2e05e58..59bbd17 100644
--- a/lib/propane/lexer.rb
+++ b/lib/propane/lexer.rb
@@ -5,8 +5,9 @@ class Propane
     #   Lexer DFA.
     attr_accessor :dfa
 
-    def initialize(patterns)
-      @dfa = DFA.new(patterns)
+    def initialize(grammar)
+      @grammar = grammar
+      @dfa = DFA.new(grammar.patterns)
     end
 
     def build_tables
@@ -16,13 +17,13 @@ class Propane
       states.each do |state, id|
         token =
           if state.accepts.nil?
-            TOKEN_NONE
+            @grammar.tokens.size
           elsif state.accepts.drop?
             TOKEN_DROP
           elsif state.accepts.token
             state.accepts.token.id
           else
-            TOKEN_NONE
+            @grammar.tokens.size
           end
         code_id =
           if state.accepts && state.accepts.code_id
diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb
index 5bd1e20..d21b13a 100644
--- a/lib/propane/parser.rb
+++ b/lib/propane/parser.rb
@@ -55,7 +55,7 @@ class Propane
         reduce_entries =
           case ra = item_set.reduce_actions
           when Rule
-            [{token_id: TOKEN_NONE, rule_id: ra.id,
+            [{token_id: @grammar.tokens.size, rule_id: ra.id,
               rule_set_id: ra.rule_set.id, n_states: ra.components.size}]
           when Hash
             ra.map do |token, rule|