From 6bd9d4a09b9066d451d33a4f05713fa20b90a8cb Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Mon, 3 Oct 2022 21:40:34 -0400 Subject: [PATCH] Remove _TOKEN_NONE and use _TOKEN_COUNT instead --- assets/parser.d.erb | 27 +++++++++++++-------------- lib/propane.rb | 3 --- lib/propane/generator.rb | 2 +- lib/propane/lexer.rb | 9 +++++---- lib/propane/parser.rb | 2 +- 5 files changed, 20 insertions(+), 23 deletions(-) diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 70b2b9d..ff25d2c 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -19,7 +19,6 @@ class <%= @classname %> _TOKEN_COUNT = <%= @grammar.tokens.size %>, _TOKEN_DECODE_ERROR = <%= TOKEN_DECODE_ERROR %>, _TOKEN_DROP = <%= TOKEN_DROP %>, - _TOKEN_NONE = <%= TOKEN_NONE %>, } static immutable string token_names[] = [ @@ -166,7 +165,7 @@ class <%= @classname %> for (;;) { LexedToken lt = attempt_lex_token(); - if ((lt.token != _TOKEN_DROP) && (lt.token != _TOKEN_NONE)) + if (lt.token < _TOKEN_COUNT) { return lt; } @@ -178,7 +177,7 @@ class <%= @classname %> * * @param code_id The ID of the user code block to execute. * - * @return Token ID to accept, or _TOKEN_NONE if the user code does + * @return Token ID to accept, or _TOKEN_COUNT if the user code does * not explicitly return a token. */ private uint user_code(uint code_id) @@ -195,12 +194,12 @@ class <%= @classname %> default: break; } - return _TOKEN_NONE; + return _TOKEN_COUNT; } private LexedToken attempt_lex_token() { - LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_NONE); + LexedToken lt = LexedToken(m_input_row, m_input_col, 0, _TOKEN_COUNT); struct MatchInfo { size_t length; @@ -210,7 +209,7 @@ class <%= @classname %> uint code_id; } MatchInfo longest_match_info; - longest_match_info.token = _TOKEN_NONE; + longest_match_info.token = _TOKEN_COUNT; MatchInfo attempt_match_info; uint current_state; for (;;) @@ -239,7 +238,7 @@ class <%= @classname %> attempt_match_info.delta_col++; } current_state = dest; - if ((states[current_state].token != _TOKEN_NONE) || + if ((states[current_state].token != _TOKEN_COUNT) || (states[current_state].code_id != 0xFFFF_FFFFu)) { attempt_match_info.token = states[current_state].token; @@ -260,17 +259,17 @@ class <%= @classname %> if (longest_match_info.code_id != 0xFFFF_FFFFu) { uint user_code_token = user_code(longest_match_info.code_id); - /* A return of _TOKEN_NONE from user_code() means + /* A return of _TOKEN_COUNT from user_code() means * that the user code did not explicitly return a * token. So only override the token to return if the * user code does explicitly return a token. */ - if (user_code_token != _TOKEN_NONE) + if (user_code_token != _TOKEN_COUNT) { token_to_accept = user_code_token; } pattern_accepted = true; } - if (pattern_accepted || (token_to_accept != _TOKEN_NONE)) + if (pattern_accepted || (token_to_accept != _TOKEN_COUNT)) { /* Update the input position tracking. */ m_input_position += longest_match_info.length; @@ -360,12 +359,12 @@ class <%= @classname %> bool parse() { Lexer.LexedToken lexed_token; - uint token = _TOKEN_NONE; + uint token = _TOKEN_COUNT; uint[] states = new uint[](1); uint reduced_rule_set = 0xFFFFFFFFu; for (;;) { - if (token == _TOKEN_NONE) + if (token == _TOKEN_COUNT) { lexed_token = m_lexer.lex_token(); token = lexed_token.token; @@ -389,7 +388,7 @@ class <%= @classname %> states ~= shift_state; if (reduced_rule_set == 0xFFFFFFFFu) { - token = _TOKEN_NONE; + token = _TOKEN_COUNT; } else { @@ -449,7 +448,7 @@ class <%= @classname %> for (uint i = start; i < end; i++) { if ((reduces[i].token == token) || - (reduces[i].token == _TOKEN_NONE)) + (reduces[i].token == _TOKEN_COUNT)) { // write("Reducing rule ", reduces[i].rule, ", rule set ", reduces[i].rule_set, " lookahead "); // if (token < _TOKEN_COUNT) diff --git a/lib/propane.rb b/lib/propane.rb index b381fa8..b3a3e42 100644 --- a/lib/propane.rb +++ b/lib/propane.rb @@ -31,9 +31,6 @@ class Propane # Token ID for a "dropped" token. TOKEN_DROP = 0xFFFFFFFE - # Invalid token ID. - TOKEN_NONE = 0xFFFFFFFF - class Error < RuntimeError end diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index 16c8745..8f37416 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -81,7 +81,7 @@ class Propane end determine_possibly_empty_rulesets!(rule_sets) # Generate the lexer. - @lexer = Lexer.new(@grammar.patterns) + @lexer = Lexer.new(@grammar) # Generate the parser. @parser = Parser.new(@grammar, rule_sets, @log) end diff --git a/lib/propane/lexer.rb b/lib/propane/lexer.rb index 2e05e58..59bbd17 100644 --- a/lib/propane/lexer.rb +++ b/lib/propane/lexer.rb @@ -5,8 +5,9 @@ class Propane # Lexer DFA. attr_accessor :dfa - def initialize(patterns) - @dfa = DFA.new(patterns) + def initialize(grammar) + @grammar = grammar + @dfa = DFA.new(grammar.patterns) end def build_tables @@ -16,13 +17,13 @@ class Propane states.each do |state, id| token = if state.accepts.nil? - TOKEN_NONE + @grammar.tokens.size elsif state.accepts.drop? TOKEN_DROP elsif state.accepts.token state.accepts.token.id else - TOKEN_NONE + @grammar.tokens.size end code_id = if state.accepts && state.accepts.code_id diff --git a/lib/propane/parser.rb b/lib/propane/parser.rb index 5bd1e20..d21b13a 100644 --- a/lib/propane/parser.rb +++ b/lib/propane/parser.rb @@ -55,7 +55,7 @@ class Propane reduce_entries = case ra = item_set.reduce_actions when Rule - [{token_id: TOKEN_NONE, rule_id: ra.id, + [{token_id: @grammar.tokens.size, rule_id: ra.id, rule_set_id: ra.rule_set.id, n_states: ra.components.size}] when Hash ra.map do |token, rule|