From b92679e0c27bfc488e47c509a95c6f4c68ff9fc5 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Sat, 11 Mar 2023 21:16:55 -0500 Subject: [PATCH] Replace LexedToken with Result struct --- assets/parser.d.erb | 57 ++++++++++++++++++++++++++-------------- lib/propane/generator.rb | 2 +- spec/test_d_lexer.d | 20 +++++++------- 3 files changed, 48 insertions(+), 31 deletions(-) diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 7a4b182..3efc16d 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -190,8 +190,16 @@ class <%= @classname %> <% end %> ]; - struct LexedToken + struct Result { + enum Type + { + DECODE_ERROR, + DROP, + TOKEN, + } + + Type type; size_t row; size_t col; size_t length; @@ -211,14 +219,14 @@ class <%= @classname %> m_mode = <%= @lexer.mode_id("default") %>; } - LexedToken lex_token() + Result lex_token() { for (;;) { - LexedToken lt = attempt_lex_token(); - if (lt.token < _TOKEN_COUNT) + Result result = attempt_lex_token(); + if (result.token < _TOKEN_COUNT) { - return lt; + return result; } } } @@ -228,12 +236,12 @@ class <%= @classname %> * * @param code_id The ID of the user code block to execute. * @param match Matched text for this pattern. - * @param lt LexedToken lexer result in progress. + * @param result Result lexer result in progress. * * @return Token ID to accept, or _TOKEN_COUNT if the user code does * not explicitly return a token. 
*/ - private uint user_code(uint code_id, string match, LexedToken * lt) + private uint user_code(uint code_id, string match, Result * result) { switch (code_id) { @@ -250,12 +258,12 @@ class <%= @classname %> return _TOKEN_COUNT; } - private LexedToken attempt_lex_token() + private Result attempt_lex_token() { - LexedToken lt; - lt.row = m_input_row; - lt.col = m_input_col; - lt.token = _TOKEN_COUNT; + Result result; + result.row = m_input_row; + result.col = m_input_col; + result.token = _TOKEN_COUNT; struct MatchInfo { size_t length; @@ -271,8 +279,9 @@ class <%= @classname %> auto decoded = Decoder.decode_code_point(m_input[(m_input_position + attempt_match_info.length)..(m_input.length)]); if (decoded.is_decode_error()) { - lt.token = _TOKEN_DECODE_ERROR; - return lt; + result.type = Result.Type.DECODE_ERROR; + result.token = _TOKEN_DECODE_ERROR; + return result; } bool lex_continue = false; if (!decoded.is_eof()) @@ -302,7 +311,7 @@ class <%= @classname %> } else if (attempt_match_info.length == 0u) { - lt.token = TOKEN_0EOF; + result.token = TOKEN_0EOF; break; } if (!lex_continue && (longest_match_info.accepting_state != null)) @@ -310,7 +319,7 @@ class <%= @classname %> uint token_to_accept = longest_match_info.accepting_state.token; if (longest_match_info.accepting_state.code_id != 0xFFFF_FFFFu) { - uint user_code_token = user_code(longest_match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &lt); + uint user_code_token = user_code(longest_match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + longest_match_info.length)], &result); /* A return of _TOKEN_COUNT from user_code() means * that the user code did not explicitly return a * token. 
So only override the token to return if the @@ -332,12 +341,20 @@ class <%= @classname %> { m_input_col += longest_match_info.delta_col; } - lt.token = token_to_accept; - lt.length = longest_match_info.length; + result.token = token_to_accept; + result.length = longest_match_info.length; break; } } - return lt; + if (result.token == _TOKEN_DROP) + { + result.type = Result.Type.DROP; + } + else + { + result.type = Result.Type.TOKEN; + } + return result; } private uint transition(uint current_state, uint code_point) @@ -420,7 +437,7 @@ class <%= @classname %> bool parse() { - Lexer.LexedToken lexed_token; + Lexer.Result lexed_token; uint token = _TOKEN_COUNT; StateValue[] statevalues = new StateValue[](1); uint reduced_rule_set = 0xFFFFFFFFu; diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index 539591f..b860e42 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -195,7 +195,7 @@ class Propane end else code = code.gsub(/\$\$/) do |match| - "lt.pvalue.v_#{pattern.ptypename}" + "result.pvalue.v_#{pattern.ptypename}" end code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| mode_name = $1 diff --git a/spec/test_d_lexer.d b/spec/test_d_lexer.d index bce2979..16deb0c 100644 --- a/spec/test_d_lexer.d +++ b/spec/test_d_lexer.d @@ -35,18 +35,18 @@ unittest unittest { - alias LT = Testparser.Lexer.LexedToken; + alias Result = Testparser.Lexer.Result; string input = "5 + 4 * \n677 + 567"; Testparser.Lexer lexer = new Testparser.Lexer(input); - assert(lexer.lex_token() == LT(0, 0, 1, Testparser.TOKEN_int)); - assert(lexer.lex_token() == LT(0, 2, 1, Testparser.TOKEN_plus)); - assert(lexer.lex_token() == LT(0, 4, 1, Testparser.TOKEN_int)); - assert(lexer.lex_token() == LT(0, 6, 1, Testparser.TOKEN_times)); - assert(lexer.lex_token() == LT(1, 0, 3, Testparser.TOKEN_int)); - assert(lexer.lex_token() == LT(1, 4, 1, Testparser.TOKEN_plus)); - assert(lexer.lex_token() == LT(1, 6, 3, Testparser.TOKEN_int)); - assert(lexer.lex_token() == 
LT(1, 9, 0, Testparser.TOKEN_0EOF)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 1, Testparser.TOKEN_int)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 2, 1, Testparser.TOKEN_plus)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 4, 1, Testparser.TOKEN_int)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 6, 1, Testparser.TOKEN_times)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 0, 3, Testparser.TOKEN_int)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 4, 1, Testparser.TOKEN_plus)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 6, 3, Testparser.TOKEN_int)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 9, 0, Testparser.TOKEN_0EOF)); lexer = new Testparser.Lexer(""); - assert(lexer.lex_token() == LT(0, 0, 0, Testparser.TOKEN_0EOF)); + assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 0, Testparser.TOKEN_0EOF)); }