diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 01bc68f..6c044ee 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -235,17 +235,16 @@ class <%= @classname %> <% end %> ]; - struct Result + public enum : size_t { - enum Type - { - DECODE_ERROR, - DROP, - TOKEN, - UNEXPECTED_INPUT, - } + P_TOKEN, + P_UNEXPECTED_INPUT, + P_DECODE_ERROR, + P_DROP, + } - Type type; + public static struct TokenInfo + { size_t row; size_t col; size_t length; @@ -265,12 +264,12 @@ class <%= @classname %> m_mode = <%= @lexer.mode_id("default") %>; } - Result lex_token() + size_t lex_token(TokenInfo * out_token_info) { for (;;) { - Result result = attempt_lex_token(); - if (result.token < _TOKEN_COUNT) + size_t result = attempt_lex_token(out_token_info); + if (out_token_info.token < _TOKEN_COUNT) { return result; } @@ -282,12 +281,12 @@ class <%= @classname %> * * @param code_id The ID of the user code block to execute. * @param match Matched text for this pattern. - * @param result Result lexer result in progress. + * @param out_token_info Lexer token info in progress. * * @return Token to accept, or invalid token if the user code does * not explicitly return a token. 
*/ - private Token user_code(UserCodeID code_id, string match, Result * result) + private Token user_code(UserCodeID code_id, string match, TokenInfo * out_token_info) { switch (code_id) { @@ -304,12 +303,13 @@ class <%= @classname %> return Token.invalid(); } - private Result attempt_lex_token() + private size_t attempt_lex_token(TokenInfo * out_token_info) { - Result result; - result.row = m_input_row; - result.col = m_input_col; - result.token = _TOKEN_COUNT; + TokenInfo token_info; + token_info.row = m_input_row; + token_info.col = m_input_col; + token_info.token = _TOKEN_COUNT; + *out_token_info = token_info; // Early write is required: lex_token() loops on out_token_info.token == _TOKEN_COUNT, and the P_DROP/P_DECODE_ERROR/P_UNEXPECTED_INPUT paths below return without writing *out_token_info again. MatchInfo match_info; size_t unexpected_input_length; switch (find_longest_match(match_info, unexpected_input_length)) @@ -318,7 +318,7 @@ class <%= @classname %> uint token_to_accept = match_info.accepting_state.token; if (match_info.accepting_state.code_id.is_valid()) { - Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &result); + Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_position..(m_input_position + match_info.length)], &token_info); /* An invalid Token from user_code() means that the user * code did not explicitly return a token. 
So only override * the token to return if the user code does explicitly @@ -340,30 +340,25 @@ class <%= @classname %> { m_input_col += match_info.delta_col; } - result.token = token_to_accept; - result.length = match_info.length; if (match_info.accepting_state.drop) { - result.type = Result.Type.DROP; + return P_DROP; } - else - { - result.type = Result.Type.TOKEN; - } - return result; + token_info.token = token_to_accept; + token_info.length = match_info.length; + *out_token_info = token_info; + return P_TOKEN; case FindLongestMatchResult.DECODE_ERROR: - result.type = Result.Type.DECODE_ERROR; - return result; + return P_DECODE_ERROR; case FindLongestMatchResult.EOF: - result.type = Result.Type.TOKEN; - result.token = TOKEN___EOF; - return result; + token_info.token = TOKEN___EOF; + *out_token_info = token_info; + return P_TOKEN; case FindLongestMatchResult.UNEXPECTED_INPUT: - result.type = Result.Type.UNEXPECTED_INPUT; - return result; + return P_UNEXPECTED_INPUT; default: assert(false); @@ -576,7 +571,7 @@ class <%= @classname %> bool parse() { - Lexer.Result lexed_token; + Lexer.TokenInfo token_info; uint token = _TOKEN_COUNT; StateValue[] statevalues = new StateValue[](1); uint reduced_rule_set = 0xFFFFFFFFu; @@ -585,8 +580,8 @@ class <%= @classname %> { if (token == _TOKEN_COUNT) { - lexed_token = m_lexer.lex_token(); - token = lexed_token.token; + size_t lexer_result = m_lexer.lex_token(&token_info); + token = token_info.token; } uint shift_state = 0xFFFFFFFFu; if (reduced_rule_set != 0xFFFFFFFFu) @@ -611,7 +606,7 @@ class <%= @classname %> { /* We shifted a token, mark it consumed. 
*/ token = _TOKEN_COUNT; - statevalues[$-1].pvalue = lexed_token.pvalue; + statevalues[$-1].pvalue = token_info.pvalue; } else { diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index e2f8989..95975dd 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -195,7 +195,7 @@ class Propane end else code = code.gsub(/\$\$/) do |match| - "result.pvalue.v_#{pattern.ptypename}" + "out_token_info.pvalue.v_#{pattern.ptypename}" end code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| mode_name = $1 diff --git a/spec/test_d_lexer.d b/spec/test_d_lexer.d index 77cf247..5238aee 100644 --- a/spec/test_d_lexer.d +++ b/spec/test_d_lexer.d @@ -43,18 +43,28 @@ unittest unittest { - alias Result = Testparser.Lexer.Result; + alias TokenInfo = Testparser.Lexer.TokenInfo; + TokenInfo token_info; string input = "5 + 4 * \n677 + 567"; Testparser.Lexer lexer = new Testparser.Lexer(input); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 1, Testparser.TOKEN_int)); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 2, 1, Testparser.TOKEN_plus)); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 4, 1, Testparser.TOKEN_int)); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 6, 1, Testparser.TOKEN_times)); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 0, 3, Testparser.TOKEN_int)); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 4, 1, Testparser.TOKEN_plus)); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 6, 3, Testparser.TOKEN_int)); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 1, 9, 0, Testparser.TOKEN___EOF)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(0, 0, 1, Testparser.TOKEN_int)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(0, 2, 1, Testparser.TOKEN_plus)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(0, 4, 1, 
Testparser.TOKEN_int)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(0, 6, 1, Testparser.TOKEN_times)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(1, 0, 3, Testparser.TOKEN_int)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(1, 4, 1, Testparser.TOKEN_plus)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(1, 6, 3, Testparser.TOKEN_int)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(1, 9, 0, Testparser.TOKEN___EOF)); lexer = new Testparser.Lexer(""); - assert(lexer.lex_token() == Result(Result.Type.TOKEN, 0, 0, 0, Testparser.TOKEN___EOF)); + assert(lexer.lex_token(&token_info) == lexer.P_TOKEN); + assert(token_info == TokenInfo(0, 0, 0, Testparser.TOKEN___EOF)); }