From 7a1b4064c1c599cc1f85bfaf2067a3b585b2609b Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Wed, 12 Jul 2023 15:46:13 -0400 Subject: [PATCH] Switch to new API - close #8 The new API is more C-like and will allow consistency across all future supported language targets. --- assets/parser.d.erb | 897 ++++++++++-------- lib/propane/generator.rb | 2 +- spec/test_d_lexer.d | 37 +- .../test_d_parser_identical_rules_lookahead.d | 9 +- .../test_d_parser_rule_from_multiple_states.d | 13 +- spec/test_lexer_match_text.d | 5 +- spec/test_lexer_modes.d | 9 +- spec/test_lexer_result_value.d | 13 +- spec/test_lexer_unknown_character.d | 11 +- spec/test_parser_rule_user_code.d | 5 +- spec/test_parsing_json.d | 49 +- spec/test_parsing_lists.d | 19 +- spec/test_pattern.d | 9 +- spec/test_return_token_from_pattern.d | 5 +- spec/test_user_code.d | 9 +- 15 files changed, 611 insertions(+), 481 deletions(-) diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 40ed012..36a8a07 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -91,12 +91,40 @@ public static struct TokenInfo ParserValue pvalue; } +/** + * Lexer and parser context. + * + * The user must allocate an instance of this structure and pass it to any + * public API function. + */ +public struct p_context_t +{ + /* Lexer context data. */ + + /** Input text. */ + string input; + + /** Input text index (byte offset). */ + size_t input_index; + + /** Input text position (row/column). */ + Position input_position; + + /** Current lexer mode. */ + size_t mode; + + /* Parser context data. */ + + /** Parse result value. */ + ParserValue parse_result; +} + /************************************************************************** * Public data *************************************************************************/ /** Token names. 
*/ -public static immutable string[] token_names = [ +public static immutable string[] p_token_names = [ <% @grammar.tokens.each_with_index do |token, index| %> "<%= token.name %>", <% end %> @@ -109,87 +137,115 @@ public static immutable string[] token_names = [ /* An invalid ID value. */ private enum size_t INVALID_ID = cast(size_t)-1; +/************************************************************************** + * State initialization + *************************************************************************/ + +/** + * Initialize lexer/parser context structure. + * + * @param[out] context + * Lexer/parser context structure. + * @param input + * Text input. + */ +public void p_context_init(p_context_t * context, string input) +{ + /* New default-initialized context structure. */ + p_context_t newcontext; + + /* Lexer initialization. */ + newcontext.input = input; + newcontext.mode = <%= @lexer.mode_id("default") %>; + + /* Copy to the user's context structure. */ + *context = newcontext; +} + /************************************************************************** * Decoder *************************************************************************/ -public static class Decoder +/** + * Decode a UTF-8 code point. + * + * @param input + * Text input to decode. + * @param[out] out_code_point + * The decoded code point is stored here if the return value is P_SUCCESS. + * @param[out] out_code_point_length + * The number of bytes the code point used is stored here if the return value + * is P_SUCCESS. + * + * @retval P_SUCCESS on a successful code point decode + * @retval P_DECODE_ERROR when an encoding error is observed + * @retval P_EOF when the end of the text input is reached + */ +public size_t p_decode_code_point(string input, + CodePoint * out_code_point, ubyte * out_code_point_length) { - /** - * Decode a UTF-8 code point. 
- * - * Returns one of: - * - P_SUCCESS - * - P_DECODE_ERROR - * - P_EOF - */ - static size_t decode_code_point(string input, - CodePoint * out_code_point, - ubyte * out_code_point_length) + if (input.length == 0u) { - if (input.length == 0u) + return P_EOF; + } + char c = input[0]; + CodePoint code_point; + ubyte code_point_length; + if ((c & 0x80u) == 0u) + { + code_point = c; + code_point_length = 1u; + } + else + { + ubyte following_bytes; + if ((c & 0xE0u) == 0xC0u) { - return P_EOF; + code_point = c & 0x1Fu; + following_bytes = 1u; } - char c = input[0]; - CodePoint code_point; - ubyte code_point_length; - if ((c & 0x80u) == 0u) + else if ((c & 0xF0u) == 0xE0u) { - code_point = c; - code_point_length = 1u; + code_point = c & 0x0Fu; + following_bytes = 2u; + } + else if ((c & 0xF8u) == 0xF0u) + { + code_point = c & 0x07u; + following_bytes = 3u; + } + else if ((c & 0xFCu) == 0xF8u) + { + code_point = c & 0x03u; + following_bytes = 4u; + } + else if ((c & 0xFEu) == 0xFCu) + { + code_point = c & 0x01u; + following_bytes = 5u; } else { - ubyte following_bytes; - if ((c & 0xE0u) == 0xC0u) - { - code_point = c & 0x1Fu; - following_bytes = 1u; - } - else if ((c & 0xF0u) == 0xE0u) - { - code_point = c & 0x0Fu; - following_bytes = 2u; - } - else if ((c & 0xF8u) == 0xF0u) - { - code_point = c & 0x07u; - following_bytes = 3u; - } - else if ((c & 0xFCu) == 0xF8u) - { - code_point = c & 0x03u; - following_bytes = 4u; - } - else if ((c & 0xFEu) == 0xFCu) - { - code_point = c & 0x01u; - following_bytes = 5u; - } - else - { - return P_DECODE_ERROR; - } - if (input.length <= following_bytes) - { - return P_DECODE_ERROR; - } - code_point_length = cast(ubyte)(following_bytes + 1u); - for (size_t i = 0u; i < following_bytes; i++) - { - char b = input[i + 1u]; - if ((b & 0xC0u) != 0x80u) - { - return P_DECODE_ERROR; - } - code_point = (code_point << 6u) | (b & 0x3Fu); - } + return P_DECODE_ERROR; + } + if (input.length <= following_bytes) + { + return P_DECODE_ERROR; + } + 
code_point_length = cast(ubyte)(following_bytes + 1u); + for (size_t i = 0u; i < following_bytes; i++) + { + char b = input[i + 1u]; + if ((b & 0xC0u) != 0x80u) + { + return P_DECODE_ERROR; + } + code_point = (code_point << 6u) | (b & 0x3Fu); } - *out_code_point = code_point; - *out_code_point_length = code_point_length; - return P_SUCCESS; } + *out_code_point = code_point; + *out_code_point_length = code_point_length; + return P_SUCCESS; } /************************************************************************** @@ -223,6 +279,13 @@ private struct Mode uint state_table_offset; } +private struct MatchInfo +{ + size_t length; + Position delta_position; + const(LexerState) * accepting_state; +} + private static immutable Transition[] lexer_transitions = [ <% @lexer.transition_table.each do |transition_table_entry| %> Transition(<%= transition_table_entry[:first] %>u, @@ -255,234 +318,256 @@ private static immutable Mode[] modes = [ <% end %> ]; -public static class Lexer +/** + * Execute user code associated with a lexer pattern. + * + * @param context + * Lexer/parser context structure. + * @param code_id + * The ID of the user code block to execute. + * @param match + * Matched text for this pattern. + * @param out_token_info + * Lexer token info in progress. + * + * @return Token to accept, or invalid token if the user code does + * not explicitly return a token. + */ +private Token lexer_user_code(p_context_t * context, UserCodeID code_id, string match, TokenInfo * out_token_info) { - private string m_input; - private size_t m_input_index; - private Position m_input_position; - private size_t m_mode; - - this(string input) + switch (code_id) { - m_input = input; - m_mode = <%= @lexer.mode_id("default") %>; - } - - /** - * Lex the next token in the input stream. 
- * - * Returns one of: - * - P_SUCCESS - * - P_DECODE_ERROR - * - P_UNEXPECTED_INPUT - */ - size_t lex_token(TokenInfo * out_token_info) - { - for (;;) - { - size_t result = attempt_lex_token(out_token_info); - if (result != P_DROP) - { - return result; - } - } - } - - /** - * Execute user code associated with a lexer pattern. - * - * @param code_id The ID of the user code block to execute. - * @param match Matched text for this pattern. - * @param out_token_info Lexer token info in progress. - * - * @return Token to accept, or invalid token if the user code does - * not explicitly return a token. - */ - private Token user_code(UserCodeID code_id, string match, TokenInfo * out_token_info) - { - switch (code_id) - { <% @grammar.patterns.each do |pattern| %> <% if pattern.code_id %> - case <%= pattern.code_id %>u: { + case <%= pattern.code_id %>u: { <%= expand_code(pattern.code, false, nil, pattern) %> - } break; + } break; <% end %> <% end %> - default: break; - } - - return INVALID_TOKEN_ID; + default: break; } - /** - * Attempt to lex the next token in the input stream. - * - * Returns one of: - * - P_SUCCESS - * - P_DECODE_ERROR - * - P_UNEXPECTED_INPUT - * - P_DROP - */ - private size_t attempt_lex_token(TokenInfo * out_token_info) + return INVALID_TOKEN_ID; +} + +/** + * Check if there is a transition from the current lexer state to another + * based on the given input code point. + * + * @param current_state + * Current lexer state. + * @param code_point + * Input code point. + * + * @return Lexer state to transition to, or INVALID_LEXER_STATE_ID if none. 
+ */ +private LexerStateID check_lexer_transition(uint current_state, uint code_point) +{ + uint transition_table_index = lexer_states[current_state].transition_table_index; + for (uint i = 0u; i < lexer_states[current_state].n_transitions; i++) { - TokenInfo token_info; - token_info.position = m_input_position; - token_info.token = INVALID_TOKEN_ID; - *out_token_info = token_info; // TODO: remove - MatchInfo match_info; - size_t unexpected_input_length; - size_t result = find_longest_match(&match_info, &unexpected_input_length); + if ((lexer_transitions[transition_table_index + i].first <= code_point) && + (code_point <= lexer_transitions[transition_table_index + i].last)) + { + return lexer_transitions[transition_table_index + i].destination_state; + } + } + return INVALID_LEXER_STATE_ID; +} + +/** + * Find the longest lexer pattern match at the current position. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_match_info + * The longest match information is stored here if the return value is + * P_SUCCESS. + * + * @retval P_SUCCESS + * A lexer pattern match was found. + * @retval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @retval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + * @retval P_EOF + * The end of the text input was reached. 
+ */ +private size_t find_longest_match( + p_context_t * context, + MatchInfo * out_match_info, + size_t * out_unexpected_input_length) +{ + MatchInfo longest_match; + MatchInfo attempt_match; + uint current_state = modes[context.mode].state_table_offset; + for (;;) + { + string input = context.input[(context.input_index + attempt_match.length)..(context.input.length)]; + CodePoint code_point; + ubyte code_point_length; + size_t result = p_decode_code_point(input, &code_point, &code_point_length); switch (result) { case P_SUCCESS: - Token token_to_accept = match_info.accepting_state.token; - if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID) + LexerStateID transition_state = check_lexer_transition(current_state, code_point); + if (transition_state != INVALID_LEXER_STATE_ID) { - Token user_code_token = user_code(match_info.accepting_state.code_id, m_input[m_input_index..(m_input_index + match_info.length)], &token_info); - /* An invalid Token from user_code() means that the user - * code did not explicitly return a token. So only override - * the token to return if the user code does explicitly - * return a token. */ - if (user_code_token != INVALID_TOKEN_ID) + attempt_match.length += code_point_length; + if (code_point == '\n') { - token_to_accept = user_code_token; + attempt_match.delta_position.row++; + attempt_match.delta_position.col = 0u; + } + else + { + attempt_match.delta_position.col++; + } + current_state = transition_state; + if (lexer_states[current_state].accepts) + { + attempt_match.accepting_state = &lexer_states[current_state]; + longest_match = attempt_match; } } - - /* Update the input position tracking. 
*/ - m_input_index += match_info.length; - m_input_position.row += match_info.delta_position.row; - if (match_info.delta_position.row != 0u) + else if (longest_match.length > 0) { - m_input_position.col = match_info.delta_position.col; + *out_match_info = longest_match; + return P_SUCCESS; } else { - m_input_position.col += match_info.delta_position.col; + *out_unexpected_input_length = attempt_match.length + code_point_length; + return P_UNEXPECTED_INPUT; } - if (token_to_accept == INVALID_TOKEN_ID) - { - return P_DROP; - } - token_info.token = token_to_accept; - token_info.length = match_info.length; - *out_token_info = token_info; - return P_SUCCESS; + break; case P_EOF: - token_info.token = TOKEN___EOF; - *out_token_info = token_info; - return P_SUCCESS; + /* We hit EOF. */ + if (longest_match.length > 0) + { + /* We have a match, so use it. */ + *out_match_info = longest_match; + return P_SUCCESS; + } + else if (attempt_match.length != 0) + { + /* There is a partial match - error! */ + *out_unexpected_input_length = attempt_match.length; + return P_UNEXPECTED_INPUT; + } + else + { + /* Valid EOF return. */ + return P_EOF; + } + break; default: return result; } } +} - private struct MatchInfo +/** + * Attempt to lex the next token in the input stream. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @retval P_SUCCESS + * A token was successfully lexed. + * @retval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @retval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + * @retval P_DROP + * A drop pattern was matched so the lexer should continue. 
+ */ +private size_t attempt_lex_token(p_context_t * context, TokenInfo * out_token_info) +{ + TokenInfo token_info; + token_info.position = context.input_position; + token_info.token = INVALID_TOKEN_ID; + *out_token_info = token_info; // TODO: remove + MatchInfo match_info; + size_t unexpected_input_length; + size_t result = find_longest_match(context, &match_info, &unexpected_input_length); + switch (result) { - size_t length; - Position delta_position; - const(LexerState) * accepting_state; - } - - /** - * Find the longest lexer pattern match at the current position. - * - * Returns one of: - * - P_SUCCESS - * - P_DECODE_ERROR - * - P_UNEXPECTED_INPUT - * - P_EOF - */ - private size_t find_longest_match( - MatchInfo * out_match_info, - size_t * out_unexpected_input_length) - { - MatchInfo longest_match; - MatchInfo attempt_match; - uint current_state = modes[m_mode].state_table_offset; - for (;;) + case P_SUCCESS: + Token token_to_accept = match_info.accepting_state.token; + if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID) { - string input = m_input[(m_input_index + attempt_match.length)..(m_input.length)]; - CodePoint code_point; - ubyte code_point_length; - size_t result = Decoder.decode_code_point(input, &code_point, &code_point_length); - switch (result) + Token user_code_token = lexer_user_code(context, match_info.accepting_state.code_id, context.input[context.input_index..(context.input_index + match_info.length)], &token_info); + /* An invalid Token from lexer_user_code() means that the user + * code did not explicitly return a token. So only override + * the token to return if the user code does explicitly + * return a token. 
*/ + if (user_code_token != INVALID_TOKEN_ID) { - case P_SUCCESS: - LexerStateID transition_state = transition(current_state, code_point); - if (transition_state != INVALID_LEXER_STATE_ID) - { - attempt_match.length += code_point_length; - if (code_point == '\n') - { - attempt_match.delta_position.row++; - attempt_match.delta_position.col = 0u; - } - else - { - attempt_match.delta_position.col++; - } - current_state = transition_state; - if (lexer_states[current_state].accepts) - { - attempt_match.accepting_state = &lexer_states[current_state]; - longest_match = attempt_match; - } - } - else if (longest_match.length > 0) - { - *out_match_info = longest_match; - return P_SUCCESS; - } - else - { - *out_unexpected_input_length = attempt_match.length + code_point_length; - return P_UNEXPECTED_INPUT; - } - break; - - case P_EOF: - /* We hit EOF. */ - if (longest_match.length > 0) - { - /* We have a match, so use it. */ - *out_match_info = longest_match; - return P_SUCCESS; - } - else if (attempt_match.length != 0) - { - /* There is a partial match - error! */ - *out_unexpected_input_length = attempt_match.length; - return P_UNEXPECTED_INPUT; - } - else - { - /* Valid EOF return. */ - return P_EOF; - } - break; - - default: - return result; + token_to_accept = user_code_token; } } - } - private LexerStateID transition(uint current_state, uint code_point) - { - uint transition_table_index = lexer_states[current_state].transition_table_index; - for (uint i = 0u; i < lexer_states[current_state].n_transitions; i++) + /* Update the input position tracking. 
*/ + context.input_index += match_info.length; + context.input_position.row += match_info.delta_position.row; + if (match_info.delta_position.row != 0u) { - if ((lexer_transitions[transition_table_index + i].first <= code_point) && - (code_point <= lexer_transitions[transition_table_index + i].last)) - { - return lexer_transitions[transition_table_index + i].destination_state; - } + context.input_position.col = match_info.delta_position.col; + } + else + { + context.input_position.col += match_info.delta_position.col; + } + if (token_to_accept == INVALID_TOKEN_ID) + { + return P_DROP; + } + token_info.token = token_to_accept; + token_info.length = match_info.length; + *out_token_info = token_info; + return P_SUCCESS; + + case P_EOF: + token_info.token = TOKEN___EOF; + *out_token_info = token_info; + return P_SUCCESS; + + default: + return result; + } +} + +/** + * Lex the next token in the input stream. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @retval P_SUCCESS + * A token was successfully lexed. + * @retval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @retval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + */ +public size_t p_lex(p_context_t * context, TokenInfo * out_token_info) +{ + for (;;) + { + size_t result = attempt_lex_token(context, out_token_info); + if (result != P_DROP) + { + return result; } - return INVALID_LEXER_STATE_ID; } } @@ -551,169 +636,201 @@ private static immutable ParserState[] parser_states = [ <% end %> ]; -public static class Parser +/** + * Execute user code associated with a parser rule. + * + * @param rule The ID of the rule. + * + * @return Parse value. 
+ */ +private ParserValue parser_user_code(uint rule, StateValue[] statevalues, uint n_states) { - private Lexer m_lexer; + ParserValue _pvalue; - private ParserValue parse_result; - - this(string input) + switch (rule) { - m_lexer = new Lexer(input); +<% @grammar.rules.each do |rule| %> +<% if rule.code %> + case <%= rule.id %>u: { +<%= expand_code(rule.code, true, rule, nil) %> + } break; +<% end %> +<% end %> + default: break; } - size_t parse() + return _pvalue; +} + +/** + * Check if the parser should shift to a new state. + * + * @param state + * Parser state ID. + * @param symbol + * Incoming token/rule set ID. + * + * @return State to shift to, or INVALID_ID if none. + */ +private size_t check_shift(size_t state, size_t symbol) +{ + uint start = parser_states[state].shift_table_index; + uint end = start + parser_states[state].n_shift_entries; + for (uint i = start; i < end; i++) { - TokenInfo token_info; - Token token = INVALID_TOKEN_ID; - StateValue[] statevalues = new StateValue[](1); - size_t reduced_rule_set = INVALID_ID; - ParserValue reduced_parser_value; - for (;;) + if (parser_shifts[i].symbol == symbol) { - if (token == INVALID_TOKEN_ID) - { - size_t lexer_result = m_lexer.lex_token(&token_info); - if (lexer_result != P_SUCCESS) - { - return lexer_result; - } - token = token_info.token; - } - size_t shift_state = INVALID_ID; - if (reduced_rule_set != INVALID_ID) - { - shift_state = check_shift(statevalues[$-1].state, reduced_rule_set); - } - if (shift_state == INVALID_ID) - { - shift_state = check_shift(statevalues[$-1].state, token); - if ((shift_state != INVALID_ID) && (token == TOKEN___EOF)) - { - /* Successful parse. */ - parse_result = statevalues[$-1].pvalue; - return P_SUCCESS; - } - } - if (shift_state != INVALID_ID) - { - /* We have something to shift. */ - statevalues ~= StateValue(shift_state); - if (reduced_rule_set == INVALID_ID) - { - /* We shifted a token, mark it consumed. 
*/ - token = INVALID_TOKEN_ID; - statevalues[$-1].pvalue = token_info.pvalue; - } - else - { - /* We shifted a RuleSet. */ - statevalues[$-1].pvalue = reduced_parser_value; - ParserValue new_parse_result; - reduced_parser_value = new_parse_result; - reduced_rule_set = INVALID_ID; - } - continue; - } - - size_t reduce_index = check_reduce(statevalues[$-1].state, token); - if (reduce_index != INVALID_ID) - { - /* We have something to reduce. */ - reduced_parser_value = user_code(parser_reduces[reduce_index].rule, statevalues, parser_reduces[reduce_index].n_states); - reduced_rule_set = parser_reduces[reduce_index].rule_set; - statevalues.length -= parser_reduces[reduce_index].n_states; - continue; - } - - /* Error, unexpected token. */ - write("Unexpected token "); - if (token != INVALID_TOKEN_ID) - { - writeln(token_names[token]); - } - else - { - writeln("{other}"); - } - return P_UNEXPECTED_TOKEN; - } - } - - @property <%= start_rule_type[1] %> result() - { - return parse_result.v_<%= start_rule_type[0] %>; - } - - private size_t check_shift(size_t state, size_t symbol) - { - uint start = parser_states[state].shift_table_index; - uint end = start + parser_states[state].n_shift_entries; - for (uint i = start; i < end; i++) - { - if (parser_shifts[i].symbol == symbol) - { // if (symbol != INVALID_TOKEN_ID) // { -// writeln("Shifting ", token_names[symbol]); +// writeln("Shifting ", p_token_names[symbol]); // } // else // { // writeln("Shifting rule set ", symbol); // } - return parser_shifts[i].state; - } + return parser_shifts[i].state; } - return INVALID_ID; } + return INVALID_ID; +} - private size_t check_reduce(size_t state, Token token) +/** + * Check if the parser should reduce to a new state. + * + * @param state + * Parser state ID. + * @param token + * Incoming token ID. + * + * @return Index of the matching parser_reduces entry, or INVALID_ID if none. 
+ */ +private size_t check_reduce(size_t state, Token token) +{ + size_t start = parser_states[state].reduce_table_index; + size_t end = start + parser_states[state].n_reduce_entries; + for (size_t i = start; i < end; i++) { - size_t start = parser_states[state].reduce_table_index; - size_t end = start + parser_states[state].n_reduce_entries; - for (size_t i = start; i < end; i++) + if ((parser_reduces[i].token == token) || + (parser_reduces[i].token == INVALID_TOKEN_ID)) { - if ((parser_reduces[i].token == token) || - (parser_reduces[i].token == INVALID_TOKEN_ID)) - { // write("Reducing rule ", parser_reduces[i].rule, ", rule set ", parser_reduces[i].rule_set, " lookahead "); // if (token != INVALID_TOKEN_ID) // { -// writeln(token_names[token]); +// writeln(p_token_names[token]); // } // else // { // writeln("{other}"); // } - return i; + return i; + } + } + return INVALID_ID; +} + +/** + * Run the parser. + * + * @param context + * Lexer/parser context structure. + * + * @retval P_SUCCESS + * The parser successfully matched the input text. The parse result value + * can be accessed with p_result(). + * @retval P_UNEXPECTED_TOKEN + * An unexpected token was encountered that does not match any grammar rule. + * @retval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @retval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. 
+ */ +public size_t p_parse(p_context_t * context) +{ + TokenInfo token_info; + Token token = INVALID_TOKEN_ID; + StateValue[] statevalues = new StateValue[](1); + size_t reduced_rule_set = INVALID_ID; + ParserValue reduced_parser_value; + for (;;) + { + if (token == INVALID_TOKEN_ID) + { + size_t lexer_result = p_lex(context, &token_info); + if (lexer_result != P_SUCCESS) + { + return lexer_result; + } + token = token_info.token; + } + size_t shift_state = INVALID_ID; + if (reduced_rule_set != INVALID_ID) + { + shift_state = check_shift(statevalues[$-1].state, reduced_rule_set); + } + if (shift_state == INVALID_ID) + { + shift_state = check_shift(statevalues[$-1].state, token); + if ((shift_state != INVALID_ID) && (token == TOKEN___EOF)) + { + /* Successful parse. */ + context.parse_result = statevalues[$-1].pvalue; + return P_SUCCESS; } } - return INVALID_ID; - } - - /** - * Execute user code associated with a parser rule. - * - * @param rule The ID of the rule. - * - * @return Parse value. - */ - private ParserValue user_code(uint rule, StateValue[] statevalues, uint n_states) - { - ParserValue _pvalue; - - switch (rule) + if (shift_state != INVALID_ID) { -<% @grammar.rules.each do |rule| %> -<% if rule.code %> - case <%= rule.id %>u: { -<%= expand_code(rule.code, true, rule, nil) %> - } break; -<% end %> -<% end %> - default: break; + /* We have something to shift. */ + statevalues ~= StateValue(shift_state); + if (reduced_rule_set == INVALID_ID) + { + /* We shifted a token, mark it consumed. */ + token = INVALID_TOKEN_ID; + statevalues[$-1].pvalue = token_info.pvalue; + } + else + { + /* We shifted a RuleSet. */ + statevalues[$-1].pvalue = reduced_parser_value; + ParserValue new_parse_result; + reduced_parser_value = new_parse_result; + reduced_rule_set = INVALID_ID; + } + continue; } - return _pvalue; + size_t reduce_index = check_reduce(statevalues[$-1].state, token); + if (reduce_index != INVALID_ID) + { + /* We have something to reduce. 
*/ + reduced_parser_value = parser_user_code(parser_reduces[reduce_index].rule, statevalues, parser_reduces[reduce_index].n_states); + reduced_rule_set = parser_reduces[reduce_index].rule_set; + statevalues.length -= parser_reduces[reduce_index].n_states; + continue; + } + + /* Error, unexpected token. */ + write("Unexpected token "); + if (token != INVALID_TOKEN_ID) + { + writeln(p_token_names[token]); + } + else + { + writeln("{other}"); + } + return P_UNEXPECTED_TOKEN; } } + +/** + * Get the parse result value. + * + * @param context + * Lexer/parser context structure. + * + * @return Parse result value. + */ +public <%= start_rule_type[1] %> p_result(p_context_t * context) +{ + return context.parse_result.v_<%= start_rule_type[0] %>; +} diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index 6fc96a2..6903e3e 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -203,7 +203,7 @@ class Propane unless mode_id raise Error.new("Lexer mode '#{mode_name}' not found") end - "m_mode = #{mode_id}u" + "context.mode = #{mode_id}u" end end code diff --git a/spec/test_d_lexer.d b/spec/test_d_lexer.d index d7e71b1..b14deeb 100644 --- a/spec/test_d_lexer.d +++ b/spec/test_d_lexer.d @@ -12,31 +12,31 @@ unittest CodePoint code_point; ubyte code_point_length; - result = Decoder.decode_code_point("5", &code_point, &code_point_length); + result = p_decode_code_point("5", &code_point, &code_point_length); assert(result == P_SUCCESS); assert(code_point == '5'); assert(code_point_length == 1u); - result = Decoder.decode_code_point("", &code_point, &code_point_length); + result = p_decode_code_point("", &code_point, &code_point_length); assert(result == P_EOF); - result = Decoder.decode_code_point("\xC2\xA9", &code_point, &code_point_length); + result = p_decode_code_point("\xC2\xA9", &code_point, &code_point_length); assert(result == P_SUCCESS); assert(code_point == 0xA9u); assert(code_point_length == 2u); - result = 
Decoder.decode_code_point("\xf0\x9f\xa7\xa1", &code_point, &code_point_length); + result = p_decode_code_point("\xf0\x9f\xa7\xa1", &code_point, &code_point_length); assert(result == P_SUCCESS); assert(code_point == 0x1F9E1u); assert(code_point_length == 4u); - result = Decoder.decode_code_point("\xf0\x9f\x27", &code_point, &code_point_length); + result = p_decode_code_point("\xf0\x9f\x27", &code_point, &code_point_length); assert(result == P_DECODE_ERROR); - result = Decoder.decode_code_point("\xf0\x9f\xa7\xFF", &code_point, &code_point_length); + result = p_decode_code_point("\xf0\x9f\xa7\xFF", &code_point, &code_point_length); assert(result == P_DECODE_ERROR); - result = Decoder.decode_code_point("\xfe", &code_point, &code_point_length); + result = p_decode_code_point("\xfe", &code_point, &code_point_length); assert(result == P_DECODE_ERROR); } @@ -44,25 +44,26 @@ unittest { TokenInfo token_info; string input = "5 + 4 * \n677 + 567"; - Lexer lexer = new Lexer(input); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(0, 0), 1, TOKEN_int)); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(0, 2), 1, TOKEN_plus)); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(0, 4), 1, TOKEN_int)); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(0, 6), 1, TOKEN_times)); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(1, 0), 3, TOKEN_int)); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + assert(p_lex(&context, &token_info) == P_SUCCESS); 
assert(token_info == TokenInfo(Position(1, 4), 1, TOKEN_plus)); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(1, 6), 3, TOKEN_int)); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(1, 9), 0, TOKEN___EOF)); - lexer = new Lexer(""); - assert(lexer.lex_token(&token_info) == P_SUCCESS); + p_context_init(&context, ""); + assert(p_lex(&context, &token_info) == P_SUCCESS); assert(token_info == TokenInfo(Position(0, 0), 0, TOKEN___EOF)); } diff --git a/spec/test_d_parser_identical_rules_lookahead.d b/spec/test_d_parser_identical_rules_lookahead.d index cd626aa..3ea08a0 100644 --- a/spec/test_d_parser_identical_rules_lookahead.d +++ b/spec/test_d_parser_identical_rules_lookahead.d @@ -9,10 +9,11 @@ int main() unittest { string input = "aba"; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); input = "abb"; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); } diff --git a/spec/test_d_parser_rule_from_multiple_states.d b/spec/test_d_parser_rule_from_multiple_states.d index af3e2bd..4671381 100644 --- a/spec/test_d_parser_rule_from_multiple_states.d +++ b/spec/test_d_parser_rule_from_multiple_states.d @@ -9,14 +9,15 @@ int main() unittest { string input = "a"; - auto parser = new Parser(input); - assert(parser.parse() == P_UNEXPECTED_TOKEN); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_UNEXPECTED_TOKEN); input = "a b"; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); input = "bb"; - parser = new Parser(input); - 
assert(parser.parse() == P_SUCCESS); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); } diff --git a/spec/test_lexer_match_text.d b/spec/test_lexer_match_text.d index 0f2053d..d4d0a95 100644 --- a/spec/test_lexer_match_text.d +++ b/spec/test_lexer_match_text.d @@ -9,7 +9,8 @@ int main() unittest { string input = `identifier_123`; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); writeln("pass1"); } diff --git a/spec/test_lexer_modes.d b/spec/test_lexer_modes.d index 1e02165..e6617b9 100644 --- a/spec/test_lexer_modes.d +++ b/spec/test_lexer_modes.d @@ -9,12 +9,13 @@ int main() unittest { string input = `abc "a string" def`; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); writeln("pass1"); input = `abc "abc def" def`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); writeln("pass2"); } diff --git a/spec/test_lexer_result_value.d b/spec/test_lexer_result_value.d index 541e8a0..08c7b9a 100644 --- a/spec/test_lexer_result_value.d +++ b/spec/test_lexer_result_value.d @@ -9,12 +9,13 @@ int main() unittest { string input = `x`; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(parser.result == 1u); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 1u); input = `fabulous`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(parser.result == 8u); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 8u); } diff --git a/spec/test_lexer_unknown_character.d b/spec/test_lexer_unknown_character.d index 43d838c..2812c7c 
100644 --- a/spec/test_lexer_unknown_character.d +++ b/spec/test_lexer_unknown_character.d @@ -9,11 +9,12 @@ int main() unittest { string input = `x`; - auto parser = new Parser(input); - assert(parser.parse() == P_UNEXPECTED_INPUT); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_UNEXPECTED_INPUT); input = `123`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(parser.result == 123u); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 123u); } diff --git a/spec/test_parser_rule_user_code.d b/spec/test_parser_rule_user_code.d index f45b3c0..8866eb0 100644 --- a/spec/test_parser_rule_user_code.d +++ b/spec/test_parser_rule_user_code.d @@ -9,6 +9,7 @@ int main() unittest { string input = "ab"; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); } diff --git a/spec/test_parsing_json.d b/spec/test_parsing_json.d index f9f2909..27fb386 100644 --- a/spec/test_parsing_json.d +++ b/spec/test_parsing_json.d @@ -10,44 +10,45 @@ int main() unittest { string input = ``; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); input = `{}`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(cast(JSONObject)parser.result); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(cast(JSONObject)p_result(&context)); input = `[]`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(cast(JSONArray)parser.result); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(cast(JSONArray)p_result(&context)); input = `-45.6`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - 
assert(cast(JSONNumber)parser.result); - assert((cast(JSONNumber)parser.result).value == -45.6); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(cast(JSONNumber)p_result(&context)); + assert((cast(JSONNumber)p_result(&context)).value == -45.6); input = `2E-2`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(cast(JSONNumber)parser.result); - assert((cast(JSONNumber)parser.result).value == 0.02); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(cast(JSONNumber)p_result(&context)); + assert((cast(JSONNumber)p_result(&context)).value == 0.02); input = `{"hi":true}`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(cast(JSONObject)parser.result); - JSONObject o = cast(JSONObject)parser.result; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(cast(JSONObject)p_result(&context)); + JSONObject o = cast(JSONObject)p_result(&context); assert(o.value["hi"]); assert(cast(JSONTrue)o.value["hi"]); input = `{"ff": false, "nn": null}`; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(cast(JSONObject)parser.result); - o = cast(JSONObject)parser.result; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(cast(JSONObject)p_result(&context)); + o = cast(JSONObject)p_result(&context); assert(o.value["ff"]); assert(cast(JSONFalse)o.value["ff"]); assert(o.value["nn"]); diff --git a/spec/test_parsing_lists.d b/spec/test_parsing_lists.d index d4fb46b..86fdd20 100644 --- a/spec/test_parsing_lists.d +++ b/spec/test_parsing_lists.d @@ -9,17 +9,18 @@ int main() unittest { string input = "a"; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(parser.result == 1u); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 1u); input = ""; - parser = new 
Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(parser.result == 0u); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 0u); input = "aaaaaaaaaaaaaaaa"; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); - assert(parser.result == 16u); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 16u); } diff --git a/spec/test_pattern.d b/spec/test_pattern.d index 80d1db1..df62ad4 100644 --- a/spec/test_pattern.d +++ b/spec/test_pattern.d @@ -9,12 +9,13 @@ int main() unittest { string input = "abcdef"; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); writeln("pass1"); input = "defabcdef"; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); writeln("pass2"); } diff --git a/spec/test_return_token_from_pattern.d b/spec/test_return_token_from_pattern.d index 255c34c..f14fcd8 100644 --- a/spec/test_return_token_from_pattern.d +++ b/spec/test_return_token_from_pattern.d @@ -9,6 +9,7 @@ int main() unittest { string input = "defghidef"; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); } diff --git a/spec/test_user_code.d b/spec/test_user_code.d index 9fe1b74..18e182d 100644 --- a/spec/test_user_code.d +++ b/spec/test_user_code.d @@ -9,12 +9,13 @@ int main() unittest { string input = "abcdef"; - auto parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_t context; + p_context_init(&context, input); + assert(p_parse(&context) == P_SUCCESS); writeln("pass1"); input = "abcabcdef"; - parser = new Parser(input); - assert(parser.parse() == P_SUCCESS); + p_context_init(&context, 
input); + assert(p_parse(&context) == P_SUCCESS); writeln("pass2"); }