From 48b4033ef2f5929fb6b723a8618ac374dcfd7297 Mon Sep 17 00:00:00 2001
From: Josh Holtrop
Date: Wed, 2 Aug 2023 21:36:43 -0400
Subject: [PATCH] wip

---
 assets/parser.c.erb | 839 ++++++++++++++++++++++++++++++++++++++++++++
 assets/parser.h.erb | 108 ++++++
 2 files changed, 947 insertions(+)
 create mode 100644 assets/parser.c.erb
 create mode 100644 assets/parser.h.erb

diff --git a/assets/parser.c.erb b/assets/parser.c.erb
new file mode 100644
index 0000000..475e3ad
--- /dev/null
+++ b/assets/parser.c.erb
@@ -0,0 +1,839 @@
+#include ".h"
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+/**************************************************************************
+ * User code blocks
+ *************************************************************************/
+
+<% @grammar.code_blocks.each do |code| %>
+<%= code %>
+<% end %>
+
+/**************************************************************************
+ * Private types
+ *************************************************************************/
+
+<% if @grammar.prefix.upcase != "P_" %>
+/* Result codes. */
+#define P_SUCCESS 0u
+#define P_DECODE_ERROR 1u
+#define P_UNEXPECTED_INPUT 2u
+#define P_UNEXPECTED_TOKEN 3u
+#define P_DROP 4u
+#define P_EOF 5u
+<% end %>
+
+/* An invalid ID value. */
+#define INVALID_ID ((size_t)-1)
+
+/**************************************************************************
+ * State initialization
+ *************************************************************************/
+
+/**
+ * Initialize the lexer/parser context structure.
+ *
+ * @param[out] context
+ *   Lexer/parser context structure.
+ * @param input
+ *   Text input.
+ * @param input_length
+ *   Text input length.
+ */
+void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length)
+{
+    /* New default-initialized context structure. */
+    <%= @grammar.prefix %>context_t newcontext = {0};
+
+    /* Lexer initialization. */
+    newcontext.input = input;
+    newcontext.input_length = input_length;
+    newcontext.mode = <%= @lexer.mode_id("default") %>;
+
+    /* Copy to the user's context structure. */
+    *context = newcontext;
+}
+
+/**************************************************************************
+ * Decoder
+ *************************************************************************/
+
+/**
+ * Decode a UTF-8 code point.
+ *
+ * @param input
+ *   Text input to decode.
+ * @param input_length
+ *   Input text length.
+ * @param[out] out_code_point
+ *   The decoded code point is stored here if the return value is P_SUCCESS.
+ * @param[out] out_code_point_length
+ *   The number of bytes used by the code point is stored here if the return
+ *   value is P_SUCCESS.
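+ *
+ * A rough usage sketch (illustrative only; the buffer and the p_ prefix are
+ * assumptions, not generated output):
+ * @code
+ * uint8_t const text[] = "h\xC3\xA9llo";
+ * size_t offset = 0u;
+ * p_code_point_t cp;
+ * uint8_t cp_length;
+ * while (p_decode_code_point(&text[offset], (sizeof(text) - 1u) - offset,
+ *                            &cp, &cp_length) == P_SUCCESS)
+ * {
+ *     offset += cp_length;
+ * }
+ * @endcode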
+ * + * @retval P_SUCCESS on a successful code point decode + * @retval P_DECODE_ERROR when an encoding error is observed + * @retval P_EOF when the end of the text input is reached + */ +size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t input_length, + <%= @grammar.prefix %>code_point_t * out_code_point, uint8_t * out_code_point_length) +{ + if (input_length == 0u) + { + return P_EOF; + } + char c = input[0]; + <%= @grammar.prefix %>code_point_t code_point; + uint8_t code_point_length; + if ((c & 0x80u) == 0u) + { + code_point = c; + code_point_length = 1u; + } + else + { + uint8_t following_bytes; + if ((c & 0xE0u) == 0xC0u) + { + code_point = c & 0x1Fu; + following_bytes = 1u; + } + else if ((c & 0xF0u) == 0xE0u) + { + code_point = c & 0x0Fu; + following_bytes = 2u; + } + else if ((c & 0xF8u) == 0xF0u) + { + code_point = c & 0x07u; + following_bytes = 3u; + } + else if ((c & 0xFCu) == 0xF8u) + { + code_point = c & 0x03u; + following_bytes = 4u; + } + else if ((c & 0xFEu) == 0xFCu) + { + code_point = c & 0x01u; + following_bytes = 5u; + } + else + { + return P_DECODE_ERROR; + } + if (input_length <= following_bytes) + { + return P_DECODE_ERROR; + } + code_point_length = (uint8_t)(following_bytes + 1u); + for (size_t i = 0u; i < following_bytes; i++) + { + char b = input[i + 1u]; + if ((b & 0xC0u) != 0x80u) + { + return P_DECODE_ERROR; + } + code_point = (code_point << 6u) | (b & 0x3Fu); + } + } + *out_code_point = code_point; + *out_code_point_length = code_point_length; + return P_SUCCESS; +} + +/************************************************************************** + * Lexer + *************************************************************************/ + +/** Lexer state ID type. */ +typedef <%= get_type_for(@lexer.state_table.size) %> lexer_state_id_t; + +/** Invalid lexer state ID. */ +#define INVALID_LEXER_STATE_ID <%= @lexer.state_table.size %>u + +/** Lexer user code ID type. */ +<% user_code_id_count = (@grammar.patterns.map(&:code_id).compact.max || 0) + 1 %> +typedef <%= get_type_for(user_code_id_count) %> lexer_user_code_id_t; + +/** Invalid lexer user code ID. */ +#define INVALID_USER_CODE_ID <%= user_code_id_count %>u + +/** + * Lexer transition table entry. + * + * An incoming code point matching the range for a transition entry will cause + * the lexer to progress to the destination state. + */ +typedef struct +{ + /** First code point in the range for this transition. */ + <%= @grammar.prefix %>code_point_t first; + + /** Last code point in the range for this transition. */ + <%= @grammar.prefix %>code_point_t last; + + /** Destination lexer state ID for this transition. */ + lexer_state_id_t destination_state; +} lexer_transition_t; + +/** Lexer state table entry. */ +typedef struct +{ + /** Index to the transition table for this state. */ + <%= get_type_for(@lexer.transition_table.size - 1) %> transition_table_index; + + /** Number of transition table entries for this state. */ + <%= get_type_for(@lexer.state_table.map {|ste| ste[:n_transitions]}.max) %> n_transitions; + + /** Lexer token formed at this state. */ + <%= @grammar.prefix %>token_t token; + + /** Lexer user code ID to execute at this state. */ + lexer_user_code_id_t code_id; + + /** Whether this state matches a lexer pattern. */ + bool accepts; +} lexer_state_t; + +/** Lexer mode table entry. */ +typedef struct +{ + /** Offset in the state table to be used for this mode. */ + uint32_t state_table_offset; +} lexer_mode_t; + +/** + * Lexer match info structure. 
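+ *
+ * (Filled in by find_longest_match() below: length counts input bytes, while
+ * delta_position tracks the row/column advance in decoded code points.)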
+ * + * This structure holds output values from the lexer upon a successful pattern + * match. + */ +typedef struct +{ + /** Number of bytes of input text used to match. */ + size_t length; + + /** Input text position delta. */ + <%= @grammar.prefix %>position_t delta_position; + + /** Accepting lexer state from the match. */ + lexer_state_t const * accepting_state; +} lexer_match_info_t; + +/** Lexer transition table. */ +static lexer_transition_t lexer_transition_table[] = { +<% @lexer.transition_table.each do |transition_table_entry| %> + {<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u}, +<% end %> +}; + +/** Lexer state table. */ +static lexer_state_t lexer_state_table[] = { +<% @lexer.state_table.each do |state_table_entry| %> + {<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:token] || "INVALID_TOKEN_ID" %>, <%= state_table_entry[:code_id] || "INVALID_USER_CODE_ID" %>, <%= state_table_entry[:accepts] %>}, +<% end %> +}; + +/** Lexer mode table. */ +static lexer_mode_t lexer_mode_table[] = { +<% @lexer.mode_table.each do |mode_table_entry| %> + {<%= mode_table_entry[:state_table_offset] %>}, +<% end %> +}; + +/** + * Execute user code associated with a lexer pattern. + * + * @param context + * Lexer/parser context structure. + * @param code_id + * The ID of the user code block to execute. + * @param match + * Matched text for this pattern. + * @param match_length + * Matched text length. + * @param out_token_info + * Lexer token info in progress. + * + * @return Token to accept, or invalid token if the user code does + * not explicitly return a token. + */ +static <%= @grammar.prefix %>token_t lexer_user_code(<%= @grammar.prefix %>context_t * context, + lexer_user_code_id_t code_id, uint8_t const * match, + size_t match_length, <%= @grammar.prefix %>token_info_t * out_token_info) +{ + switch (code_id) + { +<% @grammar.patterns.each do |pattern| %> +<% if pattern.code_id %> + case <%= pattern.code_id %>u: { +<%= expand_code(pattern.code, false, nil, pattern) %> + } break; +<% end %> +<% end %> + default: break; + } + + return INVALID_TOKEN_ID; +} + +/** + * Check if there is a transition from the current lexer state to another + * based on the given input code point. + * + * @param current_state + * Current lexer state. + * @param code_point + * Input code point. + * + * @return Lexer state to transition to, or INVALID_LEXER_STATE_ID if none. + */ +static lexer_state_id_t check_lexer_transition(uint32_t current_state, uint32_t code_point) +{ + uint32_t transition_table_index = lexer_state_table[current_state].transition_table_index; + for (uint32_t i = 0u; i < lexer_state_table[current_state].n_transitions; i++) + { + if ((lexer_transition_table[transition_table_index + i].first <= code_point) && + (code_point <= lexer_transition_table[transition_table_index + i].last)) + { + return lexer_transition_table[transition_table_index + i].destination_state; + } + } + return INVALID_LEXER_STATE_ID; +} + +/** + * Find the longest lexer pattern match at the current position. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @reval P_SUCCESS + * A token was successfully lexed. + * @reval P_DECODE_ERROR + * The decoder encountered invalid text encoding. 
+ * @reval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + * @retval P_EOF + * The end of the text input was reached. + */ +static size_t find_longest_match(<%= @grammar.prefix %>context_t * context, + lexer_match_info_t * out_match_info, size_t * out_unexpected_input_length) +{ + lexer_match_info_t longest_match; + lexer_match_info_t attempt_match; + *out_match_info = longest_match; + uint32_t current_state = lexer_mode_table[context.mode].state_table_offset; + for (;;) + { + size_t const input_index = context.input_index + attempt_match.length; + uint8_t const * input = &context.input[input_index]; + size_t input_length = context.input_length - input_index; + <%= @grammar.prefix %>code_point_t code_point; + uint8_t code_point_length; + size_t result = <%= @grammar.prefix %>decode_code_point(input, input_length, &code_point, &code_point_length); + switch (result) + { + case P_SUCCESS: + lexer_state_id_t transition_state = check_lexer_transition(current_state, code_point); + if (transition_state != INVALID_LEXER_STATE_ID) + { + attempt_match.length += code_point_length; + if (code_point == '\n') + { + attempt_match.delta_position.row++; + attempt_match.delta_position.col = 0u; + } + else + { + attempt_match.delta_position.col++; + } + current_state = transition_state; + if (lexer_state_table[current_state].accepts) + { + attempt_match.accepting_state = &lexer_state_table[current_state]; + longest_match = attempt_match; + } + } + else if (longest_match.length > 0) + { + *out_match_info = longest_match; + return P_SUCCESS; + } + else + { + *out_unexpected_input_length = attempt_match.length + code_point_length; + return P_UNEXPECTED_INPUT; + } + break; + + case P_EOF: + /* We hit EOF. */ + if (longest_match.length > 0) + { + /* We have a match, so use it. */ + *out_match_info = longest_match; + return P_SUCCESS; + } + else if (attempt_match.length != 0) + { + /* There is a partial match - error! */ + *out_unexpected_input_length = attempt_match.length; + return P_UNEXPECTED_INPUT; + } + else + { + /* Valid EOF return. */ + return P_EOF; + } + break; + + case P_DECODE_ERROR: + /* If we see a decode error, we may be partially in the middle of + * matching a pattern, so return the attempted match info so that + * the input text position can be updated. */ + *out_match_info = attempt_match; + return result; + + default: + return result; + } + } +} + +/** + * Attempt to lex the next token in the input stream. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @reval P_SUCCESS + * A token was successfully lexed. + * @reval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @reval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + * @retval P_DROP + * A drop pattern was matched so the lexer should continue. 
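+ *
+ * For example, a drop pattern (one that produces no token) results in
+ * P_DROP, and the caller simply lexes again:
+ * @code
+ * // Illustrative only; this is the loop <%= @grammar.prefix %>lex() uses below.
+ * size_t result;
+ * do
+ * {
+ *     result = attempt_lex_token(context, out_token_info);
+ * } while (result == P_DROP);
+ * @endcode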
+ */ +static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info) +{ + <%= @grammar.prefix %>token_info_t token_info; + token_info.position = context.text_position; + token_info.token = INVALID_TOKEN_ID; + *out_token_info = token_info; // TODO: remove + lexer_match_info_t match_info; + size_t unexpected_input_length; + size_t result = find_longest_match(context, &match_info, &unexpected_input_length); + switch (result) + { + case P_SUCCESS: + <%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state.token; + if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID) + { + uint8_t const * match = &context.input[context.input_index]; + <%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context, + match_info.accepting_state.code_id, match, match_info.length, &token_info); + /* An invalid token returned from lexer_user_code() means that the + * user code did not explicitly return a token. So only override + * the token to return if the user code does explicitly return a + * token. */ + if (user_code_token != INVALID_TOKEN_ID) + { + token_to_accept = user_code_token; + } + } + + /* Update the input position tracking. */ + context.input_index += match_info.length; + context.text_position.row += match_info.delta_position.row; + if (match_info.delta_position.row != 0u) + { + context.text_position.col = match_info.delta_position.col; + } + else + { + context.text_position.col += match_info.delta_position.col; + } + + if (token_to_accept == INVALID_TOKEN_ID) + { + return P_DROP; + } + token_info.token = token_to_accept; + token_info.length = match_info.length; + *out_token_info = token_info; + return P_SUCCESS; + + case P_EOF: + token_info.token = TOKEN___EOF; + *out_token_info = token_info; + return P_SUCCESS; + + case P_DECODE_ERROR: + /* Update the input position tracking. */ + context.input_index += match_info.length; + context.text_position.row += match_info.delta_position.row; + if (match_info.delta_position.row != 0u) + { + context.text_position.col = match_info.delta_position.col; + } + else + { + context.text_position.col += match_info.delta_position.col; + } + return result; + + default: + return result; + } +} + +/** + * Lex the next token in the input stream. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @reval P_SUCCESS + * A token was successfully lexed. + * @reval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @reval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + */ +size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info) +{ + for (;;) + { + size_t result = attempt_lex_token(context, out_token_info); + if (result != P_DROP) + { + return result; + } + } +} + +/************************************************************************** + * Parser + *************************************************************************/ + +/** Reduce ID type. */ +alias reduce_id_t = <%= get_type_for(@parser.reduce_table.size) %>; + +/** + * A symbol ID can hold either a token ID or a rule set ID. + * + * Token IDs and rule set IDs share the same namespace, with rule set IDs + * beginning after token IDs end. + */ +alias symbol_id_t = <%= get_type_for(@parser.rule_sets.map(&:last).map(&:id).max) %>; + +/** Parser state ID type. 
*/ +alias parser_state_id_t = <%= get_type_for(@parser.state_table.size) %>; + +/** Parser rule ID type. */ +alias rule_id_t = <%= get_type_for(@grammar.rules.size) %>; + +/** Parser shift ID type. */ +alias shift_id_t = <%= get_type_for(@parser.shift_table.size) %>; + +/** Shift table entry. */ +struct shift_t +{ + /** Token or rule set ID. */ + symbol_id_t symbol_id; + + /** Parser state to shift to. */ + parser_state_id_t state_id; +} + +/** Reduce table entry. */ +struct reduce_t +{ + /** Lookahead token. */ + <%= @grammar.prefix %>token_t token; + + /** + * Rule ID. + * + * This is used to execute the parser user code block associated with a + * grammar rule. + */ + rule_id_t rule; + + /** + * Rule set ID. + * + * This is used as the new top symbol ID of the parse stack after this + * reduce action. + */ + symbol_id_t rule_set; + + /** + * Number of states leading to this reduce action. + * + * This is the number of entries popped from the parse stack after this + * reduce action. + */ + parser_state_id_t n_states; +} + +/** Parser state entry. */ +struct parser_state_t +{ + /** First shift table entry for this parser state. */ + shift_id_t shift_table_index; + + /** Number of shift table entries for this parser state. */ + shift_id_t n_shift_entries; + + /** First reduce table entry for this parser state. */ + reduce_id_t reduce_table_index; + + /** Number of reduce table entries for this parser state. */ + reduce_id_t n_reduce_entries; +} + +/** + * Structure to hold a state ID and value pair. + * + * A stack of these structures makes up the parse stack. + */ +struct state_value_t +{ + /** Parser state ID. */ + size_t state_id; + + /** Parser value from this state. */ + <%= @grammar.prefix %>value_t pvalue; + + this(size_t state_id) + { + this.state_id = state_id; + } +} + +/** Parser shift table. */ +static immutable shift_t[] parser_shift_table = [ +<% @parser.shift_table.each do |shift| %> + shift_t(<%= shift[:symbol_id] %>u, <%= shift[:state_id] %>u), +<% end %> +]; + +/** Parser reduce table. */ +static immutable reduce_t[] parser_reduce_table = [ +<% @parser.reduce_table.each do |reduce| %> + reduce_t(<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u), +<% end %> +]; + +/** Parser state table. */ +static immutable parser_state_t[] parser_state_table = [ +<% @parser.state_table.each do |state| %> + parser_state_t(<%= state[:shift_index] %>u, <%= state[:n_shifts] %>u, <%= state[:reduce_index] %>u, <%= state[:n_reduces] %>u), +<% end %> +]; + +/** + * Execute user code associated with a parser rule. + * + * @param rule The ID of the rule. + * + * @return Parse value. + */ +static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_value_t[] statevalues, uint32_t n_states) +{ + <%= @grammar.prefix %>value_t _pvalue; + + switch (rule) + { +<% @grammar.rules.each do |rule| %> +<% if rule.code %> + case <%= rule.id %>u: { +<%= expand_code(rule.code, true, rule, nil) %> + } break; +<% end %> +<% end %> + default: break; + } + + return _pvalue; +} + +/** + * Check if the parser should shift to a new state. + * + * @param state_id + * Parser state ID. + * @param symbol_id + * Incoming token/rule set ID. + * + * @return State to shift to, or INVALID_ID if none. 
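+ *
+ * @code
+ * // Illustrative only: after a reduce, the parser first tries to shift on
+ * // the reduced rule set, then falls back to shifting on the lookahead
+ * // token (top_state_id stands for the state on top of the parse stack).
+ * size_t shift_state = check_shift(top_state_id, reduced_rule_set);
+ * if (shift_state == INVALID_ID)
+ * {
+ *     shift_state = check_shift(top_state_id, token);
+ * }
+ * @endcode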
+ */ +static size_t check_shift(size_t state_id, size_t symbol_id) +{ + uint32_t start = parser_state_table[state_id].shift_table_index; + uint32_t end = start + parser_state_table[state_id].n_shift_entries; + for (uint32_t i = start; i < end; i++) + { + if (parser_shift_table[i].symbol_id == symbol_id) + { + return parser_shift_table[i].state_id; + } + } + return INVALID_ID; +} + +/** + * Check if the parser should reduce to a new state. + * + * @param state_id + * Parser state ID. + * @param token + * Incoming token. + * + * @return State to reduce to, or INVALID_ID if none. + */ +static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token) +{ + size_t start = parser_state_table[state_id].reduce_table_index; + size_t end = start + parser_state_table[state_id].n_reduce_entries; + for (size_t i = start; i < end; i++) + { + if ((parser_reduce_table[i].token == token) || + (parser_reduce_table[i].token == INVALID_TOKEN_ID)) + { + return i; + } + } + return INVALID_ID; +} + +/** + * Run the parser. + * + * @param context + * Lexer/parser context structure. + * + * @retval P_SUCCESS + * The parser successfully matched the input text. The parse result value + * can be accessed with <%= @grammar.prefix %>result(). + * @retval P_UNEXPECTED_TOKEN + * An unexpected token was encountered that does not match any grammar rule. + * The value context.token holds the unexpected token. + * @reval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @reval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + */ +size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) +{ + <%= @grammar.prefix %>token_info_t token_info; + <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID; + state_value_t[] statevalues = new state_value_t[](1); + size_t reduced_rule_set = INVALID_ID; + <%= @grammar.prefix %>value_t reduced_parser_value; + for (;;) + { + if (token == INVALID_TOKEN_ID) + { + size_t lexer_result = <%= @grammar.prefix %>lex(context, &token_info); + if (lexer_result != P_SUCCESS) + { + return lexer_result; + } + token = token_info.token; + } + size_t shift_state = INVALID_ID; + if (reduced_rule_set != INVALID_ID) + { + shift_state = check_shift(statevalues[$-1].state_id, reduced_rule_set); + } + if (shift_state == INVALID_ID) + { + shift_state = check_shift(statevalues[$-1].state_id, token); + if ((shift_state != INVALID_ID) && (token == TOKEN___EOF)) + { + /* Successful parse. */ + context.parse_result = statevalues[$-1].pvalue; + return P_SUCCESS; + } + } + if (shift_state != INVALID_ID) + { + /* We have something to shift. */ + statevalues ~= state_value_t(shift_state); + if (reduced_rule_set == INVALID_ID) + { + /* We shifted a token, mark it consumed. */ + token = INVALID_TOKEN_ID; + statevalues[$-1].pvalue = token_info.pvalue; + } + else + { + /* We shifted a RuleSet. */ + statevalues[$-1].pvalue = reduced_parser_value; + <%= @grammar.prefix %>value_t new_parse_result; + reduced_parser_value = new_parse_result; + reduced_rule_set = INVALID_ID; + } + continue; + } + + size_t reduce_index = check_reduce(statevalues[$-1].state_id, token); + if (reduce_index != INVALID_ID) + { + /* We have something to reduce. 
*/ + reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, statevalues, parser_reduce_table[reduce_index].n_states); + reduced_rule_set = parser_reduce_table[reduce_index].rule_set; + statevalues.length -= parser_reduce_table[reduce_index].n_states; + continue; + } + + /* A token was successfully lexed, so the input text position was + * advanced. However, this is an unexpected token, so we want to reset + * the context text position to point to the token rather than the text + * after it, so that if the caller wants to report the error position, + * it will point to the correct position of the unexpected token. */ + context.text_position = token_info.position; + context.token = token; + return P_UNEXPECTED_TOKEN; + } +} + +/** + * Get the parse result value. + * + * @param context + * Lexer/parser context structure. + * + * @return Parse result value. + */ +<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +{ + return context.parse_result.v_<%= start_rule_type[0] %>; +} + +/** + * Get the current text input position. + * + * @param context + * Lexer/parser context structure. + * + * @return Current text position. + */ +<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context) +{ + return context.text_position; +} diff --git a/assets/parser.h.erb b/assets/parser.h.erb new file mode 100644 index 0000000..64dd587 --- /dev/null +++ b/assets/parser.h.erb @@ -0,0 +1,108 @@ +/** + * @file + * + * This file is generated by Propane. + */ + +#ifndef PROPANE_PARSER_H +#define PROPANE_PARSER_H + +#include + +/************************************************************************** + * Public types + *************************************************************************/ + +/* Result codes. */ +#define <%= @grammar.prefix.upcase %>SUCCESS 0u +#define <%= @grammar.prefix.upcase %>DECODE_ERROR 1u +#define <%= @grammar.prefix.upcase %>UNEXPECTED_INPUT 2u +#define <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN 3u +#define <%= @grammar.prefix.upcase %>DROP 4u +#define <%= @grammar.prefix.upcase %>EOF 5u + +/** Token type. */ +typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t; + +/** Token IDs. */ +<% @grammar.tokens.each_with_index do |token, index| %> +#define TOKEN_<%= token.code_name %> <%= index %>u +<% unless token.id == index %> +<% raise "Token ID (#{token.id}) does not match index (#{index}) for token #{token.name}!" %> +<% end %> +<% end %> +#define INVALID_TOKEN_ID <%= @grammar.invalid_token_id %>u + +/** Code point type. */ +typedef uint32_t <%= @grammar.prefix %>code_point_t; + +/** Parser values type(s). */ +typedef union +{ +<% @grammar.ptypes.each do |name, typestring| %> + <%= typestring %> v_<%= name %>; +<% end %> +} <%= @grammar.prefix %>value_t; + +/** + * A structure to keep track of parser position. + * + * This is useful for reporting errors, etc... + */ +typedef struct +{ + /** Input text row (0-based). */ + uint32_t row; + + /** Input text column (0-based). */ + uint32_t col; +} <%= @grammar.prefix %>position_t; + +/** Lexed token information. */ +typedef struct +{ + /** Text position where the token was found. */ + <%= @grammar.prefix %>position_t position; + + /** Number of input bytes used by the token. */ + size_t length; + + /** Token that was lexed. */ + <%= @grammar.prefix %>token_t token; + + /** Parser value associated with the token. 
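+     *
+     * (Typically assigned by a lexer pattern's user code block;
+     * <%= @grammar.prefix %>parse() copies it onto the parse stack when the
+     * token is shifted.)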
+     */
+    <%= @grammar.prefix %>value_t pvalue;
+} <%= @grammar.prefix %>token_info_t;
+
+/**
+ * Lexer and parser context.
+ *
+ * The user must allocate an instance of this structure and pass it to any
+ * public API function.
+ */
+typedef struct
+{
+    /* Lexer context data. */
+
+    /** Input text. */
+    uint8_t const * input;
+
+    /** Input text length (bytes). */
+    size_t input_length;
+
+    /** Input text index (byte offset). */
+    size_t input_index;
+
+    /** Input text position (row/column). */
+    <%= @grammar.prefix %>position_t text_position;
+
+    /** Current lexer mode. */
+    size_t mode;
+
+    /* Parser context data. */
+
+    /** Parse result value. */
+    <%= @grammar.prefix %>value_t parse_result;
+
+    /** Unexpected token received. */
+    <%= @grammar.prefix %>token_t token;
+} <%= @grammar.prefix %>context_t;
+
+#endif
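
For reference, a minimal caller of the generated lexer/parser API might look like the
sketch below. This is illustrative only: it assumes a grammar prefix of p_, a generated
header named parser.h, and a made-up input string; none of these come from the patch
itself.

#include "parser.h"   /* assumed name of the generated header */
#include <stdio.h>
#include <string.h>

int main(void)
{
    char const * text = "1 + 2 * 3";   /* hypothetical input */
    p_context_t context;

    /* Bind the context to the input text, then run the parser. */
    p_context_init(&context, (uint8_t const *)text, strlen(text));
    if (p_parse(&context) != P_SUCCESS)
    {
        /* On error, p_position() reports where lexing/parsing stopped. */
        p_position_t pos = p_position(&context);
        fprintf(stderr, "parse error at row %u, col %u\n",
                (unsigned)pos.row, (unsigned)pos.col);
        return 1;
    }
    /* The start rule's value is available via p_result(&context). */
    return 0;
}

The return type of p_result() depends on the grammar's start rule ptype, so it is only
mentioned in a comment here.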