From 3c8794058f94d28aa0ad6e356eceb8adf0309eb2 Mon Sep 17 00:00:00 2001 From: Josh Holtrop Date: Wed, 23 Aug 2023 16:00:04 -0400 Subject: [PATCH] Add C backend - close #4 --- assets/parser.c.erb | 936 ++++++++++++++++++ assets/parser.d.erb | 4 +- assets/parser.h.erb | 125 +++ doc/user_guide.md | 2 +- lib/propane/generator.rb | 63 +- lib/propane/grammar.rb | 11 +- spec/json_parser.c.propane | 183 ++++ ...n_parser.propane => json_parser.d.propane} | 0 spec/json_types.c | 64 ++ spec/json_types.h | 46 + spec/propane_spec.rb | 552 +++++++---- spec/test_basic_math_grammar.c | 29 + spec/test_error_positions.c | 39 + spec/test_error_positions.d | 1 - spec/test_lexer.c | 92 ++ spec/test_lexer_match_text.c | 15 + spec/test_lexer_modes.c | 20 + spec/test_lexer_result_value.c | 19 + spec/test_lexer_unknown_character.c | 18 + spec/test_multiple_parsers.c | 19 + spec/test_parser_identical_rules_lookahead.c | 17 + spec/test_parser_rule_from_multiple_states.c | 24 + spec/test_parser_rule_user_code.c | 13 + spec/test_parsing_json.c | 56 ++ spec/test_parsing_lists.c | 24 + spec/test_pattern.c | 20 + spec/test_return_token_from_pattern.c | 13 + spec/test_user_code.c | 20 + spec/testutils.c | 38 + spec/testutils.h | 19 + 30 files changed, 2292 insertions(+), 190 deletions(-) create mode 100644 assets/parser.c.erb create mode 100644 assets/parser.h.erb create mode 100644 spec/json_parser.c.propane rename spec/{json_parser.propane => json_parser.d.propane} (100%) create mode 100644 spec/json_types.c create mode 100644 spec/json_types.h create mode 100644 spec/test_basic_math_grammar.c create mode 100644 spec/test_error_positions.c create mode 100644 spec/test_lexer.c create mode 100644 spec/test_lexer_match_text.c create mode 100644 spec/test_lexer_modes.c create mode 100644 spec/test_lexer_result_value.c create mode 100644 spec/test_lexer_unknown_character.c create mode 100644 spec/test_multiple_parsers.c create mode 100644 spec/test_parser_identical_rules_lookahead.c create mode 100644 
spec/test_parser_rule_from_multiple_states.c create mode 100644 spec/test_parser_rule_user_code.c create mode 100644 spec/test_parsing_json.c create mode 100644 spec/test_parsing_lists.c create mode 100644 spec/test_pattern.c create mode 100644 spec/test_return_token_from_pattern.c create mode 100644 spec/test_user_code.c create mode 100644 spec/testutils.c create mode 100644 spec/testutils.h diff --git a/assets/parser.c.erb b/assets/parser.c.erb new file mode 100644 index 0000000..e7da05c --- /dev/null +++ b/assets/parser.c.erb @@ -0,0 +1,936 @@ +#include "<%= File.basename(output_file).sub(%r{\.[a-z]+$}, "") %>.h" +#include +#include +#include + +/************************************************************************** + * User code blocks + *************************************************************************/ + +<%= @grammar.code_blocks.fetch("", "") %> + +/************************************************************************** + * Private types + *************************************************************************/ + +<% if @grammar.prefix.upcase != "P_" %> +/* Result codes. */ +#define P_SUCCESS 0u +#define P_DECODE_ERROR 1u +#define P_UNEXPECTED_INPUT 2u +#define P_UNEXPECTED_TOKEN 3u +#define P_DROP 4u +#define P_EOF 5u +<% end %> + +/* An invalid ID value. */ +#define INVALID_ID ((size_t)-1) + +/************************************************************************** + * State initialization + *************************************************************************/ + +/** + * Initialize lexer/parser context structure. + * + * @param[out] context + * Lexer/parser context structure. + * @param input + * Text input. + * @param input_length + * Text input length. + */ +void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length) +{ + /* New default-initialized context structure. */ + <%= @grammar.prefix %>context_t newcontext = {0}; + + /* Lexer initialization. 
*/ + newcontext.input = input; + newcontext.input_length = input_length; + newcontext.mode = <%= @lexer.mode_id("default") %>; + + /* Copy to the user's context structure. */ + *context = newcontext; +} + +/************************************************************************** + * Decoder + *************************************************************************/ + +/** + * Decode a UTF-8 code point. + * + * @param input + * Text input to decode. + * @param input_length + * Input text length. + * @param[out] out_code_point + * The decoded code point is stored here if the return value is P_SUCCESS. + * @param[out] out_code_point_length + * The number of bytes the code point used is stored here if the return value + * is P_SUCCESS. + * + * @retval P_SUCCESS on a successful code point decode + * @retval P_DECODE_ERROR when an encoding error is observed + * @retval P_EOF when the end of the text input is reached + */ +size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t input_length, + <%= @grammar.prefix %>code_point_t * out_code_point, uint8_t * out_code_point_length) +{ + if (input_length == 0u) + { + return P_EOF; + } + char c = input[0]; + <%= @grammar.prefix %>code_point_t code_point; + uint8_t code_point_length; + if ((c & 0x80u) == 0u) + { + code_point = c; + code_point_length = 1u; + } + else + { + uint8_t following_bytes; + if ((c & 0xE0u) == 0xC0u) + { + code_point = c & 0x1Fu; + following_bytes = 1u; + } + else if ((c & 0xF0u) == 0xE0u) + { + code_point = c & 0x0Fu; + following_bytes = 2u; + } + else if ((c & 0xF8u) == 0xF0u) + { + code_point = c & 0x07u; + following_bytes = 3u; + } + else if ((c & 0xFCu) == 0xF8u) + { + code_point = c & 0x03u; + following_bytes = 4u; + } + else if ((c & 0xFEu) == 0xFCu) + { + code_point = c & 0x01u; + following_bytes = 5u; + } + else + { + return P_DECODE_ERROR; + } + if (input_length <= following_bytes) + { + return P_DECODE_ERROR; + } + code_point_length = (uint8_t)(following_bytes + 1u); + 
for (size_t i = 0u; i < following_bytes; i++) + { + char b = input[i + 1u]; + if ((b & 0xC0u) != 0x80u) + { + return P_DECODE_ERROR; + } + code_point = (code_point << 6u) | (b & 0x3Fu); + } + } + *out_code_point = code_point; + *out_code_point_length = code_point_length; + return P_SUCCESS; +} + +/************************************************************************** + * Lexer + *************************************************************************/ + +/** Lexer state ID type. */ +typedef <%= get_type_for(@lexer.state_table.size) %> lexer_state_id_t; + +/** Invalid lexer state ID. */ +#define INVALID_LEXER_STATE_ID <%= @lexer.state_table.size %>u + +/** Lexer user code ID type. */ +<% user_code_id_count = (@grammar.patterns.map(&:code_id).compact.max || 0) + 1 %> +typedef <%= get_type_for(user_code_id_count) %> lexer_user_code_id_t; + +/** Invalid lexer user code ID. */ +#define INVALID_USER_CODE_ID <%= user_code_id_count %>u + +/** + * Lexer transition table entry. + * + * An incoming code point matching the range for a transition entry will cause + * the lexer to progress to the destination state. + */ +typedef struct +{ + /** First code point in the range for this transition. */ + <%= @grammar.prefix %>code_point_t first; + + /** Last code point in the range for this transition. */ + <%= @grammar.prefix %>code_point_t last; + + /** Destination lexer state ID for this transition. */ + lexer_state_id_t destination_state; +} lexer_transition_t; + +/** Lexer state table entry. */ +typedef struct +{ + /** Index to the transition table for this state. */ + <%= get_type_for(@lexer.transition_table.size - 1) %> transition_table_index; + + /** Number of transition table entries for this state. */ + <%= get_type_for(@lexer.state_table.map {|ste| ste[:n_transitions]}.max) %> n_transitions; + + /** Lexer token formed at this state. */ + <%= @grammar.prefix %>token_t token; + + /** Lexer user code ID to execute at this state. 
*/ + lexer_user_code_id_t code_id; + + /** Whether this state matches a lexer pattern. */ + bool accepts; +} lexer_state_t; + +/** Lexer mode table entry. */ +typedef struct +{ + /** Offset in the state table to be used for this mode. */ + uint32_t state_table_offset; +} lexer_mode_t; + +/** + * Lexer match info structure. + * + * This structure holds output values from the lexer upon a successful pattern + * match. + */ +typedef struct +{ + /** Number of bytes of input text used to match. */ + size_t length; + + /** Input text position delta. */ + <%= @grammar.prefix %>position_t delta_position; + + /** Accepting lexer state from the match. */ + lexer_state_t const * accepting_state; +} lexer_match_info_t; + +/** Lexer transition table. */ +static lexer_transition_t lexer_transition_table[] = { +<% @lexer.transition_table.each do |transition_table_entry| %> + {<%= transition_table_entry[:first] %>u, <%= transition_table_entry[:last] %>u, <%= transition_table_entry[:destination] %>u}, +<% end %> +}; + +/** Lexer state table. */ +static lexer_state_t lexer_state_table[] = { +<% @lexer.state_table.each do |state_table_entry| %> + {<%= state_table_entry[:transition_table_index] %>u, <%= state_table_entry[:n_transitions] %>u, <%= state_table_entry[:token] || "INVALID_TOKEN_ID" %>, <%= state_table_entry[:code_id] || "INVALID_USER_CODE_ID" %>, <%= state_table_entry[:accepts] %>}, +<% end %> +}; + +/** Lexer mode table. */ +static lexer_mode_t lexer_mode_table[] = { +<% @lexer.mode_table.each do |mode_table_entry| %> + {<%= mode_table_entry[:state_table_offset] %>}, +<% end %> +}; + +/** + * Execute user code associated with a lexer pattern. + * + * @param context + * Lexer/parser context structure. + * @param code_id + * The ID of the user code block to execute. + * @param match + * Matched text for this pattern. + * @param match_length + * Matched text length. + * @param out_token_info + * Lexer token info in progress. 
+ * + * @return Token to accept, or invalid token if the user code does + * not explicitly return a token. + */ +static <%= @grammar.prefix %>token_t lexer_user_code(<%= @grammar.prefix %>context_t * context, + lexer_user_code_id_t code_id, uint8_t const * match, + size_t match_length, <%= @grammar.prefix %>token_info_t * out_token_info) +{ + switch (code_id) + { +<% @grammar.patterns.each do |pattern| %> +<% if pattern.code_id %> + case <%= pattern.code_id %>u: { +<%= expand_code(pattern.code, false, nil, pattern) %> + } break; +<% end %> +<% end %> + default: break; + } + + return INVALID_TOKEN_ID; +} + +/** + * Check if there is a transition from the current lexer state to another + * based on the given input code point. + * + * @param current_state + * Current lexer state. + * @param code_point + * Input code point. + * + * @return Lexer state to transition to, or INVALID_LEXER_STATE_ID if none. + */ +static lexer_state_id_t check_lexer_transition(uint32_t current_state, uint32_t code_point) +{ + uint32_t transition_table_index = lexer_state_table[current_state].transition_table_index; + for (uint32_t i = 0u; i < lexer_state_table[current_state].n_transitions; i++) + { + if ((lexer_transition_table[transition_table_index + i].first <= code_point) && + (code_point <= lexer_transition_table[transition_table_index + i].last)) + { + return lexer_transition_table[transition_table_index + i].destination_state; + } + } + return INVALID_LEXER_STATE_ID; +} + +/** + * Find the longest lexer pattern match at the current position. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @reval P_SUCCESS + * A token was successfully lexed. + * @reval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @reval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. 
+ * @retval P_EOF + * The end of the text input was reached. + */ +static size_t find_longest_match(<%= @grammar.prefix %>context_t * context, + lexer_match_info_t * out_match_info, size_t * out_unexpected_input_length) +{ + lexer_match_info_t longest_match = {0}; + lexer_match_info_t attempt_match = {0}; + *out_match_info = longest_match; + uint32_t current_state = lexer_mode_table[context->mode].state_table_offset; + for (;;) + { + size_t const input_index = context->input_index + attempt_match.length; + uint8_t const * input = &context->input[input_index]; + size_t input_length = context->input_length - input_index; + <%= @grammar.prefix %>code_point_t code_point; + uint8_t code_point_length; + size_t result = <%= @grammar.prefix %>decode_code_point(input, input_length, &code_point, &code_point_length); + switch (result) + { + case P_SUCCESS: + lexer_state_id_t transition_state = check_lexer_transition(current_state, code_point); + if (transition_state != INVALID_LEXER_STATE_ID) + { + attempt_match.length += code_point_length; + if (code_point == '\n') + { + attempt_match.delta_position.row++; + attempt_match.delta_position.col = 0u; + } + else + { + attempt_match.delta_position.col++; + } + current_state = transition_state; + if (lexer_state_table[current_state].accepts) + { + attempt_match.accepting_state = &lexer_state_table[current_state]; + longest_match = attempt_match; + } + } + else if (longest_match.length > 0) + { + *out_match_info = longest_match; + return P_SUCCESS; + } + else + { + *out_unexpected_input_length = attempt_match.length + code_point_length; + return P_UNEXPECTED_INPUT; + } + break; + + case P_EOF: + /* We hit EOF. */ + if (longest_match.length > 0) + { + /* We have a match, so use it. */ + *out_match_info = longest_match; + return P_SUCCESS; + } + else if (attempt_match.length != 0) + { + /* There is a partial match - error! 
*/ + *out_unexpected_input_length = attempt_match.length; + return P_UNEXPECTED_INPUT; + } + else + { + /* Valid EOF return. */ + return P_EOF; + } + break; + + case P_DECODE_ERROR: + /* If we see a decode error, we may be partially in the middle of + * matching a pattern, so return the attempted match info so that + * the input text position can be updated. */ + *out_match_info = attempt_match; + return result; + + default: + return result; + } + } +} + +/** + * Attempt to lex the next token in the input stream. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @retval P_SUCCESS + * A token was successfully lexed. + * @retval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @retval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + * @retval P_DROP + * A drop pattern was matched so the lexer should continue. + */ +static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info) +{ + <%= @grammar.prefix %>token_info_t token_info = {0}; + token_info.position = context->text_position; + token_info.token = INVALID_TOKEN_ID; + *out_token_info = token_info; // TODO: remove + lexer_match_info_t match_info; + size_t unexpected_input_length; + size_t result = find_longest_match(context, &match_info, &unexpected_input_length); + switch (result) + { + case P_SUCCESS: + <%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state->token; + if (match_info.accepting_state->code_id != INVALID_USER_CODE_ID) + { + uint8_t const * match = &context->input[context->input_index]; + <%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context, + match_info.accepting_state->code_id, match, match_info.length, &token_info); + /* An invalid token returned from lexer_user_code() means that the + * user code did not explicitly return a 
token. So only override + * the token to return if the user code does explicitly return a + * token. */ + if (user_code_token != INVALID_TOKEN_ID) + { + token_to_accept = user_code_token; + } + } + + /* Update the input position tracking. */ + context->input_index += match_info.length; + context->text_position.row += match_info.delta_position.row; + if (match_info.delta_position.row != 0u) + { + context->text_position.col = match_info.delta_position.col; + } + else + { + context->text_position.col += match_info.delta_position.col; + } + + if (token_to_accept == INVALID_TOKEN_ID) + { + return P_DROP; + } + token_info.token = token_to_accept; + token_info.length = match_info.length; + *out_token_info = token_info; + return P_SUCCESS; + + case P_EOF: + token_info.token = TOKEN___EOF; + *out_token_info = token_info; + return P_SUCCESS; + + case P_DECODE_ERROR: + /* Update the input position tracking. */ + context->input_index += match_info.length; + context->text_position.row += match_info.delta_position.row; + if (match_info.delta_position.row != 0u) + { + context->text_position.col = match_info.delta_position.col; + } + else + { + context->text_position.col += match_info.delta_position.col; + } + return result; + + default: + return result; + } +} + +/** + * Lex the next token in the input stream. + * + * @param context + * Lexer/parser context structure. + * @param[out] out_token_info + * The lexed token information is stored here if the return value is + * P_SUCCESS. + * + * @reval P_SUCCESS + * A token was successfully lexed. + * @reval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @reval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. 
+ */ +size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info) +{ + for (;;) + { + size_t result = attempt_lex_token(context, out_token_info); + if (result != P_DROP) + { + return result; + } + } +} + +/************************************************************************** + * Parser + *************************************************************************/ + +/** Reduce ID type. */ +typedef <%= get_type_for(@parser.reduce_table.size) %> reduce_id_t; + +/** + * A symbol ID can hold either a token ID or a rule set ID. + * + * Token IDs and rule set IDs share the same namespace, with rule set IDs + * beginning after token IDs end. + */ +typedef <%= get_type_for(@parser.rule_sets.map(&:last).map(&:id).max) %> symbol_id_t; + +/** Parser state ID type. */ +typedef <%= get_type_for(@parser.state_table.size) %> parser_state_id_t; + +/** Parser rule ID type. */ +typedef <%= get_type_for(@grammar.rules.size) %> rule_id_t; + +/** Parser shift ID type. */ +typedef <%= get_type_for(@parser.shift_table.size) %> shift_id_t; + +/** Shift table entry. */ +typedef struct +{ + /** Token or rule set ID. */ + symbol_id_t symbol_id; + + /** Parser state to shift to. */ + parser_state_id_t state_id; +} shift_t; + +/** Reduce table entry. */ +typedef struct +{ + /** Lookahead token. */ + <%= @grammar.prefix %>token_t token; + + /** + * Rule ID. + * + * This is used to execute the parser user code block associated with a + * grammar rule. + */ + rule_id_t rule; + + /** + * Rule set ID. + * + * This is used as the new top symbol ID of the parse stack after this + * reduce action. + */ + symbol_id_t rule_set; + + /** + * Number of states leading to this reduce action. + * + * This is the number of entries popped from the parse stack after this + * reduce action. + */ + parser_state_id_t n_states; +} reduce_t; + +/** Parser state entry. */ +typedef struct +{ + /** First shift table entry for this parser state. 
*/ + shift_id_t shift_table_index; + + /** Number of shift table entries for this parser state. */ + shift_id_t n_shift_entries; + + /** First reduce table entry for this parser state. */ + reduce_id_t reduce_table_index; + + /** Number of reduce table entries for this parser state. */ + reduce_id_t n_reduce_entries; +} parser_state_t; + +/** + * Structure to hold a state ID and value pair. + * + * A stack of these structures makes up the parse stack. + */ +typedef struct +{ + /** Parser state ID. */ + size_t state_id; + + /** Parser value from this state. */ + <%= @grammar.prefix %>value_t pvalue; +} state_value_t; + +/** Parser shift table. */ +static const shift_t parser_shift_table[] = { +<% @parser.shift_table.each do |shift| %> + {<%= shift[:symbol_id] %>u, <%= shift[:state_id] %>u}, +<% end %> +}; + +/** Parser reduce table. */ +static const reduce_t parser_reduce_table[] = { +<% @parser.reduce_table.each do |reduce| %> + {<%= reduce[:token_id] %>u, <%= reduce[:rule_id] %>u, <%= reduce[:rule_set_id] %>u, <%= reduce[:n_states] %>u}, +<% end %> +}; + +/** Parser state table. */ +static const parser_state_t parser_state_table[] = { +<% @parser.state_table.each do |state| %> + {<%= state[:shift_index] %>u, <%= state[:n_shifts] %>u, <%= state[:reduce_index] %>u, <%= state[:n_reduces] %>u}, +<% end %> +}; + +/* state_values stack functionality */ + +/** state_values stack type. */ +typedef struct +{ + size_t length; + size_t capacity; + state_value_t * entries; +} state_values_stack_t; + +/** + * Initialize state_values stack structure. + * + * @param stack + * state_values stack structure. + */ +static void state_values_stack_init(state_values_stack_t * stack) +{ + const size_t initial_capacity = 10u; + stack->length = 0u; + stack->capacity = initial_capacity; + stack->entries = (state_value_t *)malloc(initial_capacity * sizeof(state_value_t)); +} + +/** + * Index a state_values stack. + * + * @param stack + * state_values stack structure. 
+ * @param index + * Index to the stack. + * + * @return Pointer to the state value structure at the given index. + */ +static state_value_t * state_values_stack_index(state_values_stack_t * stack, int index) +{ + if (index >= 0) + { + return &stack->entries[index]; + } + else + { + return &stack->entries[stack->length - (size_t)(unsigned int)(-index)]; + } +} + +/** + * Push a new state_value to the state_values stack. + * + * @param stack + * state_values stack structure. + */ +static void state_values_stack_push(state_values_stack_t * stack) +{ + size_t const current_capacity = stack->capacity; + size_t const current_length = stack->length; + if (current_length >= current_capacity) + { + size_t const new_capacity = current_capacity * 2u; + state_value_t * new_entries = malloc(new_capacity * sizeof(state_value_t)); + memcpy(new_entries, stack->entries, current_length * sizeof(state_value_t)); + free(stack->entries); + stack->capacity = new_capacity; + stack->entries = new_entries; + } + memset(&stack->entries[current_length], 0, sizeof(state_value_t)); + stack->length = current_length + 1u; +} + +/** + * Pop entries from a state_values stack. + * + * @param stack + * state_values stack structure. + * @param n + * Number of states to pop. + */ +static void state_values_stack_pop(state_values_stack_t * stack, size_t n) +{ + stack->length -= n; +} + +/** + * Free memory for a state_values stack structure. + * + * @param stack + * state_values stack structure. + */ +static void state_values_stack_free(state_values_stack_t * stack) +{ + free(stack->entries); +} + +/** + * Execute user code associated with a parser rule. + * + * @param rule The ID of the rule. + * + * @return Parse value. 
+ */ +static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states) +{ + <%= @grammar.prefix %>value_t _pvalue = {0}; + + switch (rule) + { +<% @grammar.rules.each do |rule| %> +<% if rule.code %> + case <%= rule.id %>u: { +<%= expand_code(rule.code, true, rule, nil) %> + } break; +<% end %> +<% end %> + default: break; + } + + return _pvalue; +} + +/** + * Check if the parser should shift to a new state. + * + * @param state_id + * Parser state ID. + * @param symbol_id + * Incoming token/rule set ID. + * + * @return State to shift to, or INVALID_ID if none. + */ +static size_t check_shift(size_t state_id, size_t symbol_id) +{ + uint32_t start = parser_state_table[state_id].shift_table_index; + uint32_t end = start + parser_state_table[state_id].n_shift_entries; + for (uint32_t i = start; i < end; i++) + { + if (parser_shift_table[i].symbol_id == symbol_id) + { + return parser_shift_table[i].state_id; + } + } + return INVALID_ID; +} + +/** + * Check if the parser should reduce to a new state. + * + * @param state_id + * Parser state ID. + * @param token + * Incoming token. + * + * @return State to reduce to, or INVALID_ID if none. + */ +static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token) +{ + size_t start = parser_state_table[state_id].reduce_table_index; + size_t end = start + parser_state_table[state_id].n_reduce_entries; + for (size_t i = start; i < end; i++) + { + if ((parser_reduce_table[i].token == token) || + (parser_reduce_table[i].token == INVALID_TOKEN_ID)) + { + return i; + } + } + return INVALID_ID; +} + +/** + * Run the parser. + * + * @param context + * Lexer/parser context structure. + * + * @retval P_SUCCESS + * The parser successfully matched the input text. The parse result value + * can be accessed with <%= @grammar.prefix %>result(). + * @retval P_UNEXPECTED_TOKEN + * An unexpected token was encountered that does not match any grammar rule. 
+ * The value context->token holds the unexpected token. + * @reval P_DECODE_ERROR + * The decoder encountered invalid text encoding. + * @reval P_UNEXPECTED_INPUT + * Input text does not match any lexer pattern. + */ +size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context) +{ + <%= @grammar.prefix %>token_info_t token_info; + <%= @grammar.prefix %>token_t token = INVALID_TOKEN_ID; + state_values_stack_t statevalues; + size_t reduced_rule_set = INVALID_ID; + <%= @grammar.prefix %>value_t reduced_parser_value; + state_values_stack_init(&statevalues); + state_values_stack_push(&statevalues); + size_t result; + for (;;) + { + if (token == INVALID_TOKEN_ID) + { + size_t lexer_result = <%= @grammar.prefix %>lex(context, &token_info); + if (lexer_result != P_SUCCESS) + { + result = lexer_result; + break; + } + token = token_info.token; + } + size_t shift_state = INVALID_ID; + if (reduced_rule_set != INVALID_ID) + { + shift_state = check_shift(state_values_stack_index(&statevalues, -1)->state_id, reduced_rule_set); + } + if (shift_state == INVALID_ID) + { + shift_state = check_shift(state_values_stack_index(&statevalues, -1)->state_id, token); + if ((shift_state != INVALID_ID) && (token == TOKEN___EOF)) + { + /* Successful parse. */ + context->parse_result = state_values_stack_index(&statevalues, -1)->pvalue; + result = P_SUCCESS; + break; + } + } + if (shift_state != INVALID_ID) + { + /* We have something to shift. */ + state_values_stack_push(&statevalues); + state_values_stack_index(&statevalues, -1)->state_id = shift_state; + if (reduced_rule_set == INVALID_ID) + { + /* We shifted a token, mark it consumed. */ + token = INVALID_TOKEN_ID; + state_values_stack_index(&statevalues, -1)->pvalue = token_info.pvalue; + } + else + { + /* We shifted a RuleSet. 
*/ + state_values_stack_index(&statevalues, -1)->pvalue = reduced_parser_value; + <%= @grammar.prefix %>value_t new_parse_result = {0}; + reduced_parser_value = new_parse_result; + reduced_rule_set = INVALID_ID; + } + continue; + } + + size_t reduce_index = check_reduce(state_values_stack_index(&statevalues, -1)->state_id, token); + if (reduce_index != INVALID_ID) + { + /* We have something to reduce. */ + reduced_parser_value = parser_user_code(parser_reduce_table[reduce_index].rule, &statevalues, parser_reduce_table[reduce_index].n_states); + reduced_rule_set = parser_reduce_table[reduce_index].rule_set; + state_values_stack_pop(&statevalues, parser_reduce_table[reduce_index].n_states); + continue; + } + + /* A token was successfully lexed, so the input text position was + * advanced. However, this is an unexpected token, so we want to reset + * the context text position to point to the token rather than the text + * after it, so that if the caller wants to report the error position, + * it will point to the correct position of the unexpected token. */ + context->text_position = token_info.position; + context->token = token; + result = P_UNEXPECTED_TOKEN; + break; + } + state_values_stack_free(&statevalues); + return result; +} + +/** + * Get the parse result value. + * + * @param context + * Lexer/parser context structure. + * + * @return Parse result value. + */ +<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context) +{ + return context->parse_result.v_<%= start_rule_type[0] %>; +} + +/** + * Get the current text input position. + * + * @param context + * Lexer/parser context structure. + * + * @return Current text position. 
+ */ +<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context) +{ + return context->text_position; +} diff --git a/assets/parser.d.erb b/assets/parser.d.erb index 5d505fb..9c9ec72 100644 --- a/assets/parser.d.erb +++ b/assets/parser.d.erb @@ -12,9 +12,7 @@ module <%= @grammar.modulename %>; * User code blocks *************************************************************************/ -<% @grammar.code_blocks.each do |code| %> -<%= code %> -<% end %> +<%= @grammar.code_blocks.fetch("", "") %> /************************************************************************** * Public types diff --git a/assets/parser.h.erb b/assets/parser.h.erb new file mode 100644 index 0000000..883b7d6 --- /dev/null +++ b/assets/parser.h.erb @@ -0,0 +1,125 @@ +/** + * @file + * + * This file is generated by Propane. + */ + +#pragma once + +#include +#include + +/************************************************************************** + * Public types + *************************************************************************/ + +/* Result codes. */ +#define <%= @grammar.prefix.upcase %>SUCCESS 0u +#define <%= @grammar.prefix.upcase %>DECODE_ERROR 1u +#define <%= @grammar.prefix.upcase %>UNEXPECTED_INPUT 2u +#define <%= @grammar.prefix.upcase %>UNEXPECTED_TOKEN 3u +#define <%= @grammar.prefix.upcase %>DROP 4u +#define <%= @grammar.prefix.upcase %>EOF 5u + +/** Token type. */ +typedef <%= get_type_for(@grammar.invalid_token_id) %> <%= @grammar.prefix %>token_t; + +/** Token IDs. */ +<% @grammar.tokens.each_with_index do |token, index| %> +#define TOKEN_<%= token.code_name %> <%= index %>u +<% unless token.id == index %> +<% raise "Token ID (#{token.id}) does not match index (#{index}) for token #{token.name}!" %> +<% end %> +<% end %> +#define INVALID_TOKEN_ID <%= @grammar.invalid_token_id %>u + +/** Code point type. */ +typedef uint32_t <%= @grammar.prefix %>code_point_t; + +/** User header code blocks. 
*/ +<%= @grammar.code_blocks.fetch("header", "") %> + +/** Parser values type(s). */ +typedef union +{ +<% @grammar.ptypes.each do |name, typestring| %> + <%= typestring %> v_<%= name %>; +<% end %> +} <%= @grammar.prefix %>value_t; + +/** + * A structure to keep track of parser position. + * + * This is useful for reporting errors, etc... + */ +typedef struct +{ + /** Input text row (0-based). */ + uint32_t row; + + /** Input text column (0-based). */ + uint32_t col; +} <%= @grammar.prefix %>position_t; + +/** Lexed token information. */ +typedef struct +{ + /** Text position where the token was found. */ + <%= @grammar.prefix %>position_t position; + + /** Number of input bytes used by the token. */ + size_t length; + + /** Token that was lexed. */ + <%= @grammar.prefix %>token_t token; + + /** Parser value associated with the token. */ + <%= @grammar.prefix %>value_t pvalue; +} <%= @grammar.prefix %>token_info_t; + +/** + * Lexer and parser context. + * + * The user must allocate an instance of this structure and pass it to any + * public API function. + */ +typedef struct +{ + /* Lexer context data. */ + + /** Input text. */ + uint8_t const * input; + + /** Input text length. */ + size_t input_length; + + /** Input text index (byte offset). */ + size_t input_index; + + /** Input text position (row/column). */ + <%= @grammar.prefix %>position_t text_position; + + /** Current lexer mode. */ + size_t mode; + + /* Parser context data. */ + + /** Parse result value. */ + <%= @grammar.prefix %>value_t parse_result; + + /** Unexpected token received. 
*/ + <%= @grammar.prefix %>token_t token; +} <%= @grammar.prefix %>context_t; + +void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length); + +size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t input_length, + <%= @grammar.prefix %>code_point_t * out_code_point, uint8_t * out_code_point_length); + +size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info); + +size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context); + +<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context); + +<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context); diff --git a/doc/user_guide.md b/doc/user_guide.md index 4141c5e..89716ae 100644 --- a/doc/user_guide.md +++ b/doc/user_guide.md @@ -13,9 +13,9 @@ Propane is an LR Parser Generator (LPG) which: * generates a built-in lexer to tokenize input * supports UTF-8 lexer inputs * generates a table-driven parser to parse input in linear time + * target C or D language outputs * is MIT-licensed * is distributable as a standalone Ruby script - * supports D language #> Installation diff --git a/lib/propane/generator.rb b/lib/propane/generator.rb index b90bf79..f6e8d26 100644 --- a/lib/propane/generator.rb +++ b/lib/propane/generator.rb @@ -11,14 +11,27 @@ class Propane @log = StringIO.new end @classname = @grammar.classname || File.basename(output_file).sub(%r{[^a-zA-Z0-9].*}, "").capitalize + @language = + if output_file =~ /\.([a-z]+)$/ + $1 + else + "d" + end process_grammar! 
end def generate - erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.d.erb")), trim_mode: "<>") - result = erb.result(binding.clone) - File.open(@output_file, "wb") do |fh| - fh.write(result) + extensions = [@language] + if @language == "c" + extensions += %w[h] + end + extensions.each do |extension| + erb = ERB.new(File.read(File.join(File.dirname(File.expand_path(__FILE__)), "../../assets/parser.#{extension}.erb")), trim_mode: "<>") + output_file = @output_file.sub(%r{\.[a-z]+$}, ".#{extension}") + result = erb.result(binding.clone) + File.open(output_file, "wb") do |fh| + fh.write(result) + end end @log.close end @@ -191,11 +204,21 @@ class Propane end code = code.gsub(/\$(\d+)/) do |match| index = $1.to_i - "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}" + case @language + when "c" + "state_values_stack_index(statevalues, -1 - (int)n_states + #{index})->pvalue.v_#{rule.components[index - 1].ptypename}" + when "d" + "statevalues[$-1-n_states+#{index}].pvalue.v_#{rule.components[index - 1].ptypename}" + end end else code = code.gsub(/\$\$/) do |match| - "out_token_info.pvalue.v_#{pattern.ptypename}" + case @language + when "c" + "out_token_info->pvalue.v_#{pattern.ptypename}" + when "d" + "out_token_info.pvalue.v_#{pattern.ptypename}" + end end code = code.gsub(/\$mode\(([a-zA-Z_][a-zA-Z_0-9]*)\)/) do |match| mode_name = $1 @@ -203,7 +226,12 @@ class Propane unless mode_id raise Error.new("Lexer mode '#{mode_name}' not found") end - "context.mode = #{mode_id}u" + case @language + when "c" + "context->mode = #{mode_id}u" + when "d" + "context.mode = #{mode_id}u" + end end end code @@ -229,11 +257,26 @@ class Propane # Type. 
def get_type_for(max) if max <= 0xFF - "ubyte" + case @language + when "c" + "uint8_t" + when "d" + "ubyte" + end elsif max <= 0xFFFF - "ushort" + case @language + when "c" + "uint16_t" + when "d" + "ushort" + end else - "uint" + case @language + when "c" + "uint32_t" + else + "uint" + end end end diff --git a/lib/propane/grammar.rb b/lib/propane/grammar.rb index 4381ecb..c971c37 100644 --- a/lib/propane/grammar.rb +++ b/lib/propane/grammar.rb @@ -18,7 +18,7 @@ class Propane @patterns = [] @tokens = [] @rules = [] - @code_blocks = [] + @code_blocks = {} @line_number = 1 @next_line_number = @line_number @mode = nil @@ -191,8 +191,13 @@ class Propane end def parse_code_block_statement! - if code = parse_code_block! - @code_blocks << code + if md = consume!(/<<([a-z]*)\n(.*?)^>>\n/m) + name, code = md[1..2] + if @code_blocks[name] + @code_blocks[name] += code + else + @code_blocks[name] = code + end @mode = nil true end diff --git a/spec/json_parser.c.propane b/spec/json_parser.c.propane new file mode 100644 index 0000000..1ce02ca --- /dev/null +++ b/spec/json_parser.c.propane @@ -0,0 +1,183 @@ +<
> +<< +#include "math.h" +#include <stdbool.h> +static str_t string_value; +>> + +ptype JSONValue *; + +drop /\s+/; +token lbrace /\{/; +token rbrace /\}/; +token lbracket /\[/; +token rbracket /\]/; +token comma /,/; +token colon /:/; +token number /-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?/ << + double n = 0.0; /* magnitude of the matched number; sign applied at the end */ + bool negative = false; + size_t i = 0u; /* read cursor into match[], never read at or past match_length */ + if (match[i] == '-') + { + negative = true; + i++; + } + while (i < match_length && '0' <= match[i] && match[i] <= '9') + { + n *= 10.0; + n += (match[i] - '0'); + i++; + } + if (i < match_length && match[i] == '.') + { + i++; + double mult = 0.1; + while (i < match_length && '0' <= match[i] && match[i] <= '9') + { + n += mult * (match[i] - '0'); + mult /= 10.0; + i++; + } + } + if (i < match_length && (match[i] == 'e' || match[i] == 'E')) + { + bool exp_negative = false; + i++; + if (match[i] == '-') + { + exp_negative = true; + i++; + } + else if (match[i] == '+') + { + i++; + } + long exp = 0; + while (i < match_length && '0' <= match[i] && match[i] <= '9') + { + exp *= 10; + exp += (match[i] - '0'); + i++; + } + if (exp_negative) + { + exp = -exp; + } + n *= pow(10.0, (double)exp); /* scale by 10^exp, e.g. 2E-2 -> 0.02 */ + } + if (negative) + { + n = -n; + } + $$ = JSONValue_new(JSON_NUMBER); + $$->number = n; +>> +token true << + $$ = JSONValue_new(JSON_TRUE); +>> +token false << + $$ = JSONValue_new(JSON_FALSE); +>> +token null << + $$ = JSONValue_new(JSON_NULL); +>> +/"/ << + $mode(string); + str_init(&string_value, ""); +>> +string: token string /"/ << + $$ = JSONValue_new(JSON_STRING); + $$->string = string_value; + $mode(default); +>> +string: /\\"/ << + str_append(&string_value, "\""); +>> +string: /\\\\/ << + str_append(&string_value, "\\"); +>> +string: /\\\// << + str_append(&string_value, "/"); +>> +string: /\\b/ << + str_append(&string_value, "\b"); +>> +string: /\\f/ << + str_append(&string_value, "\f"); +>> +string: /\\n/ << + str_append(&string_value, "\n"); +>> +string: /\\r/ << + str_append(&string_value, "\r"); +>> +string: /\\t/ << + str_append(&string_value, "\t"); +>> +string: /\\u[0-9a-fA-F]{4}/ << + /* Not actually going to encode the code
point for this example... */ + char s[] = {'{', match[2], match[3], match[4], match[5], '}', 0}; + str_append(&string_value, s); +>> +string: /[^\\]/ << + char s[] = {match[0], 0}; + str_append(&string_value, s); +>> +Start -> Value << + $$ = $1; +>> +Value -> string << + $$ = $1; +>> +Value -> number << + $$ = $1; +>> +Value -> Object << + $$ = $1; +>> +Value -> Array << + $$ = $1; +>> +Value -> true << + $$ = $1; +>> +Value -> false << + $$ = $1; +>> +Value -> null << + $$ = $1; +>> +Object -> lbrace rbrace << + $$ = JSONObject_new(); +>> +Object -> lbrace KeyValues rbrace << + $$ = $2; +>> +KeyValues -> KeyValue << + $$ = $1; +>> +KeyValues -> KeyValues comma KeyValue << + JSONObject_append($1, $3->object.entries[0].name, $3->object.entries[0].value); + $$ = $1; +>> +KeyValue -> string colon Value << + $$ = JSONObject_new(); + JSONObject_append($$, str_cstr(&$1->string), $3); +>> +Array -> lbracket rbracket << + $$ = JSONArray_new(); +>> +Array -> lbracket Values rbracket << + $$ = $2; +>> +Values -> Value << + $$ = $1; +>> +Values -> Values comma Value << + JSONArray_append($1, $3); + $$ = $1; +>> diff --git a/spec/json_parser.propane b/spec/json_parser.d.propane similarity index 100% rename from spec/json_parser.propane rename to spec/json_parser.d.propane diff --git a/spec/json_types.c b/spec/json_types.c new file mode 100644 index 0000000..aa05062 --- /dev/null +++ b/spec/json_types.c @@ -0,0 +1,64 @@ +#include "json_types.h" +#include +#include +#include "testutils.h" + +JSONValue * JSONValue_new(size_t id) +{ + JSONValue * jv = calloc(1, sizeof(JSONValue)); + jv->id = id; + return jv; +} + +JSONValue * JSONObject_new(void) +{ + JSONValue * jv = JSONValue_new(JSON_OBJECT); + jv->object.size = 0u; + return jv; +} + +void JSONObject_append(JSONValue * object, char const * name, JSONValue * value) +{ + size_t const size = object->object.size; + for (size_t i = 0u; i < size; i++) + { + if (strcmp(name, object->object.entries[i].name) == 0) + { + 
object->object.entries[i].value = value; + return; + } + } + size_t const new_size = size + 1; + void * new_entries = malloc(sizeof(object->object.entries[0]) * new_size); + if (size > 0) + { + memcpy(new_entries, object->object.entries, size * sizeof(object->object.entries[0])); + free(object->object.entries); + } + object->object.entries = new_entries; + object->object.entries[size].name = name; + object->object.entries[size].value = value; + object->object.size = new_size; +} + +JSONValue * JSONArray_new(void) +{ + JSONValue * jv = JSONValue_new(JSON_ARRAY); + jv->array.size = 0u; + return jv; +} + +void JSONArray_append(JSONValue * array, JSONValue * value) +{ + size_t const size = array->array.size; + size_t const new_size = size + 1; + JSONValue ** new_entries = malloc(sizeof(JSONValue *) * new_size); + if (array->array.size > 0) + { + memcpy(new_entries, array->array.entries, sizeof(JSONValue *) * size); + free(array->array.entries); + } + array->array.entries = new_entries; + array->array.entries[size] = value; + array->array.size = new_size; +} diff --git a/spec/json_types.h b/spec/json_types.h new file mode 100644 index 0000000..5bb32ce --- /dev/null +++ b/spec/json_types.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include "testutils.h" + +#define JSON_OBJECT 0u +#define JSON_ARRAY 1u +#define JSON_NUMBER 2u +#define JSON_STRING 3u +#define JSON_TRUE 4u +#define JSON_FALSE 5u +#define JSON_NULL 6u + +typedef struct JSONValue_s +{ + size_t id; + union + { + struct + { + size_t size; + struct + { + char const * name; + struct JSONValue_s * value; + } * entries; + } object; + struct + { + size_t size; + struct JSONValue_s ** entries; + } array; + double number; + str_t string; + }; +} JSONValue; + +JSONValue * JSONValue_new(size_t id); + +JSONValue * JSONObject_new(void); + +void JSONObject_append(JSONValue * object, char const * name, JSONValue * value); + +JSONValue * JSONArray_new(void); + +void JSONArray_append(JSONValue * array, JSONValue * value); 
diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb index bd88982..5e9a9f1 100644 --- a/spec/propane_spec.rb +++ b/spec/propane_spec.rb @@ -11,7 +11,7 @@ describe Propane do def build_parser(options = {}) options[:name] ||= "" - command = %W[./propane.sh spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.d --log spec/run/testparser#{options[:name]}.log] + command = %W[./propane.sh spec/run/testparser#{options[:name]}.propane spec/run/testparser#{options[:name]}.#{options[:language]} --log spec/run/testparser#{options[:name]}.log] if (options[:capture]) stdout, stderr, status = Open3.capture3(*command) Results.new(stdout, stderr, status) @@ -25,9 +25,14 @@ describe Propane do test_files = Array(test_files) options[:parsers] ||= [""] parsers = options[:parsers].map do |name| - "spec/run/testparser#{name}.d" + "spec/run/testparser#{name}.#{options[:language]}" + end + case options[:language] + when "c" + result = system(*%w[gcc -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files, "spec/testutils.c", "-lm") + when "d" + result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d") end - result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d") expect(result).to be_truthy end @@ -69,8 +74,12 @@ describe Propane do FileUtils.mkdir_p("spec/run") end - it "generates a lexer" do - write_grammar < int << Foo -> plus << >> EOF - build_parser - compile("spec/test_lexer.d") - results = run - expect(results.stderr).to eq "" - expect(results.status).to eq 0 - end + build_parser(language: language) + compile("spec/test_lexer.#{language}", language: language) + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end - it "detects a lexer error when an unknown character is seen" do - write_grammar <> +Start -> int << + $$ = $1; +>> +EOF + when "d" + write_grammar < int << $$ = $1; >> EOF - 
build_parser - compile("spec/test_lexer_unknown_character.d") - results = run - expect(results.stderr).to eq "" - expect(results.status).to eq 0 - end + end + build_parser(language: language) + compile("spec/test_lexer_unknown_character.#{language}", language: language) + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end - it "generates a parser" do - write_grammar < B; B -> zero; B -> one; EOF - build_parser - end + build_parser(language: language) + end - it "generates a parser that does basic math - user guide example" do - write_grammar < +>> + +ptype size_t; + +token plus /\\+/; +token times /\\*/; +token power /\\*\\*/; +token integer /\\d+/ << + size_t v = 0u; + for (size_t i = 0u; i < match_length; i++) + { + v *= 10; + v += (match[i] - '0'); + } + $$ = v; +>> +token lparen /\\(/; +token rparen /\\)/; +drop /\\s+/; + +Start -> E1 << + $$ = $1; +>> +E1 -> E2 << + $$ = $1; +>> +E1 -> E1 plus E2 << + $$ = $1 + $3; +>> +E2 -> E3 << + $$ = $1; +>> +E2 -> E2 times E3 << + $$ = $1 * $3; +>> +E3 -> E4 << + $$ = $1; +>> +E3 -> E3 power E4 << + $$ = (size_t)pow($1, $3); +>> +E4 -> integer << + $$ = $1; +>> +E4 -> lparen E1 rparen << + $$ = $2; +>> +EOF + when "d" + write_grammar <> @@ -179,25 +261,26 @@ E4 -> lparen E1 rparen << $$ = $2; >> EOF - build_parser - compile("spec/test_basic_math_grammar.d") - results = run - expect(results.stderr).to eq "" - expect(results.status).to eq 0 - end + end + build_parser(language: language) + compile("spec/test_basic_math_grammar.#{language}", language: language) + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end - it "generates an SLR parser" do - write_grammar < E; E -> one E; E -> one; EOF - build_parser - end + build_parser(language: language) + end - it "distinguishes between multiple identical rules with lookahead symbol" do - write_grammar < R1 a; @@ -205,14 +288,14 @@ Start -> R2 b; R1 -> a b; R2 -> a b; EOF - build_parser - 
compile("spec/test_parser_identical_rules_lookahead.d") - results = run - expect(results.status).to eq 0 - end + build_parser(language: language) + compile("spec/test_parser_identical_rules_lookahead.#{language}", language: language) + results = run + expect(results.status).to eq 0 + end - it "handles reducing a rule that could be arrived at from multiple states" do - write_grammar < a R1; Start -> b R1; R1 -> b; EOF - build_parser - compile("spec/test_parser_rule_from_multiple_states.d") - results = run - expect(results.status).to eq 0 - end + build_parser(language: language) + compile("spec/test_parser_rule_from_multiple_states.#{language}", language: language) + results = run + expect(results.status).to eq 0 + end - it "executes user code when matching lexer token" do - write_grammar < +>> +token abc << + printf("abc!\\n"); +>> +token def; +Start -> Abcs def; +Abcs -> ; +Abcs -> abc Abcs; +EOF + when "d" + write_grammar <> @@ -239,21 +337,35 @@ Start -> Abcs def; Abcs -> ; Abcs -> abc Abcs; EOF - build_parser - compile("spec/test_user_code.d") - results = run - expect(results.status).to eq 0 - verify_lines(results.stdout, [ - "abc!", - "pass1", - "abc!", - "abc!", - "pass2", - ]) - end + end + build_parser(language: language) + compile("spec/test_user_code.#{language}", language: language) + results = run + expect(results.status).to eq 0 + verify_lines(results.stdout, [ + "abc!", + "pass1", + "abc!", + "abc!", + "pass2", + ]) + end - it "supports a pattern statement" do - write_grammar < +>> +token abc; +/def/ << + printf("def!\\n"); +>> +Start -> abc; +EOF + when "d" + write_grammar <> @@ -263,21 +375,39 @@ token abc; >> Start -> abc; EOF - build_parser - compile("spec/test_pattern.d") - results = run - expect(results.status).to eq 0 - verify_lines(results.stdout, [ - "def!", - "pass1", - "def!", - "def!", - "pass2", - ]) - end + end + build_parser(language: language) + compile("spec/test_pattern.#{language}", language: language) + results = run + 
expect(results.status).to eq 0 + verify_lines(results.stdout, [ + "def!", + "pass1", + "def!", + "def!", + "pass2", + ]) + end - it "supports returning tokens from pattern code blocks" do - write_grammar < +>> +token abc; +/def/ << + printf("def!\\n"); +>> +/ghi/ << + printf("ghi!\\n"); + return $token(abc); +>> +Start -> abc; +EOF + when "d" + write_grammar <> @@ -291,19 +421,44 @@ token abc; >> Start -> abc; EOF - build_parser - compile("spec/test_return_token_from_pattern.d") - results = run - expect(results.status).to eq 0 - verify_lines(results.stdout, [ - "def!", - "ghi!", - "def!", - ]) - end + end + build_parser(language: language) + compile("spec/test_return_token_from_pattern.#{language}", language: language) + results = run + expect(results.status).to eq 0 + verify_lines(results.stdout, [ + "def!", + "ghi!", + "def!", + ]) + end - it "supports lexer modes" do - write_grammar < +>> +token abc; +token def; +tokenid string; +drop /\\s+/; +/"/ << + printf("begin string mode\\n"); + $mode(string); +>> +string: /[^"]+/ << + printf("captured string\\n"); +>> +string: /"/ << + $mode(default); + return $token(string); +>> +Start -> abc string def; +EOF + when "d" + write_grammar <> @@ -324,22 +479,42 @@ string: /"/ << >> Start -> abc string def; EOF - build_parser - compile("spec/test_lexer_modes.d") - results = run - expect(results.status).to eq 0 - verify_lines(results.stdout, [ - "begin string mode", - "captured string", - "pass1", - "begin string mode", - "captured string", - "pass2", - ]) - end + end + build_parser(language: language) + compile("spec/test_lexer_modes.#{language}", language: language) + results = run + expect(results.status).to eq 0 + verify_lines(results.stdout, [ + "begin string mode", + "captured string", + "pass1", + "begin string mode", + "captured string", + "pass2", + ]) + end - it "executes user code associated with a parser rule" do - write_grammar < +>> +token a; +token b; +Start -> A B << + printf("Start!\\n"); +>> +A -> a << + 
printf("A!\\n"); +>> +B -> b << + printf("B!\\n"); +>> +EOF + when "d" + write_grammar <> @@ -355,20 +530,21 @@ B -> b << writeln("B!"); >> EOF - build_parser - compile("spec/test_parser_rule_user_code.d") - results = run - expect(results.status).to eq 0 - verify_lines(results.stdout, [ - "A!", - "B!", - "Start!", - ]) - end + end + build_parser(language: language) + compile("spec/test_parser_rule_user_code.#{language}", language: language) + results = run + expect(results.status).to eq 0 + verify_lines(results.stdout, [ + "A!", + "B!", + "Start!", + ]) + end - it "parses lists" do - write_grammar < As << $$ = $1; @@ -380,15 +556,15 @@ As -> As a << $$ = $1 + 1u; >> EOF - build_parser - compile("spec/test_parsing_lists.d") - results = run - expect(results.status).to eq 0 - expect(results.stderr).to eq "" - end + build_parser(language: language) + compile("spec/test_parsing_lists.#{language}", language: language) + results = run + expect(results.status).to eq 0 + expect(results.stderr).to eq "" + end - it "fails to generate a parser for a LR(1) grammar that is not LALR" do - write_grammar < b E d; E -> e; F -> e; EOF - results = build_parser(capture: true) - expect(results.status).to_not eq 0 - expect(results.stderr).to match %r{reduce/reduce conflict.*\(E\).*\(F\)} - end + results = build_parser(capture: true, language: language) + expect(results.status).to_not eq 0 + expect(results.stderr).to match %r{reduce/reduce conflict.*\(E\).*\(F\)} + end - it "provides matched text to user code blocks" do - write_grammar < +#include +>> +token id /[a-zA-Z_][a-zA-Z0-9_]*/ << + char * t = calloc(1, match_length + 1); /* zero-filled: strncpy below writes no terminator */ + strncpy(t, (char *)match, match_length); + printf("Matched token is %s\\n", t); + free(t); +>> +Start -> id; +EOF + when "d" + write_grammar <> @@ -416,18 +608,31 @@ token id /[a-zA-Z_][a-zA-Z0-9_]*/ << >> Start -> id; EOF - build_parser - compile("spec/test_lexer_match_text.d") - results = run - expect(results.status).to eq 0 - verify_lines(results.stdout, [ -
"Matched token is identifier_123", - "pass1", - ]) - end + end + build_parser(language: language) + compile("spec/test_lexer_match_text.#{language}", language: language) + results = run + expect(results.status).to eq 0 + verify_lines(results.stdout, [ + "Matched token is identifier_123", + "pass1", + ]) + end - it "allows storing a result value for the lexer" do - write_grammar <> +Start -> word << + $$ = $1; +>> +EOF + when "d" + write_grammar < word << $$ = $1; >> EOF - build_parser - compile("spec/test_lexer_result_value.d") - results = run - expect(results.stderr).to eq "" - expect(results.status).to eq 0 - end + end + build_parser(language: language) + compile("spec/test_lexer_result_value.#{language}", language: language) + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end - it "tracks position of parser errors" do - write_grammar < a num Start; Start -> a num; EOF - build_parser - compile("spec/test_error_positions.d") - results = run - expect(results.stderr).to eq "" - expect(results.status).to eq 0 - end + build_parser(language: language) + compile("spec/test_error_positions.#{language}", language: language) + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end - it "allows creating a JSON parser" do - write_grammar(File.read("spec/json_parser.propane")) - build_parser - compile(["spec/test_parsing_json.d", "spec/json_types.d"]) - end + it "allows creating a JSON parser" do + write_grammar(File.read("spec/json_parser.#{language}.propane")) + build_parser(language: language) + compile(["spec/test_parsing_json.#{language}", "spec/json_types.#{language}"], language: language) + end - it "allows generating multiple parsers in the same program" do - write_grammar(< a num; EOF - build_parser(name: "myp1") - write_grammar(< b c b; EOF - build_parser(name: "myp2") - compile("spec/test_multiple_parsers.d", parsers: %w[myp1 myp2]) - results = run - expect(results.stderr).to eq "" - 
expect(results.status).to eq 0 + build_parser(name: "myp2", language: language) + compile("spec/test_multiple_parsers.#{language}", parsers: %w[myp1 myp2], language: language) + results = run + expect(results.stderr).to eq "" + expect(results.status).to eq 0 + end + end end end diff --git a/spec/test_basic_math_grammar.c b/spec/test_basic_math_grammar.c new file mode 100644 index 0000000..4fa1ce2 --- /dev/null +++ b/spec/test_basic_math_grammar.c @@ -0,0 +1,29 @@ +#include "testparser.h" +#include "testutils.h" +#include + +int main() +{ + char const * input = "1 + 2 * 3 + 4"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + assert_eq(11, p_result(&context)); + + input = "1 * 2 ** 4 * 3"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + assert_eq(48, p_result(&context)); + + input = "(1 + 2) * 3 + 4"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + assert_eq(13, p_result(&context)); + + input = "(2 * 2) ** 3 + 4 + 5"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert_eq(P_SUCCESS, p_parse(&context)); + assert_eq(73, p_result(&context)); + + return 0; +} diff --git a/spec/test_error_positions.c b/spec/test_error_positions.c new file mode 100644 index 0000000..b871449 --- /dev/null +++ b/spec/test_error_positions.c @@ -0,0 +1,39 @@ +#include "testparser.h" +#include +#include + +int main() +{ + char const * input = "a 42"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + input = "a\n123\na a"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_UNEXPECTED_TOKEN); + assert(p_position(&context).row == 2); + assert(p_position(&context).col == 3); + assert(context.token == TOKEN_a); + + input = "12"; + 
p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_UNEXPECTED_TOKEN); + assert(p_position(&context).row == 0); + assert(p_position(&context).col == 0); + assert(context.token == TOKEN_num); + + input = "a 12\n\nab"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_UNEXPECTED_INPUT); + assert(p_position(&context).row == 2); + assert(p_position(&context).col == 1); + + input = "a 12\n\na\n\n77\na \xAA"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_DECODE_ERROR); + assert(p_position(&context).row == 5); + assert(p_position(&context).col == 4); + + return 0; +} diff --git a/spec/test_error_positions.d b/spec/test_error_positions.d index c8c4059..0282252 100644 --- a/spec/test_error_positions.d +++ b/spec/test_error_positions.d @@ -33,6 +33,5 @@ unittest input = "a 12\n\na\n\n77\na \xAA"; p_context_init(&context, input); assert(p_parse(&context) == P_DECODE_ERROR); - writeln(p_position(&context)); assert(p_position(&context) == p_position_t(5, 4)); } diff --git a/spec/test_lexer.c b/spec/test_lexer.c new file mode 100644 index 0000000..551ecb4 --- /dev/null +++ b/spec/test_lexer.c @@ -0,0 +1,92 @@ +#include "testparser.h" +#include +#include + +int main() +{ + size_t result; + p_code_point_t code_point; + uint8_t code_point_length; + + result = p_decode_code_point((uint8_t const *)"5", 1u, &code_point, &code_point_length); + assert(result == P_SUCCESS); + assert(code_point == '5'); + assert(code_point_length == 1u); + + result = p_decode_code_point((uint8_t const *)"", 0u, &code_point, &code_point_length); + assert(result == P_EOF); + + result = p_decode_code_point((uint8_t const *)"\xC2\xA9", 2u, &code_point, &code_point_length); + assert(result == P_SUCCESS); + assert(code_point == 0xA9u); + assert(code_point_length == 2u); + + result = p_decode_code_point((uint8_t const *)"\xf0\x9f\xa7\xa1", 4u, &code_point, 
&code_point_length); + assert(result == P_SUCCESS); + assert(code_point == 0x1F9E1u); + assert(code_point_length == 4u); + + result = p_decode_code_point((uint8_t const *)"\xf0\x9f\x27", 3u, &code_point, &code_point_length); + assert(result == P_DECODE_ERROR); + + result = p_decode_code_point((uint8_t const *)"\xf0\x9f\xa7\xFF", 4u, &code_point, &code_point_length); + assert(result == P_DECODE_ERROR); + + result = p_decode_code_point((uint8_t const *)"\xfe", 1u, &code_point, &code_point_length); + assert(result == P_DECODE_ERROR); + + + p_token_info_t token_info; + char const * input = "5 + 4 * \n677 + 567"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 0u); + assert(token_info.position.col == 0u); + assert(token_info.length == 1u); + assert(token_info.token == TOKEN_int); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 0u); + assert(token_info.position.col == 2u); + assert(token_info.length == 1u); + assert(token_info.token == TOKEN_plus); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 0u); + assert(token_info.position.col == 4u); + assert(token_info.length == 1u); + assert(token_info.token == TOKEN_int); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 0u); + assert(token_info.position.col == 6u); + assert(token_info.length == 1u); + assert(token_info.token == TOKEN_times); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 1u); + assert(token_info.position.col == 0u); + assert(token_info.length == 3u); + assert(token_info.token == TOKEN_int); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 1u); + assert(token_info.position.col == 4u); + assert(token_info.length == 1u); + assert(token_info.token == TOKEN_plus); + 
assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 1u); + assert(token_info.position.col == 6u); + assert(token_info.length == 3u); + assert(token_info.token == TOKEN_int); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 1u); + assert(token_info.position.col == 9u); + assert(token_info.length == 0u); + assert(token_info.token == TOKEN___EOF); + + p_context_init(&context, (uint8_t const *)"", 0u); + assert(p_lex(&context, &token_info) == P_SUCCESS); + assert(token_info.position.row == 0u); + assert(token_info.position.col == 0u); + assert(token_info.length == 0u); + assert(token_info.token == TOKEN___EOF); + + return 0; +} diff --git a/spec/test_lexer_match_text.c b/spec/test_lexer_match_text.c new file mode 100644 index 0000000..e7104d9 --- /dev/null +++ b/spec/test_lexer_match_text.c @@ -0,0 +1,15 @@ +#include "testparser.h" +#include +#include +#include + +int main() +{ + char const * input = "identifier_123"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + printf("pass1\n"); + + return 0; +} diff --git a/spec/test_lexer_modes.c b/spec/test_lexer_modes.c new file mode 100644 index 0000000..c8e3c90 --- /dev/null +++ b/spec/test_lexer_modes.c @@ -0,0 +1,20 @@ +#include "testparser.h" +#include +#include +#include + +int main() +{ + char const * input = "abc \"a string\" def"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + printf("pass1\n"); + + input = "abc \"abc def\" def"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + printf("pass2\n"); + + return 0; +} diff --git a/spec/test_lexer_result_value.c b/spec/test_lexer_result_value.c new file mode 100644 index 0000000..251c6f2 --- /dev/null +++ b/spec/test_lexer_result_value.c @@ -0,0 +1,19 @@ +#include 
"testparser.h" +#include +#include + +int main() +{ + char const * input = "x"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 1u); + + input = "fabulous"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 8u); + + return 0; +} diff --git a/spec/test_lexer_unknown_character.c b/spec/test_lexer_unknown_character.c new file mode 100644 index 0000000..5b4e4d4 --- /dev/null +++ b/spec/test_lexer_unknown_character.c @@ -0,0 +1,18 @@ +#include "testparser.h" +#include +#include + +int main() +{ + char const * input = "x"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_UNEXPECTED_INPUT); + + input = "123"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context) == 123u); + + return 0; +} diff --git a/spec/test_multiple_parsers.c b/spec/test_multiple_parsers.c new file mode 100644 index 0000000..cad14a7 --- /dev/null +++ b/spec/test_multiple_parsers.c @@ -0,0 +1,19 @@ +#include "testparsermyp1.h" +#include "testparsermyp2.h" +#include +#include + +int main() +{ + char const * input1 = "a\n1"; + myp1_context_t context1; + myp1_context_init(&context1, (uint8_t const *)input1, strlen(input1)); + assert(myp1_parse(&context1) == MYP1_SUCCESS); + + char const * input2 = "bcb"; + myp2_context_t context2; + myp2_context_init(&context2, (uint8_t const *)input2, strlen(input2)); + assert(myp2_parse(&context2) == MYP2_SUCCESS); + + return 0; +} diff --git a/spec/test_parser_identical_rules_lookahead.c b/spec/test_parser_identical_rules_lookahead.c new file mode 100644 index 0000000..c66932d --- /dev/null +++ b/spec/test_parser_identical_rules_lookahead.c @@ -0,0 +1,17 @@ +#include "testparser.h" +#include +#include + +int main() 
+{ + char const * input = "aba"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + input = "abb"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + return 0; +} diff --git a/spec/test_parser_rule_from_multiple_states.c b/spec/test_parser_rule_from_multiple_states.c new file mode 100644 index 0000000..3acf0ce --- /dev/null +++ b/spec/test_parser_rule_from_multiple_states.c @@ -0,0 +1,24 @@ +#include "testparser.h" +#include +#include + +int main() +{ + char const * input = "a"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_UNEXPECTED_TOKEN); + assert(p_position(&context).row == 0); + assert(p_position(&context).col == 1); + assert(context.token == TOKEN___EOF); + + input = "a b"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + input = "bb"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + return 0; +} diff --git a/spec/test_parser_rule_user_code.c b/spec/test_parser_rule_user_code.c new file mode 100644 index 0000000..d191cc7 --- /dev/null +++ b/spec/test_parser_rule_user_code.c @@ -0,0 +1,13 @@ +#include "testparser.h" +#include +#include + +int main() +{ + char const * input = "ab"; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + + return 0; +} diff --git a/spec/test_parsing_json.c b/spec/test_parsing_json.c new file mode 100644 index 0000000..8382e36 --- /dev/null +++ b/spec/test_parsing_json.c @@ -0,0 +1,56 @@ +#include "testparser.h" +#include "json_types.h" +#include +#include + +int main() +{ + char const * input = ""; + p_context_t context; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + 
assert(p_parse(&context) == P_SUCCESS); + + input = "{}"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context)->id == JSON_OBJECT); + + input = "[]"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context)->id == JSON_ARRAY); + + input = "-45.6"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context)->id == JSON_NUMBER); + assert(p_result(&context)->number == -45.6); + + input = "2E-2"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + assert(p_result(&context)->id == JSON_NUMBER); + assert(p_result(&context)->number == 0.02); + + input = "{\"hi\":true}"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + JSONValue * o = p_result(&context); + assert(o->id == JSON_OBJECT); + assert_eq(1, o->object.size); + assert(strcmp(o->object.entries[0].name, "hi") == 0); + assert(o->object.entries[0].value->id == JSON_TRUE); + + input = "{\"ff\": false, \"nn\": null}"; + p_context_init(&context, (uint8_t const *)input, strlen(input)); + assert(p_parse(&context) == P_SUCCESS); + o = p_result(&context); + assert(o->id == JSON_OBJECT); + assert_eq(2, o->object.size); + assert(strcmp(o->object.entries[0].name, "ff") == 0); + assert(o->object.entries[0].value->id == JSON_FALSE); + assert(strcmp(o->object.entries[1].name, "nn") == 0); + assert(o->object.entries[1].value->id == JSON_NULL); + + return 0; +} diff --git a/spec/test_parsing_lists.c b/spec/test_parsing_lists.c new file mode 100644 index 0000000..e78d8f1 --- /dev/null +++ b/spec/test_parsing_lists.c @@ -0,0 +1,24 @@ +#include "testparser.h" +#include +#include + +int main() +{ + char const * input = "a"; + p_context_t context; + p_context_init(&context, (uint8_t 
const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    assert(p_result(&context) == 1u);
+
+    input = ""; /* empty input must still parse, with result 0 */
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    assert(p_result(&context) == 0u);
+
+    input = "aaaaaaaaaaaaaaaa";
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    assert(p_result(&context) == 16u);
+
+    return 0;
+}
diff --git a/spec/test_pattern.c b/spec/test_pattern.c
new file mode 100644
index 0000000..193c957
--- /dev/null
+++ b/spec/test_pattern.c
@@ -0,0 +1,20 @@
+#include "testparser.h"
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+int main()
+{
+    char const * input = "abcdef";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    printf("pass1\n"); /* marks the end of the first parse on stdout */
+
+    input = "defabcdef";
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    printf("pass2\n"); /* marks the end of the second parse on stdout */
+
+    return 0;
+}
diff --git a/spec/test_return_token_from_pattern.c b/spec/test_return_token_from_pattern.c
new file mode 100644
index 0000000..a8be3d2
--- /dev/null
+++ b/spec/test_return_token_from_pattern.c
@@ -0,0 +1,13 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+
+int main()
+{
+    char const * input = "defghidef";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+
+    return 0;
+}
diff --git a/spec/test_user_code.c b/spec/test_user_code.c
new file mode 100644
index 0000000..cd853bc
--- /dev/null
+++ b/spec/test_user_code.c
@@ -0,0 +1,20 @@
+#include "testparser.h"
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+int main()
+{
+    char const * input = "abcdef";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    printf("pass1\n"); /* marks the end of the first parse on stdout */
+
+    input = "abcabcdef";
+    p_context_init(&context, (uint8_t const
*)input, strlen(input));
+    assert(p_parse(&context) == P_SUCCESS);
+    printf("pass2\n");
+
+    return 0;
+}
diff --git a/spec/testutils.c b/spec/testutils.c
new file mode 100644
index 0000000..606b936
--- /dev/null
+++ b/spec/testutils.c
@@ -0,0 +1,59 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "testutils.h"
+
+/**
+ * Report a size_t mismatch on stderr and fail through assert(false).
+ *
+ * @param expected Value the caller expects.
+ * @param actual   Value that was observed.
+ * @param file     Source file name printed in the message.
+ * @param line     Source line number printed in the message.
+ */
+void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_t line)
+{
+    if (expected != actual)
+    {
+        /* %zu is the conversion specifier that matches size_t. */
+        fprintf(stderr, "%s:%zu: expected %zu, got %zu\n", file, line, expected, actual);
+        assert(false);
+    }
+}
+
+/**
+ * Initialize str with a heap-allocated copy of cs.
+ * The buffer is released by str_free().
+ */
+void str_init(str_t * str, char const * cs)
+{
+    size_t length = strlen(cs);
+    str->cs = malloc(length + 1u);
+    assert(str->cs != NULL);
+    strcpy(str->cs, cs);
+}
+
+/**
+ * Append cs to str by building a new buffer and freeing the old one.
+ */
+void str_append(str_t * str, char const * cs)
+{
+    size_t length = strlen(str->cs);
+    size_t length2 = strlen(cs);
+    char * new_cs = malloc(length + length2 + 1u);
+    assert(new_cs != NULL);
+    memcpy(new_cs, str->cs, length);
+    strcpy(&new_cs[length], cs);
+    free(str->cs);
+    str->cs = new_cs;
+}
+
+/**
+ * Free the buffer held by str.
+ */
+void str_free(str_t * str)
+{
+    free(str->cs);
+}
diff --git a/spec/testutils.h b/spec/testutils.h
new file mode 100644
index 0000000..c93ffc9
--- /dev/null
+++ b/spec/testutils.h
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <stddef.h>
+
+/* Print a diagnostic and fail when expected != actual. */
+void assert_eq_size_t_i(size_t expected, size_t actual, char const * file, size_t line);
+
+/* Compare two size_t values, tagging a failure with the call site. */
+#define assert_eq(expected, actual) \
+    assert_eq_size_t_i(expected, actual, __FILE__, __LINE__)
+
+/* Heap-backed string holder used by the tests. */
+typedef struct
+{
+    char * cs;
+} str_t;
+
+void str_init(str_t * str, char const * cs);
+void str_append(str_t * str, char const * cs);
+void str_free(str_t * str);
+
+/* Return the C string currently held by str. */
+static inline char * str_cstr(str_t * str)
+{
+    return str->cs;
+}