diff --git a/assets/parser.c.erb b/assets/parser.c.erb
index fc5b1cf..d59068f 100644
--- a/assets/parser.c.erb
+++ b/assets/parser.c.erb
@@ -326,8 +326,8 @@ static lexer_state_id_t check_lexer_transition(uint32_t current_state, uint32_t
 static size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
     lexer_match_info_t * out_match_info, size_t * out_unexpected_input_length)
 {
-    lexer_match_info_t longest_match;
-    lexer_match_info_t attempt_match;
+    lexer_match_info_t longest_match = {0};
+    lexer_match_info_t attempt_match = {0};
     *out_match_info = longest_match;
     uint32_t current_state = lexer_mode_table[context->mode].state_table_offset;
     for (;;)
@@ -427,7 +427,7 @@ static size_t find_longest_match(<%= @grammar.prefix %>context_t * context,
  */
 static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info)
 {
-    <%= @grammar.prefix %>token_info_t token_info;
+    <%= @grammar.prefix %>token_info_t token_info = {0};
     token_info.position = context->text_position;
     token_info.token = INVALID_TOKEN_ID;
     *out_token_info = token_info; // TODO: remove
@@ -437,12 +437,12 @@ static size_t attempt_lex_token(<%= @grammar.prefix %>context_t * context, <%= @
     switch (result)
     {
     case P_SUCCESS:
-        <%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state.token;
-        if (match_info.accepting_state.code_id != INVALID_USER_CODE_ID)
+        <%= @grammar.prefix %>token_t token_to_accept = match_info.accepting_state->token;
+        if (match_info.accepting_state->code_id != INVALID_USER_CODE_ID)
         {
             uint8_t const * match = &context->input[context->input_index];
             <%= @grammar.prefix %>token_t user_code_token = lexer_user_code(context,
-                match_info.accepting_state.code_id, match, match_info.length, &token_info);
+                match_info.accepting_state->code_id, match, match_info.length, &token_info);
             /* An invalid token returned from lexer_user_code() means that the
              * user code did not explicitly return a token. So only override
              * the token to return if the user code does explicitly return a
@@ -531,7 +531,7 @@ size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%=
 *************************************************************************/
 
 /** Reduce ID type. */
-typedef <%= get_type_for(@parser.reduce_table.size) %> = reduce_id_t;
+typedef <%= get_type_for(@parser.reduce_table.size) %> reduce_id_t;
 
 /**
  * A symbol ID can hold either a token ID or a rule set ID.
@@ -658,7 +658,7 @@ typedef struct
  * @param stack
  *   state_values stack structure.
  */
-void state_values_stack_init(state_values_stack_t * stack)
+static void state_values_stack_init(state_values_stack_t * stack)
 {
     const size_t initial_capacity = 10u;
     stack->length = 0u;
@@ -676,7 +676,7 @@ void state_values_stack_init(state_values_stack_t * stack)
  *
  * @return Pointer to the state value structure at the given index.
  */
-state_value_t * state_values_stack_index(state_values_stack_t * stack, int index)
+static state_value_t * state_values_stack_index(state_values_stack_t * stack, int index)
 {
     if (index >= 0)
     {
@@ -694,7 +694,7 @@ state_value_t * state_values_stack_index(state_values_stack_t * stack, int index
  *
  * @param stack
  *   state_values stack structure.
  */
-void state_values_stack_push(state_values_stack_t * stack)
+static void state_values_stack_push(state_values_stack_t * stack)
 {
     size_t const current_capacity = stack->capacity;
     size_t const current_length = stack->length;
@@ -702,7 +702,7 @@ void state_values_stack_push(state_values_stack_t * stack)
     {
         size_t const new_capacity = current_capacity * 2u;
         state_value_t * new_entries = malloc(new_capacity * sizeof(state_value_t));
-        memcpy(new_entries, stack->entries, current_length * sizeof(state_value_t);
+        memcpy(new_entries, stack->entries, current_length * sizeof(state_value_t));
         free(stack->entries);
         stack->capacity = new_capacity;
         stack->entries = new_entries;
@@ -719,7 +719,7 @@ void state_values_stack_push(state_values_stack_t * stack)
  * @param n
  *   Number of states to pop.
  */
-void state_values_stack_pop(state_values_stack_t * stack, size_t n)
+static void state_values_stack_pop(state_values_stack_t * stack, size_t n)
 {
     stack->length -= n;
 }
@@ -730,7 +730,7 @@ void state_values_stack_pop(state_values_stack_t * stack, size_t n)
  * @param stack
  *   state_values stack structure.
  */
-void state_values_stack_free(state_values_stack_t * stack)
+static void state_values_stack_free(state_values_stack_t * stack)
 {
     free(stack->entries);
 }
@@ -744,7 +744,7 @@ void state_values_stack_free(state_values_stack_t * stack)
  */
 static <%= @grammar.prefix %>value_t parser_user_code(uint32_t rule, state_values_stack_t * statevalues, uint32_t n_states)
 {
-    <%= @grammar.prefix %>value_t _pvalue;
+    <%= @grammar.prefix %>value_t _pvalue = {0};
 
     switch (rule)
     {
@@ -821,7 +821,7 @@ static size_t check_reduce(size_t state_id, <%= @grammar.prefix %>token_t token)
  *   can be accessed with <%= @grammar.prefix %>result().
  * @retval P_UNEXPECTED_TOKEN
  *   An unexpected token was encountered that does not match any grammar rule.
- *   The value context.token holds the unexpected token.
+ *   The value context->token holds the unexpected token.
  * @reval P_DECODE_ERROR
  *   The decoder encountered invalid text encoding.
  * @reval P_UNEXPECTED_INPUT
@@ -880,7 +880,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
                 {
                     /* We shifted a RuleSet. */
                     state_values_stack_index(&statevalues, -1)->pvalue = reduced_parser_value;
-                    <%= @grammar.prefix %>value_t new_parse_result;
+                    <%= @grammar.prefix %>value_t new_parse_result = {0};
                     reduced_parser_value = new_parse_result;
                     reduced_rule_set = INVALID_ID;
                 }
@@ -908,6 +908,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
         break;
     }
     state_values_stack_free(&statevalues);
+    return result;
 }
 
 /**
@@ -920,7 +921,7 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
  */
 <%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context)
 {
-    return context.parse_result.v_<%= start_rule_type[0] %>;
+    return context->parse_result.v_<%= start_rule_type[0] %>;
 }
 
 /**
@@ -933,5 +934,5 @@ size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context)
  */
 <%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context)
 {
-    return context.text_position;
+    return context->text_position;
 }
diff --git a/assets/parser.h.erb b/assets/parser.h.erb
index 64dd587..7796e4c 100644
--- a/assets/parser.h.erb
+++ b/assets/parser.h.erb
@@ -8,6 +8,7 @@
 #define PROPANE_PARSER_H
 
 #include <stdint.h>
+#include <stddef.h>
 
 /**************************************************************************
  * Public types
@@ -87,6 +88,9 @@ typedef struct
     /** Input text. */
     uint8_t const * input;
 
+    /** Input text length. */
+    size_t input_length;
+
     /** Input text index (byte offset). */
     size_t input_index;
 
@@ -105,4 +109,17 @@ typedef struct
     <%= @grammar.prefix %>token_t token;
 } <%= @grammar.prefix %>context_t;
 
+void <%= @grammar.prefix %>context_init(<%= @grammar.prefix %>context_t * context, uint8_t const * input, size_t input_length);
+
+size_t <%= @grammar.prefix %>decode_code_point(uint8_t const * input, size_t input_length,
+    <%= @grammar.prefix %>code_point_t * out_code_point, uint8_t * out_code_point_length);
+
+size_t <%= @grammar.prefix %>lex(<%= @grammar.prefix %>context_t * context, <%= @grammar.prefix %>token_info_t * out_token_info);
+
+size_t <%= @grammar.prefix %>parse(<%= @grammar.prefix %>context_t * context);
+
+<%= start_rule_type[1] %> <%= @grammar.prefix %>result(<%= @grammar.prefix %>context_t * context);
+
+<%= @grammar.prefix %>position_t <%= @grammar.prefix %>position(<%= @grammar.prefix %>context_t * context);
+
 #endif
diff --git a/spec/propane_spec.rb b/spec/propane_spec.rb
index 461d0e8..88f345d 100644
--- a/spec/propane_spec.rb
+++ b/spec/propane_spec.rb
@@ -28,10 +28,10 @@ describe Propane do
       "spec/run/testparser#{name}.#{options[:language]}"
     end
     case options[:language]
+    when "c"
+      result = system(*%w[gcc -Wall -o spec/run/testparser -Ispec -Ispec/run], *parsers, *test_files)
     when "d"
       result = system(*%w[ldc2 --unittest -of spec/run/testparser -Ispec], *parsers, *test_files, "spec/testutils.d")
-    when "c"
-      result = system(*%w[gcc -o spec/run/testparser -Ispec], *parsers, *test_files)
     end
     expect(result).to be_truthy
   end
diff --git a/spec/test_lexer.c b/spec/test_lexer.c
new file mode 100644
index 0000000..551ecb4
--- /dev/null
+++ b/spec/test_lexer.c
@@ -0,0 +1,92 @@
+#include "testparser.h"
+#include <assert.h>
+#include <string.h>
+
+int main()
+{
+    size_t result;
+    p_code_point_t code_point;
+    uint8_t code_point_length;
+
+    result = p_decode_code_point((uint8_t const *)"5", 1u, &code_point, &code_point_length);
+    assert(result == P_SUCCESS);
+    assert(code_point == '5');
+    assert(code_point_length == 1u);
+
+    result = p_decode_code_point((uint8_t const *)"", 0u, &code_point, &code_point_length);
+    assert(result == P_EOF);
+
+    result = p_decode_code_point((uint8_t const *)"\xC2\xA9", 2u, &code_point, &code_point_length);
+    assert(result == P_SUCCESS);
+    assert(code_point == 0xA9u);
+    assert(code_point_length == 2u);
+
+    result = p_decode_code_point((uint8_t const *)"\xf0\x9f\xa7\xa1", 4u, &code_point, &code_point_length);
+    assert(result == P_SUCCESS);
+    assert(code_point == 0x1F9E1u);
+    assert(code_point_length == 4u);
+
+    result = p_decode_code_point((uint8_t const *)"\xf0\x9f\x27", 3u, &code_point, &code_point_length);
+    assert(result == P_DECODE_ERROR);
+
+    result = p_decode_code_point((uint8_t const *)"\xf0\x9f\xa7\xFF", 4u, &code_point, &code_point_length);
+    assert(result == P_DECODE_ERROR);
+
+    result = p_decode_code_point((uint8_t const *)"\xfe", 1u, &code_point, &code_point_length);
+    assert(result == P_DECODE_ERROR);
+
+
+    p_token_info_t token_info;
+    char const * input = "5 + 4 * \n677 + 567";
+    p_context_t context;
+    p_context_init(&context, (uint8_t const *)input, strlen(input));
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 0u);
+    assert(token_info.position.col == 0u);
+    assert(token_info.length == 1u);
+    assert(token_info.token == TOKEN_int);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 0u);
+    assert(token_info.position.col == 2u);
+    assert(token_info.length == 1u);
+    assert(token_info.token == TOKEN_plus);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 0u);
+    assert(token_info.position.col == 4u);
+    assert(token_info.length == 1u);
+    assert(token_info.token == TOKEN_int);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 0u);
+    assert(token_info.position.col == 6u);
+    assert(token_info.length == 1u);
+    assert(token_info.token == TOKEN_times);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 1u);
+    assert(token_info.position.col == 0u);
+    assert(token_info.length == 3u);
+    assert(token_info.token == TOKEN_int);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 1u);
+    assert(token_info.position.col == 4u);
+    assert(token_info.length == 1u);
+    assert(token_info.token == TOKEN_plus);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 1u);
+    assert(token_info.position.col == 6u);
+    assert(token_info.length == 3u);
+    assert(token_info.token == TOKEN_int);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 1u);
+    assert(token_info.position.col == 9u);
+    assert(token_info.length == 0u);
+    assert(token_info.token == TOKEN___EOF);
+
+    p_context_init(&context, (uint8_t const *)"", 0u);
+    assert(p_lex(&context, &token_info) == P_SUCCESS);
+    assert(token_info.position.row == 0u);
+    assert(token_info.position.col == 0u);
+    assert(token_info.length == 0u);
+    assert(token_info.token == TOKEN___EOF);
+
+    return 0;
+}
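
For review context, a minimal sketch of how the public entry points declared in parser.h.erb might be driven from application code. This is illustrative only and not part of the patch: it assumes the spec's "p_" prefix and generated "testparser.h" header, that p_position_t exposes numeric row/col fields (as the test's token_info.position.row/col asserts suggest), and it uses only the status codes documented on the parse() function above (P_SUCCESS, P_UNEXPECTED_TOKEN); the type returned by p_result() is grammar-dependent, so it is only mentioned in a comment.

    /* Illustrative usage sketch (not part of the patch). */
    #include "testparser.h"
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char const * input = "5 + 4 * 677";
        p_context_t context;

        /* Bind the context to the input buffer and its byte length. */
        p_context_init(&context, (uint8_t const *)input, strlen(input));

        size_t result = p_parse(&context);
        if (result == P_SUCCESS)
        {
            /* The start rule's value is available via p_result(&context);
             * its C type is generated from start_rule_type. */
            printf("parse succeeded\n");
            return 0;
        }
        if (result == P_UNEXPECTED_TOKEN)
        {
            /* row/col field types are grammar-generated; cast for printing. */
            p_position_t pos = p_position(&context);
            fprintf(stderr, "unexpected token at row %u, col %u\n",
                (unsigned)pos.row, (unsigned)pos.col);
        }
        return 1;
    }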